├── .gitignore ├── .template.clash_config.yaml ├── .template.env ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── app.py ├── chatbot.py ├── configs └── config.py ├── docker-compose.yml ├── environment.yml ├── ingest.py ├── requirements.txt ├── schema ├── __init__.py └── schemas.py ├── templates ├── __init__.py ├── condense_prompt.py ├── conversational_prompt.py └── qa_prompt.py ├── textsplitter ├── __init__.py └── chinese_text_splitter.py ├── utils ├── __init__.py ├── callback.py ├── log.py └── tools.py └── vectorstore.pkl /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .chroma 163 | images 164 | vector_store 165 | draf.md 166 | temp 167 | docs/* 168 | !docs/周易 169 | question.md 170 | logs 171 | chat_history.json 172 | clash_config.yaml 173 | .DS_Store 174 | node_modules 175 | tot 176 | guidance_tool 177 | guidance_bot.py 178 | retrieval_chatbot.py 179 | tot_bot.py -------------------------------------------------------------------------------- /.template.clash_config.yaml: -------------------------------------------------------------------------------- 1 | # 务必指定external-ui 2 | external-ui: /opt/clash/ui 3 | # 否则UI服务无法启动 -------------------------------------------------------------------------------- /.template.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="" 2 | PINECONE_API_KEY="" 3 | PINECONE_ENVIRONMENT="" 4 | PINECONE_INDEX="" 5 | https_proxy="http://127.0.0.1:7890" 6 | http_proxy="http://127.0.0.1:7890" 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 AS builder 2 | 3 | RUN sed -i 's#archive.ubuntu.com#mirrors.aliyun.com#g' /etc/apt/sources.list \ 4 | && sed -i 's#security.ubuntu.com#mirrors.aliyun.com#g' /etc/apt/sources.list 5 | 6 | ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN:zh LC_ALL=zh_CN.UTF-8 DEBIAN_FRONTEND=noninteractive 7 | 8 | RUN rm -rf /etc/apt/sources.list.d/ && apt update 9 | 10 | RUN apt-get update && apt-get install -y --no-install-recommends \ 11 | zsh \ 12 | vim \ 13 | curl \ 14 | wget \ 15 | unzip \ 16 | supervisor \ 17 | ca-certificates \ 18 | language-pack-zh-hans 19 | 20 | RUN locale-gen zh_CN.UTF-8 21 | RUN dpkg-reconfigure locales 22 | 23 | CMD ["supervisord", "-n"] 24 | 25 | 26 | FROM builder AS conda 27 | ENV MINICONDA_VERSION 3 28 | ENV CONDA_FORGE https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge 29 | RUN 
chsh -s `which zsh` 30 | RUN curl -o ~/miniconda.sh -O https://mirrors.bfsu.edu.cn/anaconda/miniconda/Miniconda${MINICONDA_VERSION}-latest-Linux-x86_64.sh && \ 31 | chmod +x ~/miniconda.sh && \ 32 | ~/miniconda.sh -b -p /opt/conda && \ 33 | rm ~/miniconda.sh 34 | RUN ln /opt/conda/bin/conda /usr/local/bin/conda 35 | RUN conda init zsh 36 | RUN conda install mamba -n base -c ${CONDA_FORGE} 37 | RUN ln /opt/conda/bin/mamba /usr/local/bin/mamba && mamba init zsh 38 | 39 | 40 | FROM conda AS python 41 | ENV WORKDIR /app 42 | WORKDIR ${WORKDIR} 43 | ADD environment.yml /environment.yml 44 | RUN mamba clean --all -y && mamba update -n base -c ${CONDA_FORGE} conda mamba -y && mamba env create -f /environment.yml && rm -rf /root/.cache 45 | 46 | RUN echo "\ 47 | [program:be]\n\ 48 | directory=/app\n\ 49 | command=/opt/conda/envs/py310/bin/python /app/app.py\n\ 50 | autorestart=true\n\ 51 | startretries=100\n\ 52 | redirect_stderr=true\n\ 53 | stdout_logfile=/var/log/be.log\n\ 54 | stdout_logfile_maxbytes=50MB\n\ 55 | environment=PYTHONUNBUFFERED=1, PYTHONIOENCODING=utf-8\n\ 56 | " > /etc/supervisor/conf.d/be.conf 57 | 58 | FROM python AS clash 59 | WORKDIR /opt/clash 60 | RUN mkdir -p /root/.config/clash && \ 61 | wget -O /root/.config/clash/Country.mmdb https://download.fastgit.ixmu.net/Dreamacro/maxmind-geoip/releases/latest/download/Country.mmdb 62 | RUN wget https://download.fastgit.ixmu.net/Dreamacro/clash/releases/download/v1.11.8/clash-linux-amd64-v1.11.8.gz && \ 63 | gunzip clash-linux-amd64-v1.11.8.gz && \ 64 | mv clash-linux-amd64-v1.11.8 clash && \ 65 | chmod +x clash 66 | RUN wget https://download.fastgit.ixmu.net/haishanh/yacd/archive/refs/heads/gh-pages.zip && \ 67 | unzip gh-pages.zip && \ 68 | mv yacd-gh-pages ui && \ 69 | rm gh-pages.zip 70 | ADD ./clash_config.yaml /opt/clash/config.yaml 71 | RUN echo "\ 72 | [program:clash] \n\ 73 | command=/opt/clash/clash -f /opt/clash/config.yaml\n\ 74 | autorestart=True\n\ 75 | autostart=True\n\ 76 | redirect_stderr 
= true\n\ 77 | stdout_logfile=/var/log/clash.log\n\ 78 | stdout_logfile_maxbytes=50MB\n\ 79 | " > /etc/supervisor/conf.d/clash.conf 80 | EXPOSE 7890 7891 9090 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Toran Bruce Richards 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | chatbot: 2 | python app.py 3 | 4 | kill: 5 | kill -9 `lsof -t -i:9000` 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | # langchain-chatbot 9 | Langchain-Chatbot 是基于 Langchain 框架的检索式和生成式聊天机器人。该机器人能够以对话的方式理解并回应用户的输入​1​。 10 | 11 | ## Running by command 12 | ``` 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | ## copy env 17 | ```bash 18 | cp .template.env .env 19 | ``` 20 | 21 | ## edit env 22 | 配置你自己的environment,包括openai-key和科学上网代理 23 | 24 | ## 构建知识库 25 | 运行以下命令构建知识库 26 | ``` 27 | python ingest.py 28 | ``` 29 | 30 | ## run chatbot 31 | 运行以下命令启动聊天机器人 32 | ``` 33 | python chatbot.py 34 | ``` 35 | 36 | ## 运行应用 37 | 运行以下命令启动应用 38 | ``` 39 | python app.py 40 | ``` 41 | 42 | ## 通过HTTP服务器测试 43 | 您可以通过向 'http://127.0.0.1:9000/chat' 发送 POST 请求,其 JSON 主体包含聊天机器人要响应的文本,来测试聊天机器人。例如: 44 | ```curl 45 | curl --location --request POST 'http://127.0.0.1:9000/chat' \ 46 | --header 'Content-Type: application/json' \ 47 | --data-raw '{ 48 | "text": "你好" 49 | }' 50 | ``` 51 | 52 | ## docker构建 53 | 运行以下命令构建Docker镜像并启动Docker容器 54 | ``` 55 | docker-compose up -d 56 | ``` 57 | 58 | ## 进程管理工具 supervisor 59 | 您可以使用 Supervisor 来管理进程。这里是一些有用的命令: 60 | - 查看进程 `supervisorctl status` 61 | - 查看进程日志 `supervisorctl tail -f clash` 62 | - 查看进程日志文件 `/var/log/clash.log` 63 | 64 | ## langchain过河记系列文章 65 | - [langchain过河记(一)](https://zhuanlan.zhihu.com/p/630925973) 66 | - [langchain过河记(二)](https://zhuanlan.zhihu.com/p/630930843) 67 | - [langchain过河记(三)](https://zhuanlan.zhihu.com/p/630971903) 68 | - [langchain过河记(四)](https://zhuanlan.zhihu.com/p/631600368) 69 | 70 | ## external link 71 | [liveportraitweb](https://www.liveportraitweb.com/) 72 | [novelling](https://www.novelling.com/) 
73 | [Rewritifyai](https://www.rewritifyai.com/) 74 | [MMAudio](https://www.mmaudio.pro/) 75 | [Image To Video AI](https://imagetovideoai.space/) 76 | [Creator Viral Video](https://www.creatorviralvideo.com/) 77 | [Transpixar](https://www.transpixar.pro/) 78 | [Rednote](https://www.rednote.pro/) 79 | [RednoteApp](https://www.rednoteapp.pro/) 80 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-17 15:54:12 5 | LastEditTime: 2023-05-26 23:05:56 6 | ''' 7 | from fastapi.middleware.cors import CORSMiddleware 8 | from fastapi import FastAPI 9 | 10 | from schema import ChatItem 11 | from chatbot import get_chain 12 | 13 | from utils import logger, test_youtube_access 14 | 15 | from dotenv import load_dotenv 16 | load_dotenv() 17 | test_youtube_access() 18 | app = FastAPI() 19 | 20 | app.add_middleware(CORSMiddleware, 21 | allow_origins=["*"], 22 | allow_credentials=True, 23 | allow_methods=["*"], 24 | allow_headers=["*"]) 25 | 26 | conversation_chat = get_chain() 27 | 28 | @app.post("/chat", summary="chat接口", description="该接口为chat的接口") 29 | def chat(item: ChatItem): 30 | question = item.text 31 | result = conversation_chat(question) 32 | 33 | logger.info(f"chat result is {result['response']}") 34 | 35 | return { 36 | "result": result['response'] 37 | } 38 | 39 | @app.get("/ping") 40 | async def ping(): 41 | return "pong!!" 
42 | 43 | if __name__ == "__main__": 44 | import uvicorn 45 | uvicorn.run(app, host="0.0.0.0", port=9000) 46 | -------------------------------------------------------------------------------- /chatbot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-14 17:16:12 5 | LastEditTime: 2023-05-26 23:06:49 6 | ''' 7 | from langchain.memory import ConversationBufferWindowMemory 8 | from langchain.chat_models import ChatOpenAI 9 | from langchain.chains import ConversationChain 10 | from langchain.prompts.prompt import PromptTemplate 11 | 12 | from configs.config import * 13 | 14 | from dotenv import load_dotenv 15 | load_dotenv() 16 | 17 | import code 18 | 19 | def get_chain(): 20 | conversation_memory = ConversationBufferWindowMemory( 21 | memory_key="history", 22 | k=5 23 | ) 24 | 25 | chat_llm = ChatOpenAI( 26 | model_name="gpt-3.5-turbo", 27 | temperature=TEMPERTURE 28 | ) 29 | 30 | DEFAULT_TEMPLATE = """这是一个专门用于回答占卜相关问题的工具。只要你提出与占卜相关的问题,或者明确说出"占卜",这个工具就会被启动来寻找最合适的答案。无论是初次的占卜询问,还是后续的深入探讨,这个工具都可以提供协助。 31 | 最重要的一点,这个工具占卜的方式是周易占卜,针对所有的问题,都是通过聊天的模式实现周易占卜。 32 | 33 | Current conversation: 34 | {history} 35 | Human: {input} 36 | AI:""" 37 | PROMPT = PromptTemplate(input_variables=["history", "input"], template=DEFAULT_TEMPLATE) 38 | 39 | conversation = ConversationChain( 40 | llm=chat_llm, 41 | memory=conversation_memory, 42 | prompt=PROMPT 43 | ) 44 | 45 | return conversation 46 | 47 | 48 | if __name__ == "__main__": 49 | from colorama import init, Fore, Style 50 | init() 51 | 52 | conversation_chat = get_chain() 53 | 54 | while True: 55 | question = input("Please enter your question (or type 'exit' to end): ") 56 | if question.lower() == 'exit': 57 | break 58 | 59 | result = conversation_chat(question) 60 | 61 | print(f'{Fore.BLUE}{Style.BRIGHT}AI:{Fore.RESET}{Style.NORMAL} {result["response"]}') 62 | 
-------------------------------------------------------------------------------- /configs/config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-08 16:50:54 5 | LastEditTime: 2023-05-21 15:02:03 6 | ''' 7 | import os 8 | 9 | VS_METHOD = "faiss" # faiss/pinecone/chroma 10 | 11 | TEMPERTURE = 0.5 12 | 13 | DOCS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "docs") 14 | 15 | MAX_TOKENS_LIMIT = 2000 16 | 17 | # 文本分句长度 18 | SENTENCE_SIZE = 2000 19 | 20 | # 匹配后单段上下文长度 21 | CHUNK_SIZE = 1000 22 | CHUNK_OVERLAP = 0 23 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | langchain-chatbot: 4 | hostname: langchain-chatbot 5 | container_name: langchain-chatbot 6 | restart: always 7 | image: langchain-chatbot 8 | privileged: true 9 | ipc: host 10 | tty: true 11 | # working_dir: /workspace 12 | ports: 13 | - '4080:8080' 14 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: py310 2 | channels: 3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ # Anocanda清华镜像 4 | - defaults 5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ 6 | dependencies: 7 | - python=3.10 8 | - ipython 9 | - pip 10 | - pip: 11 | - openai 12 | - fastapi 13 | - black 14 | - isort 15 | - websockets 16 | - pydantic 17 | - langchain 18 | - uvicorn 19 | - jinja2 20 | - faiss-cpu 21 | - bs4 22 | - unstructured 23 | - libmagic 24 | - colorama==0.4.6 25 | - pinecone-client==2.2.1 26 | - streamlit==1.22.0 27 | - pymongo 28 | - loguru 29 | - python-dotenv 30 | - -i https://mirror.baidu.com/pypi/simple 31 | prefix: /opt/conda/envs/py310 
-------------------------------------------------------------------------------- /ingest.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 14:12:34 5 | LastEditTime: 2023-05-16 15:04:47 6 | ''' 7 | import pickle 8 | 9 | from langchain.document_loaders import TextLoader, DirectoryLoader 10 | from langchain.embeddings.openai import OpenAIEmbeddings 11 | from textsplitter import ChineseTextSplitter 12 | from langchain.vectorstores.faiss import FAISS 13 | from configs.config import * 14 | 15 | from dotenv import load_dotenv 16 | load_dotenv() 17 | 18 | def ingest(): 19 | loader = DirectoryLoader(DOCS_ROOT_PATH, glob="**/*.txt", loader_cls=TextLoader) 20 | documents = loader.load() 21 | 22 | text_splitter = ChineseTextSplitter( 23 | chunk_size=CHUNK_SIZE, 24 | chunk_overlap=CHUNK_OVERLAP 25 | ) 26 | 27 | documents = text_splitter.split_documents(documents) 28 | embeddings = OpenAIEmbeddings(model='text-embedding-ada-002') 29 | 30 | vector_store = FAISS.from_documents(documents, embeddings) 31 | 32 | # Save vectorstore 33 | with open("vectorstore.pkl", "wb") as f: 34 | pickle.dump(vector_store, f) 35 | 36 | if __name__ == "__main__": 37 | ingest() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | fastapi 3 | black 4 | isort 5 | websockets 6 | pydantic 7 | langchain 8 | uvicorn 9 | jinja2 10 | faiss-cpu 11 | bs4 12 | unstructured 13 | libmagic 14 | colorama==0.4.6 15 | pinecone-client==2.2.1 16 | streamlit==1.22.0 17 | pymongo 18 | loguru 19 | -------------------------------------------------------------------------------- /schema/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-17 15:54:12 5 | LastEditTime: 2023-05-18 
15:15:04 6 | ''' 7 | from .schemas import ChatResponse, ChatItem -------------------------------------------------------------------------------- /schema/schemas.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-17 15:54:12 5 | LastEditTime: 2023-05-17 16:43:13 6 | ''' 7 | """Schemas for the chat app.""" 8 | from pydantic import BaseModel, validator 9 | 10 | 11 | class ChatItem(BaseModel): 12 | text: str 13 | history: list=[] 14 | 15 | 16 | class ChatResponse(BaseModel): 17 | """Chat response schema.""" 18 | 19 | sender: str 20 | message: str 21 | type: str 22 | 23 | @validator("sender") 24 | def sender_must_be_bot_or_you(cls, v): 25 | if v not in ["bot", "you"]: 26 | raise ValueError("sender must be bot or you") 27 | return v 28 | 29 | @validator("type") 30 | def validate_message_type(cls, v): 31 | if v not in ["start", "stream", "end", "error", "info"]: 32 | raise ValueError("type must be start, stream or end") 33 | return v 34 | -------------------------------------------------------------------------------- /templates/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-09 15:25:46 5 | LastEditTime: 2023-05-24 19:09:23 6 | ''' 7 | from .condense_prompt import CONDENSE_PROMPT 8 | from .qa_prompt import QA_PROMPT 9 | from .conversational_prompt import PREFIX, SUFFIX, TEMPLATE_TOOL_RESPONSE, FORMAT_INSTRUCTIONS 10 | -------------------------------------------------------------------------------- /templates/condense_prompt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-09 17:39:11 5 | LastEditTime: 2023-05-21 14:12:31 6 | ''' 7 | CONDENSE_PROMPT = """ 8 | 根据以下对话和一个后续问题,将后续问题改写成一个独立的问题。 9 | 10 | 聊天记录: 11 | {chat_history} 12 | 13 | 后续输入:{question} 14 | 独立问题: 15 | 
""" -------------------------------------------------------------------------------- /templates/conversational_prompt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-24 19:08:36 5 | LastEditTime: 2023-05-24 19:15:42 6 | ''' 7 | PREFIX = """AI占卜助手是一个大型的语言模型,由OpenAI进行训练。它被设计成能够帮助进行周易占卜,并根据占卜的结果提供解释。 8 | 9 | AI占卜助手已经学习了周易占卜的知识,可以帮助用户进行占卜。它可以指导用户如何提出问题,如何进行占卜,以及如何解读占卜的结果。 10 | 11 | AI占卜助手的占卜步骤如下: 12 | 13 | 1. 提问:首先,AI占卜助手会引导用户提出一个特定的问题,这个问题应该是开放性的,不能简单地用"是"或"否"来回答。 14 | 15 | 2. 产生爻:然后,AI占卜助手会通过某种方式产生六个爻以形成卦象。这个过程可以是随机的,也可以是通过某种算法实现的。 16 | 17 | 3. 解读卦象:得到卦象后,AI占卜助手会解释这个卦象的含义。这个解释是基于周易的知识,也会考虑到用户的问题和情况。 18 | 19 | 4. 理解动爻:如果在产生爻的过程中有动爻(即6或9),AI占卜助手会解释这个动爻如何改变了卦象,以及这个改变如何影响到解答。 20 | 21 | 5. 反思与解答:最后,AI占卜助手会帮助用户理解卦象和动爻的含义,应用到他们的问题上,给出一个反思和解答。 22 | 23 | 请注意,尽管AI占卜助手具有进行周易占卜和解释结果的能力,但是它仍然只是一个AI模型,它的解答并不能预知未来,也不能替代专业的咨询或建议。请用户在理解和使用AI占卜助手的解答时,持有理性和批判性的态度。 24 | 25 | 当回答问题时,AI占卜助手必须使用以下语言:中文。 26 | """ 27 | 28 | 29 | SUFFIX = """TOOLS 30 | ------ 31 | Assistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are: 32 | 33 | {{tools}} 34 | 35 | {format_instructions} 36 | 37 | USER'S INPUT 38 | -------------------- 39 | Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else): 40 | 41 | {{{{input}}}}""" 42 | 43 | 44 | TEMPLATE_TOOL_RESPONSE = """TOOL RESPONSE: 45 | --------------------- 46 | {observation} 47 | 48 | USER'S INPUT 49 | -------------------- 50 | 51 | Okay, so what is the response to my last comment? If using information obtained from the tools you must mention it explicitly without mentioning the tool names - I have forgotten all TOOL RESPONSES! 
Remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else.""" 52 | 53 | 54 | FORMAT_INSTRUCTIONS = """RESPONSE FORMAT INSTRUCTIONS 55 | ---------------------------- 56 | 57 | When responding to me, please output a response in one of two formats: 58 | 59 | **Option 1:** 60 | Use this if you want the human to use a tool. 61 | Markdown code snippet formatted in the following schema: 62 | 63 | ```json 64 | {{{{ 65 | "action": string \\ The action to take. Must be one of {tool_names} 66 | "action_input": string \\ The input to the action 67 | }}}} 68 | ``` 69 | 70 | **Option #2:** 71 | Use this if you want to respond directly to the human. Markdown code snippet formatted in the following schema: 72 | 73 | ```json 74 | {{{{ 75 | "action": "Final Answer", 76 | "action_input": string \\ You should put what you want to return to use here 77 | }}}} 78 | ```""" 79 | -------------------------------------------------------------------------------- /templates/qa_prompt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-09 15:24:53 5 | LastEditTime: 2023-05-21 14:12:39 6 | ''' 7 | QA_PROMPT = """ 8 | 您是一个有用的AI助手。请使用以下上下文信息来回答最后的问题。 9 | 如果您不知道答案,请直接说您不知道。请不要试图编造答案。 10 | 如果问题与上下文无关,请礼貌地回应您只能回答与上下文相关的问题。 11 | 回答时尽可能详细。 12 | 13 | {context} 14 | 15 | 问题: {question} 16 | 有帮助的答案: 17 | """ -------------------------------------------------------------------------------- /textsplitter/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 11:35:55 5 | LastEditTime: 2023-05-10 11:37:51 6 | ''' 7 | from .chinese_text_splitter import ChineseTextSplitter -------------------------------------------------------------------------------- /textsplitter/chinese_text_splitter.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 11:35:55 5 | LastEditTime: 2023-05-14 17:11:13 6 | ''' 7 | from langchain.text_splitter import CharacterTextSplitter 8 | import re 9 | from typing import List 10 | from configs.config import SENTENCE_SIZE 11 | 12 | """ 13 | class ChineseTextSplitter(CharacterTextSplitter): 14 | def __init__(self, **kwargs): 15 | super().__init__(**kwargs) 16 | 17 | def split_text(self, text: str) -> List[str]: 18 | text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text) # 单字符断句符 19 | text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text) # 英文省略号 20 | text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text) # 中文省略号 21 | text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text) 22 | text = text.replace("\u3000", "") 23 | # 如果双引号前有终止符,那么双引号才是句子的终点,把分句符\n放到双引号后,注意前面的几句都小心保留了双引号 24 | text = text.rstrip() 25 | ls = [text[i:i + SENTENCE_SIZE] for i in range(0, len(text), SENTENCE_SIZE)] 26 | 27 | return ls 28 | """ 29 | 30 | class ChineseTextSplitter(CharacterTextSplitter): 31 | def __init__(self, pdf: bool = False, **kwargs): 32 | super().__init__(**kwargs) 33 | self.pdf = pdf 34 | 35 | def split_text(self, text: str) -> List[str]: 36 | if self.pdf: 37 | text = re.sub(r"\n{3,}", "\n", text) 38 | text = re.sub('\s', ' ', text) 39 | text = text.replace("\n\n", "") 40 | sent_sep_pattern = re.compile( 41 | '([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))') 42 | sent_list = [] 43 | for ele in sent_sep_pattern.split(text): 44 | if sent_sep_pattern.match(ele) and sent_list: 45 | sent_list[-1] += ele 46 | elif ele: 47 | sent_list.append(ele) 48 | return sent_list 49 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-21 14:11:51 5 | LastEditTime: 
2023-05-26 19:24:25 6 | ''' 7 | from .log import logger 8 | from .callback import StreamingLLMCallbackHandler, QuestionGenCallbackHandler 9 | from .tools import load_tools, test_youtube_access 10 | -------------------------------------------------------------------------------- /utils/callback.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 17:21:02 5 | LastEditTime: 2023-05-10 17:22:19 6 | ''' 7 | 8 | """Callback handlers used in the app.""" 9 | import sys 10 | 11 | sys.path.append("..") 12 | 13 | from typing import Any, Dict, List 14 | from langchain.callbacks.base import AsyncCallbackHandler 15 | from schema import ChatResponse 16 | 17 | 18 | class StreamingLLMCallbackHandler(AsyncCallbackHandler): 19 | """Callback handler for streaming LLM responses.""" 20 | 21 | def __init__(self, websocket): 22 | self.websocket = websocket 23 | 24 | async def on_llm_new_token(self, token: str, **kwargs: Any) -> None: 25 | resp = ChatResponse(sender="bot", message=token, type="stream") 26 | await self.websocket.send_json(resp.dict()) 27 | 28 | 29 | class QuestionGenCallbackHandler(AsyncCallbackHandler): 30 | """Callback handler for question generation.""" 31 | 32 | def __init__(self, websocket): 33 | self.websocket = websocket 34 | 35 | async def on_llm_start( 36 | self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any 37 | ) -> None: 38 | """Run when LLM starts running.""" 39 | resp = ChatResponse( 40 | sender="bot", message="Synthesizing question...", type="info" 41 | ) 42 | await self.websocket.send_json(resp.dict()) 43 | -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2022-08-30 11:39:10 5 | LastEditTime: 2023-05-18 18:35:33 6 | ''' 7 | import os 8 | from loguru import 
logger 9 | 10 | currentdirPath = os.path.dirname(__file__) 11 | logger_path = os.path.join(currentdirPath, '../logs/file_{time}.log') 12 | 13 | logger.add(logger_path, rotation="1 MB", enqueue=True, backtrace=True, diagnose=True, serialize=True) -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-26 17:54:39 5 | LastEditTime: 2023-05-26 23:06:42 6 | ''' 7 | import sys 8 | sys.path.append("..") 9 | 10 | from langchain.chains import RetrievalQA 11 | from langchain.chat_models import ChatOpenAI 12 | from langchain.callbacks import get_openai_callback 13 | 14 | from configs.config import * 15 | from .log import logger 16 | 17 | def load_tools(vectorstore): 18 | def searchVector(key_word): 19 | chat_llm = ChatOpenAI( 20 | model_name="gpt-3.5-turbo", 21 | temperature=TEMPERTURE 22 | ) 23 | 24 | retriever = RetrievalQA.from_chain_type( 25 | llm=chat_llm, 26 | chain_type="stuff", 27 | retriever=vectorstore.as_retriever(), 28 | return_source_documents=False 29 | ) 30 | 31 | result = retriever.run(key_word) 32 | 33 | return result 34 | 35 | dict_tools = { 36 | 'Vector Search': searchVector 37 | } 38 | return dict_tools 39 | 40 | 41 | def count_tokens(chain, query): 42 | with get_openai_callback() as cb: 43 | result = chain.run(query) 44 | logger.info(f'Spent a total of {cb.total_tokens} tokens') 45 | 46 | return result 47 | 48 | import requests 49 | def test_youtube_access(in_logger=''): 50 | logger = print if not in_logger else in_logger.info 51 | url = "https://www.youtube.com" 52 | headers = { 53 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" 54 | } 55 | 56 | try: 57 | response = requests.get(url, headers=headers, timeout=5) 58 | if response.status_code == 200: 59 | logger("成功访问YouTube") 60 | return 
True 61 | else: 62 | logger(f"访问YouTube失败,状态码:{response.status_code}") 63 | return False 64 | except requests.exceptions.RequestException as e: 65 | logger(f"访问YouTube时出现异常: {e}") 66 | return False 67 | -------------------------------------------------------------------------------- /vectorstore.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GaoQ1/langchain-chatbot/7050791ee0d6dceb707f025159b633af6ea24a04/vectorstore.pkl --------------------------------------------------------------------------------