├── .gitattributes
├── .gitignore
├── .replit
├── .streamlit
│   └── secrets.toml.example
├── LICENSE
├── README.md
├── agent_helper.py
├── app-agent.py
├── app-agent2.py
├── app.py
├── embed_pdf.py
├── index
│   ├── Mahmoudi_Nima_202202_PhD.pdf.index.faiss
│   ├── Mahmoudi_Nima_202202_PhD.pdf.index.pkl
│   ├── NIPS-2017-attention-is-all-you-need-Paper.pdf.index.faiss
│   └── NIPS-2017-attention-is-all-you-need-Paper.pdf.index.pkl
├── llm_helper.py
├── pdf
│   └── .gitignore
├── poetry.lock
├── pyproject.toml
└── requirements.txt
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 |
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 |
111 | # SageMath parsed files
112 | *.sage.py
113 |
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 |
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # mkdocs documentation
131 | /site
132 |
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 |
138 | # Pyre type checker
139 | .pyre/
140 |
141 | # pytype static type analyzer
142 | .pytype/
143 |
144 | # Cython debug symbols
145 | cython_debug/
146 |
147 | # PyCharm
148 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150 | # and can be added to the global gitignore or merged into this file. For a more nuclear
151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152 | #.idea/
153 |
154 |
155 | .streamlit/secrets.toml
156 | index/*.faiss
157 | index/*.pkl
158 | .pythonlibs
159 |
--------------------------------------------------------------------------------
/.replit:
--------------------------------------------------------------------------------
1 | modules = ["python-3.10:v18-20230807-322e88b"]
2 |
3 | hidden = [".pythonlibs"]
4 | run = "streamlit run app.py"
5 |
6 | [nix]
7 | channel = "stable-23_05"
8 |
9 | [unitTest]
10 | language = "python3"
11 |
12 | [deployment]
13 | run = ["sh", "-c", "streamlit run app.py"]
14 | deploymentTarget = "cloudrun"
15 |
--------------------------------------------------------------------------------
/.streamlit/secrets.toml.example:
--------------------------------------------------------------------------------
1 | # OPENAI_API_KEY = "sk-..."
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Nima Mahmoudi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLM Streamlit Demo (Basic)
2 |
3 | - [Live Demo](https://llmappdemobasic.streamlit.app/)
4 |
5 | ## Installation Process
6 |
7 | ### Without Poetry
8 |
9 | You can use conda to install the required packages:
10 |
11 | ```sh
12 | conda create -n p311-llm python=3.11
13 | conda activate p311-llm
14 | pip install -r requirements.txt
15 | ```
16 |
17 | ### With Poetry (optional)
18 |
 19 | Alternatively, you can install the required packages using Poetry:
20 |
21 | ```sh
22 | # (optional) to set poetry to use the project folder
23 | # poetry config virtualenvs.in-project true
24 | # install all dependencies
25 | poetry install
26 | ```
27 |
 28 | If you change `requirements.txt`, run the following command to update the Poetry definitions:
29 |
30 | ```sh
31 | cat requirements.txt | xargs poetry add
32 | ```
33 |
 34 | You can then activate the Poetry shell:
35 |
36 | ```sh
37 | poetry shell
38 | ```
39 |
40 | ## Running the App
41 |
 42 | After installing the requirements, add your OpenAI API key to the Streamlit secrets;
 43 | otherwise, users will have to enter it in the sidebar every time they visit the page.
 44 | You can add your secrets to `.streamlit/secrets.toml` in the following format:
45 |
46 | ```toml
47 | OPENAI_API_KEY = "sk-..."
48 | ```
49 |
50 | Then, you can run the code using the following command:
51 |
52 | ```sh
53 | streamlit run app.py
54 | ```
55 |
 56 | For more details about this code, see [my blog posts](https://medium.com/@nima.mahmoudi).
57 |
--------------------------------------------------------------------------------
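
Note: the demo searches prebuilt FAISS indexes shipped in `index/`. To index your own PDFs, place them in the `pdf/` folder and either click the sidebar's "Embed Documents" button in the app or rebuild the indexes directly with `embed_pdf`. A minimal sketch, assuming `OPENAI_API_KEY` is already set:

```python
# Minimal sketch: rebuild the FAISS indexes for every PDF in pdf/.
# Requires OPENAI_API_KEY, since chunk embeddings are computed with OpenAIEmbeddings.
import embed_pdf

embed_pdf.embed_all_pdf_docs()  # writes <name>.pdf.index.{faiss,pkl} into index/
```
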
/agent_helper.py:
--------------------------------------------------------------------------------
1 | # from langchain.callbacks import StreamlitCallbackHandler
2 | from langchain.callbacks.streamlit.streamlit_callback_handler import StreamlitCallbackHandler
  3 | from tenacity import retry, wait_exponential, stop_after_attempt, RetryError
4 |
5 | def bind_logger(toolClass):
6 | class newToolClass(toolClass):
7 | def __init__(self, tool_name: str, st_cb: StreamlitCallbackHandler, *args, **kwargs):
8 | super().__init__(*args, **kwargs)
9 | self.st_cb = st_cb
10 | self.tool_name = tool_name
11 |
12 | def run(self, *args, **kwargs):
13 | print(f"Running {toolClass.__name__} {[*args]}, {kwargs}")
14 |
15 | if self.st_cb._current_thought is None:
16 | self.st_cb.on_llm_start({}, [])
17 |
 18 |             args_str = ' '.join(map(str, args)) + ' ' + ' '.join([f'{k}=`{v}`' for k, v in kwargs.items()])
19 | self.st_cb.on_tool_start({'name': self.tool_name}, args_str)
20 |
21 | try:
22 | ret_val = retry(
23 | wait=wait_exponential(min=2, max=20),
24 | stop=stop_after_attempt(5),
25 | )(super().run)(*args, **kwargs)
26 | self.st_cb.on_tool_end(ret_val)
27 | return ret_val
 28 |             except RetryError as e:
 29 |                 original_exception = e.last_attempt.exception()
 30 |                 print(f"Exception {original_exception} in {toolClass.__name__} {[*args]}, {kwargs}")
 31 |                 raise original_exception
32 |
33 |
34 | return newToolClass
35 |
36 | from functools import wraps
37 |
38 | def retry_and_streamlit_callback(st_cb: StreamlitCallbackHandler, tool_name: str):
39 | if st_cb is None:
40 | return lambda x: x
41 |
42 | def decorator(tool_func):
43 | @wraps(tool_func)
44 | def decorated_func(*args, **kwargs):
45 | print(f"Running {tool_name} {args}, {kwargs}")
46 |
47 | if st_cb._current_thought is None:
48 | st_cb.on_llm_start({}, [])
49 |
 50 |             args_str = ' '.join(map(str, args)) + ' ' + ' '.join([f'{k}=`{v}`' for k, v in kwargs.items()])
51 | st_cb.on_tool_start({'name': tool_name}, args_str)
52 |
53 | @retry(wait=wait_exponential(min=2, max=20), stop=stop_after_attempt(5))
54 | def retry_wrapper():
55 | return tool_func(*args, **kwargs)
56 |
57 | try:
58 | ret_val = retry_wrapper()
59 | st_cb.on_tool_end(ret_val)
60 | return ret_val
61 | except Exception as e:
62 | print(f"Exception {e} in {tool_name} {args}, {kwargs}")
63 | raise e
64 |
65 | return decorated_func
66 |
67 | return decorator
68 |
--------------------------------------------------------------------------------
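
A minimal usage sketch for `retry_and_streamlit_callback`: the decorator registers each call as a tool step in the `StreamlitCallbackHandler` UI and retries the wrapped function with exponential backoff. The `echo` tool below is hypothetical, purely for illustration:

```python
# Hypothetical sketch (not part of the repo): run inside a Streamlit script.
import streamlit as st
from langchain.callbacks.streamlit.streamlit_callback_handler import StreamlitCallbackHandler
from agent_helper import retry_and_streamlit_callback

st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)

@retry_and_streamlit_callback(st_cb=st_cb, tool_name="Echo Tool")
def echo(query: str) -> str:
    # A toy tool body; a flaky call here would be retried up to 5 times.
    return f"echo: {query}"

st.write(echo("hello"))
```
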
/app-agent.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 | from langchain.agents import initialize_agent, AgentType
4 | from langchain.callbacks import StreamlitCallbackHandler
5 | from langchain.chat_models import ChatOpenAI
6 | from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
7 |
8 | from llm_helper import get_agent_chain, get_lc_oai_tools
9 |
10 | with st.sidebar:
 11 |     openai_api_key = st.secrets.get("OPENAI_API_KEY", "")
 12 |     "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
 13 |     "[View the source code](https://github.com/nimamahmoudi/LLMStreamlitDemoBasic)"
 14 |     "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"
15 |
16 | st.title("🔎 LangChain - Chat with search")
17 |
18 | """
19 | In this example, we're using `StreamlitCallbackHandler` to display the thoughts and actions of an agent in an interactive Streamlit app.
20 | Try more LangChain 🤝 Streamlit Agent examples at [github.com/langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent).
21 | """
22 |
23 | if "messages" not in st.session_state:
24 | st.session_state["messages"] = [
25 | {"role": "assistant", "content": "Hi, I'm a chatbot who can search the web. How can I help you?"}
26 | ]
27 |
28 | for msg in st.session_state.messages:
29 | st.chat_message(msg["role"]).write(msg["content"])
30 |
 31 | if prompt := st.chat_input(placeholder="What is serverless computing?"):
32 | st.session_state.messages.append({"role": "user", "content": prompt})
33 | st.chat_message("user").write(prompt)
34 |
35 | if not openai_api_key:
36 | st.info("Please add your OpenAI API key to continue.")
37 | st.stop()
38 |
39 | llm = ChatOpenAI(model_name="gpt-3.5-turbo-1106", openai_api_key=openai_api_key, streaming=True)
40 | lc_tools, _ = get_lc_oai_tools()
41 | search_agent = initialize_agent(lc_tools, llm, agent=AgentType.OPENAI_FUNCTIONS, handle_parsing_errors=True, verbose=True)
42 |
43 | agent_prompt = ChatPromptTemplate.from_messages(
44 | [
45 | ("system", "You are a helpful assistant, use the search tool to answer the user's question and cite only the page number when you use information coming (like [p1]) from the source document. Always use the content from the source document to answer the user's question. If you need to compare multiple subjects, search them one by one."),
46 | ("user", "{input}"),
47 | MessagesPlaceholder(variable_name="agent_scratchpad"),
48 | ]
49 | )
50 | search_agent.agent.prompt = agent_prompt
51 | with st.chat_message("assistant"):
52 | st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
53 | response = search_agent.run(prompt, callbacks=[st_cb])
54 | # search_agent = get_agent_chain(callbacks=[st_cb])
55 | # response = search_agent.invoke({"input": prompt})
56 | # response = response["output"]
57 |
58 | st.session_state.messages.append({"role": "assistant", "content": response})
59 | st.write(response)
60 |
--------------------------------------------------------------------------------
/app-agent2.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 |
3 | from langchain.agents import initialize_agent, AgentType
4 | from langchain.callbacks import StreamlitCallbackHandler
5 | from langchain.chat_models import ChatOpenAI
6 | from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
7 |
8 | from langchain.agents.format_scratchpad import format_to_openai_function_messages
9 | from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
10 |
11 | from llm_helper import get_agent_chain, get_lc_oai_tools, convert_message
12 | from langchain.agents import AgentExecutor
13 |
14 | with st.sidebar:
 15 |     openai_api_key = st.secrets.get("OPENAI_API_KEY", "")
 16 |     "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
 17 |     "[View the source code](https://github.com/nimamahmoudi/LLMStreamlitDemoBasic)"
 18 |     "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"
19 |
20 | st.title("🔎 LangChain - Chat with search")
21 |
22 | """
23 | In this example, we're using `StreamlitCallbackHandler` to display the thoughts and actions of an agent in an interactive Streamlit app.
24 | Try more LangChain 🤝 Streamlit Agent examples at [github.com/langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent).
25 | """
26 |
27 | if "messages" not in st.session_state:
28 | st.session_state["messages"] = [
29 | {"role": "assistant", "content": "Hi, I'm a chatbot who can search the web. How can I help you?"}
30 | ]
31 |
32 | for msg in st.session_state.messages:
33 | st.chat_message(msg["role"]).write(msg["content"])
34 |
 35 | if prompt := st.chat_input(placeholder="What is serverless computing?"):
36 | st.session_state.messages.append({"role": "user", "content": prompt})
37 | st.chat_message("user").write(prompt)
38 |
39 | if not openai_api_key:
40 | st.info("Please add your OpenAI API key to continue.")
41 | st.stop()
42 |
43 | if "messages" in st.session_state:
44 | chat_history = [convert_message(m) for m in st.session_state.messages[:-1]]
45 | else:
46 | chat_history = []
47 |
48 | with st.chat_message("assistant"):
49 | st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
50 | agent = get_agent_chain(st_cb=st_cb)
51 |
52 | response = agent.invoke({
53 | "input": prompt,
54 | "chat_history": chat_history,
55 | })
56 | response = response["output"]
57 |
58 | st.session_state.messages.append({"role": "assistant", "content": response})
59 | st.write(response)
60 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import os
3 | import embed_pdf
4 |
5 | # create sidebar and ask for openai api key if not set in secrets
6 | secrets_file_path = os.path.join(".streamlit", "secrets.toml")
7 | if os.path.exists(secrets_file_path):
8 | try:
9 | if "OPENAI_API_KEY" in st.secrets:
10 | os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
11 | else:
12 | print("OpenAI API Key not found in environment variables")
13 | except FileNotFoundError:
14 | print('Secrets file not found')
15 | else:
16 | print('Secrets file not found')
17 |
18 | if not os.getenv('OPENAI_API_KEY', '').startswith("sk-"):
19 | os.environ["OPENAI_API_KEY"] = st.sidebar.text_input(
20 | "OpenAI API Key", type="password"
21 | )
22 | else:
23 | if st.sidebar.button("Embed Documents"):
24 | st.sidebar.info("Embedding documents...")
25 | try:
26 | embed_pdf.embed_all_pdf_docs()
27 | st.sidebar.info("Done!")
28 | except Exception as e:
29 | st.sidebar.error(e)
30 | st.sidebar.error("Failed to embed documents.")
31 |
32 | # create the app
33 | st.title("Welcome to NimaGPT")
34 |
35 | chosen_file = st.radio(
36 | "Choose a file to search", embed_pdf.get_all_index_files(), index=0
37 | )
38 |
39 | # check if openai api key is set
40 | if not os.getenv('OPENAI_API_KEY', '').startswith("sk-"):
41 | st.warning("Please enter your OpenAI API key!", icon="⚠")
42 | st.stop()
43 |
44 | # load the agent
45 | from llm_helper import convert_message, get_rag_chain, get_rag_fusion_chain
46 |
47 | rag_method_map = {
48 | 'Basic RAG': get_rag_chain,
49 | 'RAG Fusion': get_rag_fusion_chain
50 | }
51 | chosen_rag_method = st.radio(
52 | "Choose a RAG method", rag_method_map.keys(), index=0
53 | )
54 | get_rag_chain_func = rag_method_map[chosen_rag_method]
55 | ## get the chain WITHOUT the retrieval callback (not used)
56 | # custom_chain = get_rag_chain_func(chosen_file)
57 |
58 | # create the message history state
59 | if "messages" not in st.session_state:
60 | st.session_state.messages = []
61 |
62 | # render older messages
63 | for message in st.session_state.messages:
64 | with st.chat_message(message["role"]):
65 | st.markdown(message["content"])
66 |
67 | # render the chat input
68 | prompt = st.chat_input("Enter your message...")
69 | if prompt:
70 | st.session_state.messages.append({"role": "user", "content": prompt})
71 |
72 | # render the user's new message
73 | with st.chat_message("user"):
74 | st.markdown(prompt)
75 |
76 | # render the assistant's response
77 | with st.chat_message("assistant"):
 78 |         retrieval_container = st.container()
 79 |         message_placeholder = st.empty()
 80 | 
 81 |         retrieval_status = retrieval_container.status("**Context Retrieval**")
82 | queried_questions = []
83 | rendered_questions = set()
84 | def update_retrieval_status():
85 | for q in queried_questions:
86 | if q in rendered_questions:
87 | continue
88 | rendered_questions.add(q)
89 | retrieval_status.markdown(f"\n\n`- {q}`")
90 | def retrieval_cb(qs):
91 | for q in qs:
92 | if q not in queried_questions:
93 | queried_questions.append(q)
94 | return qs
95 |
96 | # get the chain with the retrieval callback
97 | custom_chain = get_rag_chain_func(chosen_file, retrieval_cb=retrieval_cb)
98 |
99 | if "messages" in st.session_state:
100 | chat_history = [convert_message(m) for m in st.session_state.messages[:-1]]
101 | else:
102 | chat_history = []
103 |
104 | full_response = ""
105 | for response in custom_chain.stream(
106 | {"input": prompt, "chat_history": chat_history}
107 | ):
108 | if "output" in response:
109 | full_response += response["output"]
110 | else:
111 | full_response += response.content
112 |
113 | message_placeholder.markdown(full_response + "▌")
114 | update_retrieval_status()
115 |
116 | retrieval_status.update(state="complete")
117 | message_placeholder.markdown(full_response)
118 |
119 | # add the full response to the message history
120 | st.session_state.messages.append({"role": "assistant", "content": full_response})
121 |
--------------------------------------------------------------------------------
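
The streaming loop in `app.py` accepts two chunk shapes: dict chunks such as an agent's `{"output": ...}` and chat-model message chunks that carry `.content`. A stand-alone sketch of the same accumulation logic with stubbed chunks (illustrative only; written with an explicit `isinstance` check for clarity):

```python
# Illustrative stand-in for the chunk handling in app.py's streaming loop.
from langchain.schema.messages import AIMessageChunk

chunks = [AIMessageChunk(content="Serverless platforms "), {"output": "scale to zero."}]

full_response = ""
for response in chunks:
    if isinstance(response, dict) and "output" in response:
        full_response += response["output"]  # agent-style chunk
    else:
        full_response += response.content  # chat-model message chunk

print(full_response)  # Serverless platforms scale to zero.
```
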
/embed_pdf.py:
--------------------------------------------------------------------------------
  1 | from langchain.document_loaders import PyPDFLoader
2 | from langchain.text_splitter import RecursiveCharacterTextSplitter
3 | from langchain.embeddings.openai import OpenAIEmbeddings
4 | from langchain.vectorstores import FAISS
5 |
6 | import os
7 |
8 |
9 | def embed_document(file_name, file_folder="pdf", embedding_folder="index"):
10 | file_path = f"{file_folder}/{file_name}"
 11 |     loader = PyPDFLoader(file_path)
12 | source_pages = loader.load_and_split()
13 |
14 | embedding_func = OpenAIEmbeddings()
15 | text_splitter = RecursiveCharacterTextSplitter(
16 | chunk_size=500,
17 | chunk_overlap=100,
18 | length_function=len,
19 | is_separator_regex=False,
20 | separators=["\n\n", "\n", " ", ""],
21 | )
22 | source_chunks = text_splitter.split_documents(source_pages)
23 | search_index = FAISS.from_documents(source_chunks, embedding_func)
24 | search_index.save_local(
25 | folder_path=embedding_folder, index_name=file_name + ".index"
26 | )
27 |
28 |
29 | def embed_all_pdf_docs():
30 | # Define the directory path
31 | pdf_directory = "pdf"
32 |
33 | # Check if the directory exists
34 | if os.path.exists(pdf_directory):
35 | # List all PDF files in the directory
36 | pdf_files = [
37 | file for file in os.listdir(pdf_directory) if file.endswith(".pdf")
38 | ]
39 |
40 | if pdf_files:
41 | for pdf_file in pdf_files:
42 | print(f"Embedding {pdf_file}...")
43 | embed_document(file_name=pdf_file, file_folder=pdf_directory)
44 | print("Done!")
45 | else:
46 | raise Exception("No PDF files found in the directory.")
47 | else:
48 | raise Exception(f"Directory '{pdf_directory}' does not exist.")
49 |
50 |
51 | def get_all_index_files():
52 | # Define the directory path
53 | index_directory = "index"
54 |
55 | # Check if the directory exists
56 | if os.path.exists(index_directory):
57 | # List all index files in the directory
58 | postfix = ".index.faiss"
59 | index_files = [
60 | file.replace(postfix, "")
61 | for file in os.listdir(index_directory)
62 | if file.endswith(postfix)
63 | ]
64 |
65 | if index_files:
66 | return index_files
67 | else:
68 | raise Exception("No index files found in the directory.")
69 | else:
70 | raise Exception(f"Directory '{index_directory}' does not exist.")
71 |
--------------------------------------------------------------------------------
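
To sanity-check an index written by `embed_document`, load it back and query it the same way `llm_helper.get_search_index` does. A minimal sketch, assuming `OPENAI_API_KEY` is set and the thesis index from `index/` exists:

```python
# Minimal sketch: load a saved index and run a similarity search against it.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

search_index = FAISS.load_local(
    folder_path="index",
    index_name="Mahmoudi_Nima_202202_PhD.pdf.index",
    embeddings=OpenAIEmbeddings(),
)
for doc in search_index.similarity_search("serverless computing", k=3):
    print(doc.metadata.get("page"), doc.page_content[:80])
```
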
/index/Mahmoudi_Nima_202202_PhD.pdf.index.faiss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nimamahmoudi/LLMStreamlitDemoBasic/0d466282626dba8a1951534c1fbd9719caaa35fd/index/Mahmoudi_Nima_202202_PhD.pdf.index.faiss
--------------------------------------------------------------------------------
/index/Mahmoudi_Nima_202202_PhD.pdf.index.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nimamahmoudi/LLMStreamlitDemoBasic/0d466282626dba8a1951534c1fbd9719caaa35fd/index/Mahmoudi_Nima_202202_PhD.pdf.index.pkl
--------------------------------------------------------------------------------
/index/NIPS-2017-attention-is-all-you-need-Paper.pdf.index.faiss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nimamahmoudi/LLMStreamlitDemoBasic/0d466282626dba8a1951534c1fbd9719caaa35fd/index/NIPS-2017-attention-is-all-you-need-Paper.pdf.index.faiss
--------------------------------------------------------------------------------
/index/NIPS-2017-attention-is-all-you-need-Paper.pdf.index.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nimamahmoudi/LLMStreamlitDemoBasic/0d466282626dba8a1951534c1fbd9719caaa35fd/index/NIPS-2017-attention-is-all-you-need-Paper.pdf.index.pkl
--------------------------------------------------------------------------------
/llm_helper.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | # langchain imports
4 | from langchain.chat_models import ChatOpenAI
5 | from langchain.schema.runnable import RunnableMap
6 | from langchain.prompts.prompt import PromptTemplate
7 | from langchain.prompts import ChatPromptTemplate
8 | from langchain.schema.runnable import RunnablePassthrough
9 | from langchain.schema.output_parser import StrOutputParser
10 | from operator import itemgetter
11 | from langchain.schema.messages import HumanMessage, SystemMessage, AIMessage
12 | from langchain.callbacks.streamlit.streamlit_callback_handler import StreamlitCallbackHandler
13 |
14 |
15 | def format_docs(docs):
16 | res = ""
17 | # res = str(docs)
18 | for doc in docs:
19 | escaped_page_content = doc.page_content.replace("\n", "\\n")
 20 |         res += "<doc>\n"
 21 |         res += f"  <content>{escaped_page_content}</content>\n"
 22 |         for m in doc.metadata:
 23 |             res += f"  <{m}>{doc.metadata[m]}</{m}>\n"
 24 |         res += "</doc>\n"
25 | return res
26 |
27 |
28 | def get_search_index(file_name="Mahmoudi_Nima_202202_PhD.pdf", index_folder="index"):
29 | # load embeddings
30 | from langchain.vectorstores import FAISS
31 | from langchain.embeddings.openai import OpenAIEmbeddings
32 |
33 | search_index = FAISS.load_local(
34 | folder_path=index_folder,
35 | index_name=file_name + ".index",
36 | embeddings=OpenAIEmbeddings(),
37 | )
38 | return search_index
39 |
40 |
41 | def convert_message(m):
42 | if m["role"] == "user":
43 | return HumanMessage(content=m["content"])
44 | elif m["role"] == "assistant":
45 | return AIMessage(content=m["content"])
46 | elif m["role"] == "system":
47 | return SystemMessage(content=m["content"])
48 | else:
49 | raise ValueError(f"Unknown role {m['role']}")
50 |
51 |
52 | _condense_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
53 |
54 | Chat History:
55 | {chat_history}
56 | Follow Up Input: {input}
57 | Standalone question:"""
58 | CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_condense_template)
59 |
60 | _rag_template = """Answer the question based only on the following context, citing the page number(s) of the document(s) you used to answer the question:
61 | {context}
62 |
63 | Question: {question}
64 | """
65 | ANSWER_PROMPT = ChatPromptTemplate.from_template(_rag_template)
66 |
67 |
68 | def _format_chat_history(chat_history):
69 | def format_single_chat_message(m):
70 | if type(m) is HumanMessage:
71 | return "Human: " + m.content
72 | elif type(m) is AIMessage:
73 | return "Assistant: " + m.content
74 | elif type(m) is SystemMessage:
75 | return "System: " + m.content
76 | else:
 77 |             raise ValueError(f"Unknown message type {type(m).__name__}")
78 |
79 | return "\n".join([format_single_chat_message(m) for m in chat_history])
80 |
81 | def get_standalone_question_from_chat_history_chain():
82 | _inputs = RunnableMap(
83 | standalone_question=RunnablePassthrough.assign(
84 | chat_history=lambda x: _format_chat_history(x["chat_history"])
85 | )
86 | | CONDENSE_QUESTION_PROMPT
87 | | ChatOpenAI(temperature=0)
88 | | StrOutputParser(),
89 | )
90 | return _inputs
91 |
92 | def get_rag_chain(file_name="Mahmoudi_Nima_202202_PhD.pdf", index_folder="index", retrieval_cb=None):
93 | vectorstore = get_search_index(file_name, index_folder)
94 | retriever = vectorstore.as_retriever()
95 |
96 | if retrieval_cb is None:
97 | retrieval_cb = lambda x: x
98 |
99 | def context_update_fn(q):
100 | retrieval_cb([q])
101 | return q
102 |
103 | _inputs = RunnableMap(
104 | standalone_question=RunnablePassthrough.assign(
105 | chat_history=lambda x: _format_chat_history(x["chat_history"])
106 | )
107 | | CONDENSE_QUESTION_PROMPT
108 | | ChatOpenAI(temperature=0)
109 | | StrOutputParser(),
110 | )
111 | _context = {
112 | "context": itemgetter("standalone_question") | RunnablePassthrough(context_update_fn) | retriever | format_docs,
113 | "question": lambda x: x["standalone_question"],
114 | }
115 | conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()
116 | return conversational_qa_chain
117 |
118 |
119 | # RAG fusion chain
120 | # source1: https://youtu.be/GchC5WxeXGc?si=6i7J0rPZI7SNwFYZ
121 | # source2: https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1
122 | def reciprocal_rank_fusion(results: list[list], k=60):
123 | from langchain.load import dumps, loads
124 | fused_scores = {}
125 | for docs in results:
126 | # Assumes the docs are returned in sorted order of relevance
127 | for rank, doc in enumerate(docs):
128 | doc_str = dumps(doc)
129 | if doc_str not in fused_scores:
130 | fused_scores[doc_str] = 0
131 | fused_scores[doc_str] += 1 / (rank + k)
132 |
133 | reranked_results = [
134 | (loads(doc), score)
135 | for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
136 | ]
137 | return reranked_results
138 |
139 |
140 | def get_search_query_generation_chain():
141 | from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
142 | prompt = ChatPromptTemplate(
143 | input_variables=['original_query'],
144 | messages=[
145 | SystemMessagePromptTemplate(
146 | prompt=PromptTemplate(
147 | input_variables=[],
148 | template='You are a helpful assistant that generates multiple search queries based on a single input query.'
149 | )
150 | ),
151 | HumanMessagePromptTemplate(
152 | prompt=PromptTemplate(
153 | input_variables=['original_query'],
154 | template='Generate multiple search queries related to: {original_query} \n OUTPUT (4 queries):'
155 | )
156 | )
157 | ]
158 | )
159 |
160 | generate_queries = (
161 | prompt |
162 | ChatOpenAI(temperature=0) |
163 | StrOutputParser() |
164 | (lambda x: x.split("\n"))
165 | )
166 |
167 | return generate_queries
168 |
169 | def get_rag_fusion_chain(file_name="Mahmoudi_Nima_202202_PhD.pdf", index_folder="index", retrieval_cb=None):
170 | vectorstore = get_search_index(file_name, index_folder)
171 | retriever = vectorstore.as_retriever()
172 | query_generation_chain = get_search_query_generation_chain()
173 | _inputs = RunnableMap(
174 | standalone_question=RunnablePassthrough.assign(
175 | chat_history=lambda x: _format_chat_history(x["chat_history"])
176 | )
177 | | CONDENSE_QUESTION_PROMPT
178 | | ChatOpenAI(temperature=0)
179 | | StrOutputParser(),
180 | )
181 |
182 | if retrieval_cb is None:
183 | retrieval_cb = lambda x: x
184 |
185 | _context = {
186 | "context":
187 | RunnablePassthrough.assign(
188 | original_query=lambda x: x["standalone_question"]
189 | )
190 | | query_generation_chain
191 | | retrieval_cb
192 | | retriever.map()
193 | | reciprocal_rank_fusion
194 | | (lambda x: [item[0] for item in x])
195 | | format_docs,
196 | "question": lambda x: x["standalone_question"],
197 | }
198 | conversational_qa_chain = _inputs | _context | ANSWER_PROMPT | ChatOpenAI()
199 | return conversational_qa_chain
200 |
201 |
202 | ####################
203 | # Adding agent chain with OpenAI function calling
204 |
205 | def get_search_tool_from_index(search_index, st_cb: Optional[StreamlitCallbackHandler] = None, ):
206 | from langchain.agents import tool
207 | from agent_helper import retry_and_streamlit_callback
208 |
209 | @tool
210 |     @retry_and_streamlit_callback(st_cb=st_cb, tool_name="Content Search Tool")
211 | def search(query: str) -> str:
212 | """Search the contents of the source document for the queries."""
213 |
214 | docs = search_index.similarity_search(query, k=5)
215 | return format_docs(docs)
216 |
217 | return search
218 |
219 | def get_lc_oai_tools(file_name:str = "Mahmoudi_Nima_202202_PhD.pdf", index_folder: str = "index", st_cb: Optional[StreamlitCallbackHandler] = None, ):
220 | from langchain.tools.render import format_tool_to_openai_tool
221 | search_index = get_search_index(file_name, index_folder)
222 | lc_tools = [get_search_tool_from_index(search_index=search_index, st_cb=st_cb)]
223 | oai_tools = [format_tool_to_openai_tool(t) for t in lc_tools]
224 | return lc_tools, oai_tools
225 |
226 | def get_agent_chain(file_name="Mahmoudi_Nima_202202_PhD.pdf", index_folder="index", callbacks=None, st_cb: Optional[StreamlitCallbackHandler] = None, ):
227 | if callbacks is None:
228 | callbacks = []
229 |
230 | from langchain.agents import initialize_agent, AgentType
231 | from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
232 | from langchain.agents.format_scratchpad.openai_tools import (
233 | format_to_openai_tool_messages,
234 | )
235 | from langchain.agents import AgentExecutor
236 | from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
237 |
238 | lc_tools, oai_tools = get_lc_oai_tools(file_name, index_folder, st_cb)
239 |
240 |
241 | prompt = ChatPromptTemplate.from_messages(
242 | [
243 | ("system", "You are a helpful assistant, use the search tool to answer the user's question and cite only the page number when you use information coming (like [p1]) from the source document.\nchat history: {chat_history}"),
244 | ("user", "{input}"),
245 | MessagesPlaceholder(variable_name="agent_scratchpad"),
246 | ]
247 | )
248 | llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106")
249 |
250 | agent = (
251 | {
252 | "input": lambda x: x["input"],
253 | "agent_scratchpad": lambda x: format_to_openai_tool_messages(
254 | x["intermediate_steps"]
255 | ),
256 | "chat_history": lambda x: _format_chat_history(x["chat_history"]),
257 | }
258 | | prompt
259 | | llm.bind(tools=oai_tools)
260 | | OpenAIToolsAgentOutputParser()
261 | )
262 |
263 | agent_executor = AgentExecutor(agent=agent, tools=lc_tools, verbose=True, callbacks=callbacks)
264 | return agent_executor
265 |
266 |
267 | if __name__ == "__main__":
268 | question_generation_chain = get_search_query_generation_chain()
269 | print('='*50)
270 | print('RAG Chain')
271 | chain = get_rag_chain()
272 | print(chain.invoke({'input': 'serverless computing', 'chat_history': []}))
273 |
274 | print('='*50)
275 | print('Question Generation Chain')
276 | print(question_generation_chain.invoke({'original_query': 'serverless computing'}))
277 |
278 | print('-'*50)
279 | print('RAG Fusion Chain')
280 | chain = get_rag_fusion_chain()
281 | print(chain.invoke({'input': 'serverless computing', 'chat_history': []}))
282 |
283 | agent_executor = get_agent_chain()
284 | print(
285 | agent_executor.invoke({
286 | "input": "based on the source document, compare FaaS with BaaS??",
287 | "chat_history": [],
288 | })
289 | )
290 |
--------------------------------------------------------------------------------
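
The fusion step in `reciprocal_rank_fusion` can be checked in isolation: a document's fused score is the sum of 1/(rank + k) over every result list it appears in, so a document ranked well in several lists beats one that tops a single list. A toy sketch with hand-made documents (illustrative values only):

```python
# Toy check of reciprocal_rank_fusion with the default k=60 from llm_helper.py.
from langchain.schema import Document
from llm_helper import reciprocal_rank_fusion

a = Document(page_content="A")
b = Document(page_content="B")
c = Document(page_content="C")

# B appears in both result lists, so it fuses to the top:
# score(B) = 1/(1+60) + 1/(0+60) ≈ 0.0331, vs. 1/(0+60) ≈ 0.0167 for A.
for doc, score in reciprocal_rank_fusion([[a, b], [b, c]]):
    print(doc.page_content, round(score, 4))
```
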
/pdf/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "llm-streamlit-demo-basic"
3 | version = "0.1.0"
4 | description = ""
5 | authors = ["Nima Mahmoudi "]
6 |
7 | [tool.poetry.dependencies]
8 | python = ">=3.10.0,<3.11"
9 | langchain = "^0.0.321"
10 | openai = "^0.28.1"
11 | streamlit = "^1.27.2"
12 | faiss-cpu = "^1.7.4"
13 | tiktoken = "^0.5.1"
14 | langchainhub = "^0.1.13"
15 | pypdf = "^3.17.0"
16 |
17 | [tool.pyright]
18 | # https://github.com/microsoft/pyright/blob/main/docs/configuration.md
19 | useLibraryCodeForTypes = true
20 | exclude = [".cache"]
21 |
22 | [tool.ruff]
23 | # https://beta.ruff.rs/docs/configuration/
24 | select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
25 | ignore = ['W291', 'W292', 'W293']
26 |
27 | [build-system]
28 | requires = ["poetry-core>=1.0.0"]
29 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain
2 | openai
3 | streamlit
4 | faiss-cpu
5 | tiktoken
6 | langchainhub
7 | pypdf
--------------------------------------------------------------------------------