├── .env.example
├── .gitignore
├── .streamlit
└── config.toml
├── Dockerfile
├── LICENSE
├── README.md
├── demo_app
├── __init__.py
├── components
│ ├── __init__.py
│ ├── faq.py
│ └── sidebar.py
└── main.py
├── docker-compose.yml
├── poetry.lock
├── poetry.toml
├── pyproject.toml
├── requirements.txt
└── ui.PNG
/.env.example:
--------------------------------------------------------------------------------
1 | HUGGINGFACE_API_KEY=
2 | OPENAI_API_KEY=
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Local data
2 | data/local_data/
3 |
4 | # Secrets
5 | .streamlit/secrets.toml
6 |
7 | # VSCode
8 | .vscode/
9 |
10 | # TODO
11 | TODO.md
12 |
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | *.py[cod]
16 | *$py.class
17 |
18 | # C extensions
19 | *.so
20 |
21 | # Distribution / packaging
22 | .Python
23 | build/
24 | develop-eggs/
25 | dist/
26 | downloads/
27 | eggs/
28 | .eggs/
29 | lib/
30 | lib64/
31 | parts/
32 | sdist/
33 | var/
34 | wheels/
35 | share/python-wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | MANIFEST
40 |
41 | # PyInstaller
42 | # Usually these files are written by a python script from a template
43 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
44 | *.manifest
45 | *.spec
46 |
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 |
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .nox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *.cover
61 | *.py,cover
62 | .hypothesis/
63 | .pytest_cache/
64 | cover/
65 |
66 | # Translations
67 | *.mo
68 | *.pot
69 |
70 | # Django stuff:
71 | *.log
72 | local_settings.py
73 | db.sqlite3
74 | db.sqlite3-journal
75 |
76 | # Flask stuff:
77 | instance/
78 | .webassets-cache
79 |
80 | # Scrapy stuff:
81 | .scrapy
82 |
83 | # Sphinx documentation
84 | docs/_build/
85 |
86 | # PyBuilder
87 | .pybuilder/
88 | target/
89 |
90 | # Jupyter Notebook
91 | .ipynb_checkpoints
92 |
93 | # IPython
94 | profile_default/
95 | ipython_config.py
96 |
97 | # pdm
98 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
99 | #pdm.lock
100 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
101 | # in version control.
102 | # https://pdm.fming.dev/#use-with-ide
103 | .pdm.toml
104 |
105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
106 | __pypackages__/
107 |
108 | # Celery stuff
109 | celerybeat-schedule
110 | celerybeat.pid
111 |
112 | # SageMath parsed files
113 | *.sage.py
114 |
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 |
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 |
128 | # Rope project settings
129 | .ropeproject
130 |
131 | # mkdocs documentation
132 | /site
133 |
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 |
139 | # Pyre type checker
140 | .pyre/
141 |
142 | # pytype static type analyzer
143 | .pytype/
144 |
145 | # Cython debug symbols
146 | cython_debug/
147 |
148 | # PyCharm
149 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
150 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
151 | # and can be added to the global gitignore or merged into this file. For a more nuclear
152 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
153 | .idea/
154 | credentials.json
155 | token.json
156 |
157 | /db/
158 |
--------------------------------------------------------------------------------
/.streamlit/config.toml:
--------------------------------------------------------------------------------
1 | [server]
2 | maxUploadSize = 15
3 | runOnSave = true
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# The builder image, used to build the virtual environment.
# Nothing declared in this stage (ENV, EXPOSE, WORKDIR) carries over to the
# runtime stage below; only files copied with `COPY --from=builder` do.
FROM python:3.11-buster AS builder

RUN apt-get update && apt-get install -y git

RUN pip install poetry==1.4.2

ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Copy only the dependency manifests first so the expensive install layer is
# reused from cache as long as the dependencies do not change.
COPY pyproject.toml poetry.lock ./

RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR

# The runtime image, used to just run the code provided its virtual environment
FROM python:3.11-slim-buster AS runtime

ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH"

# Declared here (not in the builder) so they are part of the final image.
ENV HOST=0.0.0.0 \
    LISTEN_PORT=8080
EXPOSE 8080

# Run from /app so the relative COPY targets and the CMD path resolve to
# /app/demo_app, the same location docker-compose mounts the sources at.
WORKDIR /app

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

COPY ./demo_app ./demo_app
COPY ./.streamlit ./.streamlit

CMD ["streamlit", "run", "demo_app/main.py", "--server.port", "8080"]
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 DR. AMJAD RAZA
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | 📖 EmbedChain-Streamlit-Docker App Template
3 |
4 |
5 | [](https://youtu.be/yJAWB13FhYQ)
6 |
7 | [https://youtu.be/yJAWB13FhYQ](https://youtu.be/yJAWB13FhYQ)
8 |
9 |
10 | ## 🔧 Features
11 |
12 | - Basic Skeleton App configured with `openai` API
13 | - A ChatBot using embedchain and Streamlit
14 | - Docker Support with Optimisation Cache etc
15 | - Deployment on Streamlit Public Cloud
16 |
17 | This repo contains a `main.py` file which has a template for a chatbot implementation.
18 |
19 | ## Example Input Data:
20 |
21 | Select the number of Data Sources from slider and enter the details.
22 |
23 |
24 | | Source | URL |
25 | | -------- | ------- |
26 | | youtube | https://www.youtube.com/watch?v=3qHkcs3kG44 |
27 | | pdf_file |https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf |
28 | | web | https://nav.al/feedback |
29 | |qna_pair| "Who is Naval Ravikant?", "Naval Ravikant is an Indian-American entrepreneur and investor." |
30 |
31 | **Question:** What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?
32 |
33 |
34 | ## Adding your chain
35 | To add your chain, you need to change the `response_embedchain` function (and the `ecApp()` construction) in `demo_app/main.py`.
36 | Depending on the type of your chain, you may also need to change the inputs/outputs that occur later on.
37 |
38 |
39 | ## 💻 Running Locally
40 |
41 | 1. Clone the repository📂
42 |
43 | ```bash
44 | git clone https://github.com/amjadraza/embedchain-streamlit-app.git
45 | ```
46 |
47 | 2. Install dependencies with [Poetry](https://python-poetry.org/) and activate virtual environment🔨
48 |
49 | ```bash
50 | poetry install
51 | poetry shell
52 | ```
53 |
54 | 3. Run the Streamlit server🚀
55 |
56 | ```bash
57 | streamlit run demo_app/main.py
58 | ```
59 |
60 | Run App using Docker
61 | --------------------
62 | This project includes a `Dockerfile` to run the app in a Docker container. To optimise the Docker image
63 | size and build time with caching techniques, I followed the tricks in the article below:
64 | https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0
65 |
66 | Build the docker container
67 |
68 | ``docker build . -t embedchain-streamlit-app:latest ``
69 |
70 | To build the image with `DOCKER_BUILDKIT`, use the command below
71 |
72 | ```DOCKER_BUILDKIT=1 docker build --target=runtime . -t embedchain-streamlit-app:latest```
73 |
74 | 1. Run the docker container directly
75 |
76 | ``docker run -d --name embedchain-streamlit-app -p 8080:8080 embedchain-streamlit-app:latest ``
77 |
78 | 2. Run the docker container using docker-compose (Recommended)
79 |
80 | ``docker-compose up``
81 |
82 |
83 | Deploy App on Streamlit Public Cloud
84 | ------------------------------------
85 | This app can be deployed on Streamlit Public Cloud using GitHub. Below is the Link to
86 | Publicly deployed App
87 |
88 | https://embedchain.streamlit.app/
89 |
90 |
91 |
92 | ## Report Feedbacks
93 |
94 | `embedchain-streamlit-app:latest` is a template project with a minimal example. Please report any issues you face.
95 |
96 | ## DISCLAIMER
97 |
98 | This is a template app. When it is used with an OpenAI API key, you will be charged a nominal fee depending
99 | on the number of prompts etc.
--------------------------------------------------------------------------------
/demo_app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amjadraza/embedchain-streamlit-app/5f8f16772c9c33f5ae5d35af95ca11c7abeae4d5/demo_app/__init__.py
--------------------------------------------------------------------------------
/demo_app/components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amjadraza/embedchain-streamlit-app/5f8f16772c9c33f5ae5d35af95ca11c7abeae4d5/demo_app/components/__init__.py
--------------------------------------------------------------------------------
/demo_app/components/faq.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | import streamlit as st
3 |
4 |
def faq():
    """Render the static FAQ section as one markdown block."""
    # The markdown is kept flush-left inside the literal: indenting it by four
    # or more spaces would make markdown render it as a code block.
    faq_markdown = """
# FAQ
## What is embedchain?
embedchain is a framework to digest data from various sources, video, text, pdf etc and
prepares the Chat-Bot


## What Libraries are being use?
Basic Setup is using embedchain, streamlit and openai.

## How is the Data Ingestion Works?
Fill the Data Input form with URL of relevant Data and submit. Data is
downloaded, chuncked and embeddings are stored in local db

## Bot Response
Having Data Ingestion complete, users can ask the questions, relevant to data.

## Disclaimer?
This is a template App, when using with openai_api key, you will be charged a nominal fee depending
on number of prompts etc.

"""
    st.markdown(faq_markdown)
30 |
--------------------------------------------------------------------------------
/demo_app/components/sidebar.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import os
3 | from dotenv import load_dotenv
4 |
5 | from demo_app.components.faq import faq
6 |
def set_open_api_key(api_key: str):
    """Record ``api_key`` as the active OpenAI key.

    The key is written to the process environment variable ``OPENAI_API_KEY``
    and to the Streamlit session state, and the session is flagged as
    configured via ``OPENAI_API_CONFIGURED``.
    """
    os.environ["OPENAI_API_KEY"] = api_key
    st.session_state.update(
        {
            "OPENAI_API_KEY": api_key,
            "OPENAI_API_CONFIGURED": True,
        }
    )
    print('OPENAI API key is Configured Successfully!')
12 |
13 |
def sidebar():
    """Render the sidebar: API-key configuration, about text, demo data and FAQ.

    The OpenAI key is taken from the ``OPENAI_API_KEY`` environment variable
    (after loading a ``.env`` file). When that variable is missing or empty,
    a password field is shown instead and the entered key is stored through
    ``set_open_api_key``. Either way ``st.session_state["OPENAI_API_CONFIGURED"]``
    is set to ``True`` once a non-empty key is available.
    """
    load_dotenv()
    with st.sidebar:
        # An empty value (e.g. an unedited copy of .env.example) must not count
        # as a configured key, so test truthiness instead of relying on an
        # exception from assigning None into os.environ.
        env_api_key = os.getenv("OPENAI_API_KEY")
        if env_api_key:
            st.session_state["OPENAI_API_CONFIGURED"] = True
            st.markdown("Open API Key Configured!")
        else:
            st.markdown(
                "## How to use\n"
                "1. Enter your [OpenAI API key](https://platform.openai.com/account/api-keys) below🔑\n"  # noqa: E501
            )
            open_api_key_input = st.text_input(
                "Openai API Key",
                type="password",
                placeholder="Paste your API key here (sk-...)",
                help="You can get your API key from https://platform.openai.com/account/api-keys.",  # noqa: E501
                # Same session key that set_open_api_key() writes, so a
                # previously entered key is pre-filled on rerun.
                value=st.session_state.get("OPENAI_API_KEY", ""),
            )
            if open_api_key_input:
                set_open_api_key(open_api_key_input)

            if not st.session_state.get("OPENAI_API_CONFIGURED"):
                st.error("Please configure your Open API key!")
            else:
                st.markdown("Open API Key Configured!")

        st.markdown("---")
        st.markdown("# About")
        st.markdown(
            "📖 This App is template of embedchain-streamlit-app example"
        )
        st.markdown("Made by [DR. AMJAD RAZA](https://www.linkedin.com/in/amjadraza/)")
        st.markdown("embedchain: https://github.com/embedchain/embedchain")
        st.markdown("---")

        st.markdown(" # Example: Demo Data")
        # Table text is flush-left so markdown does not treat it as a code block.
        st.markdown(
            """
| Source | URL |
| -------- | ------- |
| youtube | https://www.youtube.com/watch?v=3qHkcs3kG44 |
| pdf_file |https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf |
| web | https://nav.al/feedback |
|qna_pair| "Who is Naval Ravikant?", "Naval Ravikant is an Indian-American entrepreneur and investor." |

Question: What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?
"""
        )

        faq()
69 |
--------------------------------------------------------------------------------
/demo_app/main.py:
--------------------------------------------------------------------------------
1 | """Python file to serve as the frontend"""
2 | import sys
3 | import os
4 | import time
5 |
6 | sys.path.append(os.path.abspath('.'))
7 |
8 | import streamlit as st
9 | from demo_app.components.sidebar import sidebar
10 |
def ingest_data_dynamic(n):
    """Feed the first ``n`` data-source URLs from the form into the chat bot.

    Reads ``st.session_state["value_<i>"]`` for ``i`` in ``range(n)`` and
    passes each one to the module-level ``naval_chat_bot.add``. Entries that
    are missing or blank are skipped instead of being handed to the bot.
    Sets ``st.session_state["IS_BOT_READY"]`` to ``True`` when done.

    Args:
        n: Number of data-source rows shown in the form.
    """
    print(f'Number of Data Sources are {n}')
    for r in range(n):
        url_ = st.session_state.get(f"value_{r}")
        # A row whose widget never rendered yields None; a cleared field
        # yields "" (or whitespace). Neither is a usable source.
        if not url_ or not url_.strip():
            print(f"Ingestion {r+1}/{n}: skipped (no URL provided)")
            continue
        print(f"Ingestion {r+1}/{n}: {url_}")
        naval_chat_bot.add(url_)

    st.session_state["IS_BOT_READY"] = True
19 |
20 |
def response_embedchain(query):
    """Log ``query``, send it to the module-level ``naval_chat_bot`` and return its answer."""
    print(f'Calling response on: {query}')
    return naval_chat_bot.query(query)
26 |
def add_data_form(r):
    """Copy the URL typed into row ``r`` to ``st.session_state["url_<r>"]``.

    The value is stored as a one-element list and echoed to stdout for
    debugging.

    Args:
        r: Zero-based index of the form row.
    """
    url_key = f"url_{r}"
    st.session_state[url_key] = [st.session_state.get(f"value_{r}")]
    # Echo the entry that was just written (the bare f"{r}" key is never set).
    print(st.session_state.get(url_key))
30 |
31 |
def add_form_row(row):
    """Render one single-field form for data source number ``row``.

    The form holds a text input (session key ``value_<row>``) pre-filled with
    a demo YouTube URL, and a submit button that runs ``add_data_form(row)``
    when clicked.

    Args:
        row: Zero-based index of the data source; used in widget keys/labels.
    """
    # Inputs listed within a form
    # loaders_type = ["youtube_video", "pdf_file", "web_page", "qna_pair", "text"]
    data_form = st.form(key=f'{row}-Form')
    with data_form:
        data_columns = st.columns(1)
        with data_columns[0]:
            st.text_input(f"Enter Doc URL: {row}",
                          value="https://www.youtube.com/watch?v=3qHkcs3kG44",
                          key=f"value_{row}")
        # Pass the callable and its argument separately: writing
        # on_click=add_data_form(row) would run it on every render and
        # register its return value (None) as the callback.
        st.form_submit_button(on_click=add_data_form, args=(row,))
43 |
44 |
def provide_data_dynamic():
    """Draw the data-sources expander and return how many sources were chosen.

    Inside the expander a slider (1 to 10) picks the number of sources, one
    form row is rendered per source, and a "Submit Data" button collapses the
    expander through ``toggle_closed``. When the button was pressed,
    ``st.session_state["submit_data_form"]`` is set to ``True``.

    Returns:
        The slider value, i.e. the number of data-source rows rendered.
    """
    sources_expander = st.expander(
        "Sources Data Form",
        expanded=st.session_state["expander_state"],
    )
    with sources_expander:
        source_count = st.slider('Number of Data Sources', min_value=1, max_value=10)
        for row_index in range(source_count):
            add_form_row(row_index)
        if st.button("Submit Data", on_click=toggle_closed):
            st.session_state["submit_data_form"] = True
    return source_count
55 |
56 |
def toggle_closed():
    """Button callback: mark the data-sources expander as collapsed."""
    st.session_state.expander_state = False
59 |
60 |
if __name__ == "__main__":
    # Script entry point. Streamlit re-executes this block top to bottom on
    # every user interaction; state that must survive reruns lives in
    # st.session_state.

    st.set_page_config(
        page_title="💂‍♂️: EmbedChain Demo",
        page_icon="💂‍♂️",
        layout="wide",
        initial_sidebar_state="expanded", )
    st.header("📖 Private Knowledge Store: EmbedChain Demo")

    sidebar()

    # The data form starts expanded; toggle_closed() collapses it on submit.
    if "expander_state" not in st.session_state:
        st.session_state["expander_state"] = True

    # data_dict = provide_data_urls()
    num_data_sources = provide_data_dynamic()

    if not st.session_state.get("OPENAI_API_CONFIGURED"):
        st.error("Please configure your API Keys!")

    if not st.session_state.get("submit_data_form"):
        st.error("Please Submit the Data Form")

    if st.session_state.get("OPENAI_API_CONFIGURED") and st.session_state.get("submit_data_form"):
        st.markdown("Main App: Started")
        # Imported lazily, only once an API key and data sources are available.
        from embedchain import App as ecApp
        # Module-level name: ingest_data_dynamic() and response_embedchain()
        # look the bot up as a global.
        naval_chat_bot = ecApp()
        # ingesting data
        if not st.session_state.get("IS_BOT_READY"):
            with st.spinner('Wait for DATA Ingestion'):
                # ingest_data(data_dict)
                ingest_data_dynamic(num_data_sources)
            st.success('Data Ingestion Done!')

        if st.session_state.get("IS_BOT_READY"):

            if "messages" not in st.session_state:
                st.session_state["messages"] = [
                    {"role": "assistant", "content": "How can I help you?"}]

            # Display chat messages from history on app rerun
            for message in st.session_state.messages:
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])

            if user_input := st.chat_input("What is your question?"):
                # Add user message to chat history
                st.session_state.messages.append({"role": "user", "content": user_input})
                # Display user message in chat message container
                with st.chat_message("user"):
                    st.markdown(user_input)
                # Display assistant response in chat message container.
                # Exactly one container is opened: a second, duplicated
                # st.chat_message("assistant") block would leave an empty
                # assistant bubble above every answer.
                with st.chat_message("assistant"):
                    message_placeholder = st.empty()
                    full_response = ""

                    with st.spinner('CHAT-BOT is at Work ...'):
                        assistant_response = response_embedchain(user_input)
                    # Simulate stream of response with milliseconds delay
                    for chunk in assistant_response.split():
                        full_response += chunk + " "
                        time.sleep(0.05)
                        # Add a blinking cursor to simulate typing
                        message_placeholder.markdown(full_response + "▌")
                    message_placeholder.markdown(full_response)
                st.session_state.messages.append({"role": "assistant", "content": full_response})
130 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3'
services:
  langchain-chat-app:
    image: embedchain-streamlit-app:latest
    # The Dockerfile lives at the repository root, next to this file.
    build: .
    command: streamlit run demo_app/main.py --server.port 8080
    volumes:
      # Mount the sources for live editing during development.
      - ./demo_app/:/app/demo_app
    ports:
      - 8080:8080
11 |
--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[tool.poetry]
name = "embedchain-streamlit-app"
version = "0.1.0"
description = "A template embedchain Streamlit App with Docker"
authors = ["Amjad Raza"]
license = "MIT"
readme = "README.md"
# Must name a package directory that exists in the repository; the app code
# lives in `demo_app/`.
packages = [{include = "demo_app"}]
9 |
10 |
11 | [tool.poetry.dependencies]
12 | python = "^3.10"
13 | embedchain = "^0.0.52"
14 | streamlit = "^1.26.0"
15 |
16 |
17 | [build-system]
18 | requires = ["poetry-core"]
19 | build-backend = "poetry.core.masonry.api"
20 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | embedchain >= 0.0.11
2 | streamlit >= 1.24.0
3 |
--------------------------------------------------------------------------------
/ui.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amjadraza/embedchain-streamlit-app/5f8f16772c9c33f5ae5d35af95ca11c7abeae4d5/ui.PNG
--------------------------------------------------------------------------------