├── .env.example ├── .gitignore ├── .streamlit └── config.toml ├── Dockerfile ├── LICENSE ├── README.md ├── demo_app ├── __init__.py ├── components │ ├── __init__.py │ ├── faq.py │ └── sidebar.py └── main.py ├── docker-compose.yml ├── poetry.lock ├── poetry.toml ├── pyproject.toml ├── requirements.txt └── ui.PNG /.env.example: -------------------------------------------------------------------------------- 1 | HUGGINGFACE_API_KEY= 2 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Local data 2 | data/local_data/ 3 | 4 | # Secrets 5 | .streamlit/secrets.toml 6 | 7 | # VSCode 8 | .vscode/ 9 | 10 | # TODO 11 | TODO.md 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | *.py,cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | cover/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | db.sqlite3 74 | db.sqlite3-journal 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | .pybuilder/ 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pdm 98 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 99 | #pdm.lock 100 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 101 | # in version control. 102 | # https://pdm.fming.dev/#use-with-ide 103 | .pdm.toml 104 | 105 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | 142 | # pytype static type analyzer 143 | .pytype/ 144 | 145 | # Cython debug symbols 146 | cython_debug/ 147 | 148 | # PyCharm 149 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 150 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 151 | # and can be added to the global gitignore or merged into this file. For a more nuclear 152 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
153 | .idea/ 154 | credentials.json 155 | token.json 156 | 157 | /db/ 158 | -------------------------------------------------------------------------------- /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [server] 2 | maxUploadSize = 15 3 | runOnSave = true -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # The builder image, used to build the virtual environment 2 | FROM python:3.11-buster as builder 3 | 4 | RUN apt-get update && apt-get install -y git 5 | 6 | RUN pip install poetry==1.4.2 7 | 8 | ENV POETRY_NO_INTERACTION=1 \ 9 | POETRY_VIRTUALENVS_IN_PROJECT=1 \ 10 | POETRY_VIRTUALENVS_CREATE=1 \ 11 | POETRY_CACHE_DIR=/tmp/poetry_cache 12 | 13 | ENV HOST=0.0.0.0 14 | ENV LISTEN_PORT 8080 15 | EXPOSE 8080 16 | 17 | WORKDIR /app 18 | 19 | COPY pyproject.toml poetry.lock ./ 20 | 21 | RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR 22 | 23 | # The runtime image, used to just run the code provided its virtual environment 24 | FROM python:3.11-slim-buster as runtime 25 | 26 | ENV VIRTUAL_ENV=/app/.venv \ 27 | PATH="/app/.venv/bin:$PATH" 28 | 29 | COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} 30 | 31 | COPY ./demo_app ./demo_app 32 | COPY ./.streamlit ./.streamlit 33 | 34 | CMD ["streamlit", "run", "demo_app/main.py", "--server.port", "8080"] 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 DR. 
AMJAD RAZA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 📖 EmbedChain-Streamlit-Docker App Template 3 |

4 | 5 | [![A Video Guide](ui.PNG?raw=true)](https://youtu.be/yJAWB13FhYQ) 6 | 7 | [https://youtu.be/yJAWB13FhYQ](https://youtu.be/yJAWB13FhYQ) 8 | 9 | 10 | ## 🔧 Features 11 | 12 | - Basic Skeleton App configured with `openai` API 13 | - A ChatBot using embedchain and Streamlit 14 | - Docker Support with Optimisation Cache etc 15 | - Deployment on Streamlit Public Cloud 16 | 17 | This repo contains a `main.py` file which has a template for a chatbot implementation. 18 | 19 | ## Example Input Data: 20 | 21 | Select the number of Data Sources from the slider and enter the details. 22 | 23 | 24 | | Source | URL | 25 | | -------- | ------- | 26 | | youtube | https://www.youtube.com/watch?v=3qHkcs3kG44 | 27 | | pdf_file |https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf | 28 | | web | https://nav.al/feedback | 29 | |qna_pair| "Who is Naval Ravikant?", "Naval Ravikant is an Indian-American entrepreneur and investor." | 30 | 31 | **Question:** What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts? 32 | 33 | 34 | ## Adding your chain 35 | To add your chain, you need to change the `response_embedchain` function in `demo_app/main.py`. 36 | Depending on the type of your chain, you may also need to change the inputs/outputs that occur later on. 37 | 38 | 39 | ## 💻 Running Locally 40 | 41 | 1. Clone the repository📂 42 | 43 | ```bash 44 | git clone https://github.com/amjadraza/embedchain-streamlit-app.git 45 | ``` 46 | 47 | 2. Install dependencies with [Poetry](https://python-poetry.org/) and activate virtual environment🔨 48 | 49 | ```bash 50 | poetry install 51 | poetry shell 52 | ``` 53 | 54 | 3. Run the Streamlit server🚀 55 | 56 | ```bash 57 | streamlit run demo_app/main.py 58 | ``` 59 | 60 | Run App using Docker 61 | -------------------- 62 | This project includes a `Dockerfile` to run the app in a Docker container. 
To optimise the Docker image 63 | size and build time with caching techniques, I have followed the tricks in the article below: 64 | https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0 65 | 66 | Build the docker container 67 | 68 | ``docker build . -t embedchain-streamlit-app:latest `` 69 | 70 | To build the image with `DOCKER_BUILDKIT`, use the command below 71 | 72 | ```DOCKER_BUILDKIT=1 docker build --target=runtime . -t embedchain-streamlit-app:latest``` 73 | 74 | 1. Run the docker container directly 75 | 76 | ``docker run -d --name embedchain-streamlit-app -p 8080:8080 embedchain-streamlit-app:latest `` 77 | 78 | 2. Run the docker container using docker-compose (Recommended) 79 | 80 | ``docker-compose up`` 81 | 82 | 83 | Deploy App on Streamlit Public Cloud 84 | ------------------------------------ 85 | This app can be deployed on Streamlit Public Cloud using GitHub. Below is the link to the 86 | publicly deployed app 87 | 88 | https://embedchain.streamlit.app/ 89 | 90 | 91 | 92 | ## Report Feedback 93 | 94 | `embedchain-streamlit-app` is a template project with a minimal example. Please report any issues you face. 95 | 96 | ## DISCLAIMER 97 | 98 | This is a template app. When used with an OpenAI API key, you will be charged a nominal fee depending 99 | on the number of prompts, etc. 
# flake8: noqa
import streamlit as st


def faq():
    """Render the static FAQ text as Markdown with ``st.markdown``.

    Takes no arguments and returns nothing; the FAQ text is a fixed literal.
    """
    st.markdown(
        """
# FAQ
## What is embedchain?
embedchain is a framework to digest data from various sources, video, text, pdf etc and
prepares the Chat-Bot


## What Libraries are being use?
Basic Setup is using embedchain, streamlit and openai.

## How is the Data Ingestion Works?
Fill the Data Input form with URL of relevant Data and submit. Data is
downloaded, chuncked and embeddings are stored in local db

## Bot Response
Having Data Ingestion complete, users can ask the questions, relevant to data.

## Disclaimer?
This is a template App, when using with openai_api key, you will be charged a nominal fee depending
on number of prompts etc.

"""
    )
import streamlit as st
import os
from dotenv import load_dotenv

from demo_app.components.faq import faq


def set_open_api_key(api_key: str):
    """Record a user-supplied OpenAI API key and mark the app as configured.

    Args:
        api_key: The key typed into the sidebar text input.
    """
    st.session_state["OPENAI_API_KEY"] = api_key
    # NOTE(review): os.environ is shared by the whole Python process, not just
    # this browser session. Presumably embedchain/openai read the key from the
    # environment; confirm this is acceptable for multi-user deployments.
    os.environ["OPENAI_API_KEY"] = api_key
    st.session_state["OPENAI_API_CONFIGURED"] = True
    print('OPENAI API key is Configured Successfully!')


def sidebar():
    """Render the sidebar: API-key setup, about section, demo data and FAQ.

    The key is taken from the ``OPENAI_API_KEY`` environment variable (after
    loading a ``.env`` file) when it is set to a non-empty value; otherwise a
    password input is shown and its value is passed to ``set_open_api_key``.
    Sets ``st.session_state["OPENAI_API_CONFIGURED"]`` when a key is available.
    """
    load_dotenv()
    with st.sidebar:
        # An unset variable (None) and an empty one ("", e.g. an unfilled copy
        # of .env.example) both mean "no key": test truthiness explicitly
        # instead of relying on an exception for control flow.
        env_api_key = os.getenv("OPENAI_API_KEY")
        if env_api_key:
            st.session_state["OPENAI_API_CONFIGURED"] = True
        else:
            st.markdown(
                "## How to use\n"
                "1. Enter your [OpenAI API key](https://platform.openai.com/account/api-keys) below🔑\n"  # noqa: E501
            )
            open_api_key_input = st.text_input(
                "Openai API Key",
                type="password",
                placeholder="Paste your API key here (sk-...)",
                help="You can get your API key from https://platform.openai.com/account/api-keys.",  # noqa: E501
                # Same session key that set_open_api_key() writes, so a
                # previously entered value is shown again.
                value=st.session_state.get("OPENAI_API_KEY", ""),
            )
            if open_api_key_input:
                set_open_api_key(open_api_key_input)

        # Single status line for both the environment and manual-entry paths.
        if not st.session_state.get("OPENAI_API_CONFIGURED"):
            st.error("Please configure your Open API key!")
        else:
            st.markdown("Open API Key Configured!")

        st.markdown("---")
        st.markdown("# About")
        st.markdown(
            "📖 This App is template of embedchain-streamlit-app example"
        )
        st.markdown("Made by [DR. AMJAD RAZA](https://www.linkedin.com/in/amjadraza/)")
        st.markdown("embedchain: https://github.com/embedchain/embedchain")
        st.markdown("---")

        st.markdown(" # Example: Demo Data")
        st.markdown(
            """
| Source | URL |
| -------- | ------- |
| youtube | https://www.youtube.com/watch?v=3qHkcs3kG44 |
| pdf_file |https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf |
| web | https://nav.al/feedback |
|qna_pair| "Who is Naval Ravikant?", "Naval Ravikant is an Indian-American entrepreneur and investor." |

Question: What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?
"""
        )

        faq()
"""Python file to serve as the frontend"""
import sys
import os
import time

sys.path.append(os.path.abspath('.'))

import streamlit as st
from demo_app.components.sidebar import sidebar


def ingest_data_dynamic(n):
    """Feed the first ``n`` data-source URLs to the chat bot.

    Reads ``value_0`` .. ``value_{n-1}`` from ``st.session_state`` (the keys of
    the text inputs created by ``add_form_row``) and passes each non-empty
    value to ``naval_chat_bot.add``. ``naval_chat_bot`` is the module-level
    bot created in the script body below. Sets ``IS_BOT_READY`` when done.

    Args:
        n: Number of data-source rows to read.
    """
    print(f'Number of Data Sources are {n}')
    for r in range(n):
        url_ = (st.session_state.get(f"value_{r}") or "").strip()
        if not url_:
            # A missing or blank row has nothing to ingest; skip it rather
            # than handing None/"" to the bot.
            print(f"Ingestion {r+1}/{n}: skipped (no URL provided)")
            continue
        print(f"Ingestion {r+1}/{n}: {url_}")
        naval_chat_bot.add(url_)

    st.session_state["IS_BOT_READY"] = True


def response_embedchain(query):
    """Return the chat bot's answer to ``query``.

    Calls ``naval_chat_bot.query`` on the module-level bot. Change this
    function to plug in a different chain or bot.
    """
    print(f'Calling response on: {query}')
    response = naval_chat_bot.query(query)
    return response


def add_data_form(r):
    """Form-submit callback: copy the URL entered in row ``r`` to ``url_{r}``.

    Args:
        r: Index of the form row whose ``value_{r}`` input was submitted.
    """
    st.session_state[f"url_{r}"] = [st.session_state.get(f"value_{r}")]
    # Log the value just stored (the original looked up a key never written).
    print(st.session_state.get(f"url_{r}"))


def add_form_row(row):
    """Render one single-input form for data source number ``row``.

    The text input is registered under the session key ``value_{row}``, which
    ``ingest_data_dynamic`` reads later.
    """
    # Inputs listed within a form
    # loaders_type = ["youtube_video", "pdf_file", "web_page", "qna_pair", "text"]
    data_form = st.form(key=f'{row}-Form')
    with data_form:
        data_columns = st.columns(1)
        with data_columns[0]:
            st.text_input(f"Enter Doc URL: {row}",
                          value="https://www.youtube.com/watch?v=3qHkcs3kG44",
                          key=f"value_{row}")
        # Pass the callable and its argument separately; writing
        # on_click=add_data_form(row) would run it on every render and
        # register None as the handler.
        st.form_submit_button(on_click=add_data_form, args=(row,))


def provide_data_dynamic():
    """Render the data-sources expander and return the chosen source count.

    Shows a slider (1-10), one form row per source and a "Submit Data" button.
    Clicking the button collapses the expander (via ``toggle_closed``) and sets
    ``st.session_state["submit_data_form"]``.

    Returns:
        The number of data sources selected with the slider.
    """
    with st.expander("Sources Data Form", expanded=st.session_state["expander_state"]):
        num_data_sources = st.slider('Number of Data Sources', min_value=1, max_value=10)
        for r in range(num_data_sources):
            add_form_row(r)
        submit_data_form = st.button("Submit Data", on_click=toggle_closed)
        if submit_data_form:
            st.session_state["submit_data_form"] = True
    return num_data_sources


def toggle_closed():
    """Button callback: collapse the data-sources expander on the next run."""
    st.session_state["expander_state"] = False


if __name__ == "__main__":

    st.set_page_config(
        page_title="💂‍♂️: EmbedChain Demo",
        page_icon="💂‍♂️",
        layout="wide",
        initial_sidebar_state="expanded", )
    st.header("📖 Private Knowledge Store: EmbedChain Demo")

    sidebar()

    if "expander_state" not in st.session_state:
        st.session_state["expander_state"] = True

    num_data_sources = provide_data_dynamic()

    if not st.session_state.get("OPENAI_API_CONFIGURED"):
        st.error("Please configure your API Keys!")

    if not st.session_state.get("submit_data_form"):
        st.error("Please Submit the Data Form")

    if st.session_state.get("OPENAI_API_CONFIGURED") and st.session_state.get("submit_data_form"):
        st.markdown("Main App: Started")
        # Imported here so embedchain is only loaded once a key is configured.
        from embedchain import App as ecApp
        naval_chat_bot = ecApp()
        # Ingest the data once per session.
        if not st.session_state.get("IS_BOT_READY"):
            with st.spinner('Wait for DATA Ingestion'):
                ingest_data_dynamic(num_data_sources)
            st.success('Data Ingestion Done!')

        if st.session_state.get("IS_BOT_READY"):

            if "messages" not in st.session_state:
                st.session_state["messages"] = [
                    {"role": "assistant", "content": "How can I help you?"}]

            # Display chat messages from history on app rerun
            for message in st.session_state.messages:
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])

            if user_input := st.chat_input("What is your question?"):
                # Add user message to chat history
                st.session_state.messages.append({"role": "user", "content": user_input})
                # Display user message in chat message container
                with st.chat_message("user"):
                    st.markdown(user_input)
                # Display the assistant response in ONE chat message container
                # (a second, duplicated container rendered an empty bubble).
                with st.chat_message("assistant"):
                    message_placeholder = st.empty()
                    full_response = ""

                    with st.spinner('CHAT-BOT is at Work ...'):
                        assistant_response = response_embedchain(user_input)
                    # Simulate stream of response with milliseconds delay
                    for chunk in assistant_response.split():
                        full_response += chunk + " "
                        time.sleep(0.05)
                        # Add a blinking cursor to simulate typing
                        message_placeholder.markdown(full_response + "▌")
                    message_placeholder.markdown(full_response)
                st.session_state.messages.append({"role": "assistant", "content": full_response})
-------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "embedchain-streamlit-app" 3 | version = "0.1.0" 4 | description = "A template embedchain Streamlit App with Docker" 5 | authors = ["Amjad Raza"] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [{include = "embedchain-streamlit-app"}] 9 | 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.10" 13 | embedchain = "^0.0.52" 14 | streamlit = "^1.26.0" 15 | 16 | 17 | [build-system] 18 | requires = ["poetry-core"] 19 | build-backend = "poetry.core.masonry.api" 20 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | embedchain >= 0.0.11 2 | streamlit >= 1.24.0 3 | -------------------------------------------------------------------------------- /ui.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amjadraza/embedchain-streamlit-app/5f8f16772c9c33f5ae5d35af95ca11c7abeae4d5/ui.PNG --------------------------------------------------------------------------------