import time

import replicate

# Module-level debounce state shared across calls (single-process only).
last_call_time = 0
debounce_interval = 2  # Minimum seconds allowed between successive model calls.


def debounce_replicate_run(llm, prompt, max_len, temperature, top_p, API_TOKEN):
    """Run a Replicate LLM, debounced to at most one call per interval.

    Parameters
    ----------
    llm : str
        Replicate model endpoint identifier ("owner/model:version").
    prompt : str
        Full conversation prompt. The caller (llama2_chatbot.py) already
        terminates it with a trailing "Assistant: " tag, so this function
        must not append a second copy.
    max_len : int
        Maximum generation length, forwarded as Replicate's "max_length".
    temperature : float
        Sampling temperature forwarded to the model.
    top_p : float
        Nucleus-sampling threshold forwarded to the model.
    API_TOKEN : str
        Replicate API token.

    Returns
    -------
    The streaming iterator returned by ``replicate.run``, or a plain
    rate-limit message string when called again within
    ``debounce_interval`` seconds of the previous call.
    """
    global last_call_time
    print("last call time: ", last_call_time)

    # Time elapsed since the previous accepted call.
    current_time = time.time()
    elapsed_time = current_time - last_call_time

    # Too soon since the last call: skip the API call and tell the user.
    if elapsed_time < debounce_interval:
        print("Debouncing")
        return "Hello! You are sending requests too fast. Please wait a few seconds before sending another request."

    # Record the start of this (accepted) call.
    last_call_time = time.time()

    # BUG FIX: the original code sent prompt + "Assistant: " even though the
    # app already passes a prompt ending in "Assistant: ", producing a doubled
    # "Assistant: Assistant: " tag in every request. Send the prompt as-is.
    output = replicate.run(
        llm,
        input={
            "prompt": prompt,
            "max_length": max_len,
            "temperature": temperature,
            "top_p": top_p,
            "repetition_penalty": 1,
        },
        api_token=API_TOKEN,
    )
    return output
smmap==5.0.0 55 | streamlit==1.24.1 56 | streamlit-auth0-component==0.1.5 57 | streamlit-chat==0.1.1 58 | tenacity==8.2.2 59 | toml==0.10.2 60 | toolz==0.12.0 61 | tornado==6.3.2 62 | typing_extensions==4.7.1 63 | tzdata==2023.3 64 | tzlocal==4.3.1 65 | urllib3==2.0.3 66 | validators==0.20.0 67 | yarl==1.9.2 68 | zipp==3.16.2 69 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.209.6/containers/python-3 3 | { 4 | "image": "mcr.microsoft.com/devcontainers/python:3.11-bullseye", 5 | "customizations": { 6 | "codespaces": { 7 | "openFiles": [ 8 | "README.md", 9 | "llama2_chatbot.py" 10 | ] 11 | }, 12 | "vscode": { 13 | "settings": {}, 14 | "extensions": [ 15 | "ms-python.python", 16 | "ms-python.vscode-pylance" 17 | ] 18 | } 19 | }, 20 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 21 | "forwardPorts": [ 22 | 8501 23 | ], 24 | // Use 'postCreateCommand' to run commands after the container is created. 25 | // Install app dependencies. 26 | "postCreateCommand": "pip3 install --user -r requirements.txt", 27 | // Use 'postAttachCommand' to run commands after a tool has attached to the container. 28 | // Start the app. 29 | "postAttachCommand": { 30 | "server": "streamlit run llama2_chatbot.py --server.enableCORS false --server.enableXsrfProtection false" 31 | }, 32 | "portsAttributes": { 33 | "8501": { 34 | "label": "Application", 35 | "onAutoForward": "openPreview" 36 | } 37 | }, 38 | // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 39 | "remoteUser": "vscode", 40 | "features": { 41 | // Optional features for development - increase container boot time! 
42 | // "ghcr.io/devcontainers-contrib/features/coverage-py:2": {}, 43 | // "git": "latest", 44 | // "github-cli": "latest" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended 
to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # private 163 | fly.toml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLaMA 2 Chatbot App ⚡ 2 | 3 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/a16z-infra/llama2-chatbot?quickstart=1) 4 | 5 | ## 🤔 What is this? 6 | 7 | This is an experimental Streamlit chatbot app built for LLaMA2 (or any other LLM). The app includes session chat history and provides an option to select multiple LLaMA2 API endpoints on Replicate. 8 | 9 | Live demo: [LLaMA2.ai](https://llama2.ai/) 10 | 11 | For the LLaMA2 license agreement, please check the Meta Platforms, Inc official license documentation on their website. 
12 | [More info.](https://ai.meta.com/llama/) 13 | 14 | llama2 demo 15 | 16 | ## Features 17 | 18 | - Chat history is maintained for each session (if you refresh, chat history clears) 19 | - Option to select between different LLaMA2 chat API endpoints (7B, 13B or 70B). The default is 70B. 20 | - Configure model hyperparameters from the sidebar (Temperature, Top P, Max Sequence Length). 21 | - Includes "User:" and "Assistant:" prompts for the chat conversation. 22 | - Each model (7B, 13B & 70B) runs on Replicate - (7B and 13B run on one A100 40Gb, and 70B runs on one A100 80Gb). 23 | - Docker image included to deploy this app in Fly.io 24 | 25 | ## Installation 26 | 27 | - Clone the repository 28 | - [Optional] Create a virtual python environment with the command `python -m venv .venv` and activate it with `source .venv/bin/activate` 29 | - Install dependencies with `pip install -r requirements.txt` 30 | - Create an account on [Replicate](https://replicate.com/) 31 | - Create an account on [Auth0 (free)](https://auth0.com/) and configure your application 32 | - Create a Single Page Application 33 | - Navigate to the Settings tab for that application 34 | - If you are running the app locally: set Allowed Web Origins to `http://localhost:8501` and set Allowed Callback URLs to `http://localhost:8501/component/auth0_component.login_button/index.html` 35 | - To run on a remote server: set Allowed Web Origins to `https://` and set Allowed Callback URLs to `http:///component/auth0_component.login_button/index.html` 36 | - Copy Client ID and Domain to use in the next step 37 | - Make your own `.env` file with the command `cp .env_template .env`. 
Then edit the `.env` file and add your: 38 | - [Replicate API token](https://replicate.com/account) as `REPLICATE_API_TOKEN` 39 | - [Auth0 Client ID](https://auth0.com/docs/get-started/applications/application-settings) as `AUTH0_CLIENTID` 40 | - [Auth0 Domain](https://auth0.com/docs/get-started/applications/application-settings) as `AUTH0_DOMAIN` 41 | - For your convenience, we include common model endpoints already in the `.env_template` file 42 | - Run the app with `streamlit run llama2_chatbot.py` 43 | - Dockerfile included to [deploy this app](#deploying-on-flyio) in Fly.io 44 | 45 | (Note: if you are using a Mac, you may need to use the command `python3` instead of `python` and `pip3` instead of `pip`) 46 | 47 | ## Usage 48 | 49 | - Start the chatbot by selecting an API endpoint from the sidebar. 50 | - Configure model hyperparameters from the sidebar. 51 | - Type your question in the input field at the bottom of the app and press enter. 52 | 53 | ## Deploying on fly.io 54 | 1. First you should [install flyctl](https://fly.io/docs/hands-on/install-flyctl/) and login from command line 55 | 2. `fly launch` -> this will generate a fly.toml for you automatically 56 | 3. `fly deploy --dockerfile Dockerfile` --> this will automatically package up the repo and deploy it on fly. If you have a free account, you can use `--ha=false` flag to only spin up one instance 57 | 4. Go to your deployed fly app dashboard, click on `Secrets` from the left hand side nav, and click on `Use the Web CLI to manage your secrets without leaving your browser`. Once you are on your app's web CLI, export all secrets needed. i.e `export REPLICATE_API_TOKEN=your_replicate_token`. Refer to the `.env_template` file for necessary secrets. 
58 | 59 | ## Authors 60 | 61 | - Marco Mascorro - [@mascobot](https://twitter.com/Mascobot) 62 | - Yoko Li - [@stuffyokodraws](https://twitter.com/stuffyokodraws) 63 | - Rajko Radovanović - [@rajko_rad](https://twitter.com/rajko_rad) 64 | - Matt Bornstein - [@BornsteinMatt](https://twitter.com/BornsteinMatt) 65 | - Guido Appenzeller - [@appenz](https://twitter.com/appenz) 66 | 67 | ## Version 68 | 69 | 0.9.0 (Experimental) - July 2023 70 | 71 | ## Contributing 72 | 73 | This project is under development. Contributions are welcome! 74 | 75 | ## License 76 | 77 | - Web chatbot license (this repo): Apache 2.0 78 | - For the LLaMA models license, please refer to the License Agreement from Meta Platforms, Inc. 79 | 80 | ## Acknowledgements 81 | 82 | - Special thanks to the team at Meta AI, Replicate, a16z-infra and the entire open-source community. 83 | 84 | ## Disclaimer 85 | 86 | This is an experimental version of the app. Use at your own risk. While the app has been tested, the authors hold no liability for any kind of losses arising out of using this application. 87 | 88 | ## UI Configuration 89 | 90 | The app has been styled and configured for a cleaner look. Main menu and footer visibility have been hidden. Feel free to modify this to your custom application. 91 | 92 | ## Resources 93 | 94 | - [Streamlit Cheat Sheet](https://docs.streamlit.io/library/cheatsheet) 95 | - [GitHub to deploy LLaMA2 on Replicate](https://github.com/a16z-infra/cog-llama-template) 96 | -------------------------------------------------------------------------------- /llama2_chatbot.py: -------------------------------------------------------------------------------- 1 | """ 2 | LLaMA 2 Chatbot app 3 | ====================== 4 | 5 | This is a Streamlit chatbot app with LLaMA2 that includes session chat history and an option to select multiple LLM 6 | API endpoints on Replicate. The 7B and 13B models run on Replicate on one A100 40Gb. The 70B runs in one A100 80Gb. 
"""
LLaMA 2 Chatbot app
======================

This is a Streamlit chatbot app with LLaMA2 that includes session chat history and an option to select multiple LLM
API endpoints on Replicate. The 7B and 13B models run on Replicate on one A100 40Gb. The 70B runs in one A100 80Gb.
The weights have been tensorized.

Author: Marco Mascorro (@mascobot.com)
Created: July 2023
Version: 0.9.0 (Experimental)
Status: Development
Python version: 3.9.15
a16z-infra
"""
#External libraries:
import streamlit as st
import replicate
from dotenv import load_dotenv
load_dotenv()
import os
from utils import debounce_replicate_run
from auth0_component import login_button

###Global variables:###
REPLICATE_API_TOKEN = os.environ.get('REPLICATE_API_TOKEN', default='')
#Your (Replicate) models' endpoints:
REPLICATE_MODEL_ENDPOINT7B = os.environ.get('REPLICATE_MODEL_ENDPOINT7B', default='')
REPLICATE_MODEL_ENDPOINT13B = os.environ.get('REPLICATE_MODEL_ENDPOINT13B', default='')
REPLICATE_MODEL_ENDPOINT70B = os.environ.get('REPLICATE_MODEL_ENDPOINT70B', default='')
#System prompt prepended to every conversation sent to the model:
PRE_PROMPT = "You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as Assistant."
#Auth0 for auth
AUTH0_CLIENTID = os.environ.get('AUTH0_CLIENTID', default='')
AUTH0_DOMAIN = os.environ.get('AUTH0_DOMAIN', default='')

# Fail fast with a friendly message when any required secret is missing.
# BUG FIX: the original check omitted REPLICATE_MODEL_ENDPOINT70B even though
# 70B is the default model below, so a missing 70B endpoint slipped through
# and the app would call Replicate with an empty model reference.
if not (REPLICATE_API_TOKEN and REPLICATE_MODEL_ENDPOINT7B and REPLICATE_MODEL_ENDPOINT13B and
        REPLICATE_MODEL_ENDPOINT70B and AUTH0_CLIENTID and AUTH0_DOMAIN):
    st.warning("Add a `.env` file to your app directory with the keys specified in `.env_template` to continue.")
    st.stop()

###Initial UI configuration:###
st.set_page_config(page_title="LLaMA2 Chatbot by a16z-infra", page_icon="🦙", layout="wide")

def render_app():
    """Render the full chatbot UI: sidebar controls, chat history, and input box.

    Runs once per Streamlit rerun; all cross-rerun state lives in
    ``st.session_state``.
    """

    # reduce font sizes for input text boxes
    # NOTE(review): the <style> rules appear to have been stripped from this
    # copy of the file (the string is empty) — restore from upstream if the
    # custom styling is missing.
    custom_css = """
    """
    st.markdown(custom_css, unsafe_allow_html=True)

    #Left sidebar menu
    st.sidebar.header("LLaMA2 Chatbot")

    #Set config for a cleaner menu, footer & background:
    # NOTE(review): also stripped in this copy — see note on custom_css above.
    hide_streamlit_style = """
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

    #container for the chat history
    # NOTE(review): both containers are created but never written to in the
    # visible code; kept so the page layout is unchanged.
    response_container = st.container()
    #container for the user's text input
    container = st.container()

    #Set up/Initialize Session State variables:
    if 'chat_dialogue' not in st.session_state:
        st.session_state['chat_dialogue'] = []
    if 'llm' not in st.session_state:
        #st.session_state['llm'] = REPLICATE_MODEL_ENDPOINT13B
        st.session_state['llm'] = REPLICATE_MODEL_ENDPOINT70B
    if 'temperature' not in st.session_state:
        st.session_state['temperature'] = 0.1
    if 'top_p' not in st.session_state:
        st.session_state['top_p'] = 0.9
    if 'max_seq_len' not in st.session_state:
        st.session_state['max_seq_len'] = 512
    if 'pre_prompt' not in st.session_state:
        st.session_state['pre_prompt'] = PRE_PROMPT
    if 'string_dialogue' not in st.session_state:
        st.session_state['string_dialogue'] = ''

    #Dropdown menu to select the model endpoint:
    selected_option = st.sidebar.selectbox('Choose a LLaMA2 model:', ['LLaMA2-70B', 'LLaMA2-13B', 'LLaMA2-7B'], key='model')
    if selected_option == 'LLaMA2-7B':
        st.session_state['llm'] = REPLICATE_MODEL_ENDPOINT7B
    elif selected_option == 'LLaMA2-13B':
        st.session_state['llm'] = REPLICATE_MODEL_ENDPOINT13B
    else:
        st.session_state['llm'] = REPLICATE_MODEL_ENDPOINT70B

    #Model hyper parameters:
    st.session_state['temperature'] = st.sidebar.slider('Temperature:', min_value=0.01, max_value=5.0, value=0.1, step=0.01)
    st.session_state['top_p'] = st.sidebar.slider('Top P:', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    st.session_state['max_seq_len'] = st.sidebar.slider('Max Sequence Length:', min_value=64, max_value=4096, value=2048, step=8)

    # Optional override of the system prompt; reverts to the default when the
    # text area is cleared.
    NEW_P = st.sidebar.text_area('Prompt before the chat starts. Edit here if desired:', PRE_PROMPT, height=60)
    if NEW_P != PRE_PROMPT and NEW_P != "" and NEW_P is not None:
        st.session_state['pre_prompt'] = NEW_P + "\n\n"
    else:
        st.session_state['pre_prompt'] = PRE_PROMPT

    btn_col1, btn_col2 = st.sidebar.columns(2)

    # Add the "Clear Chat History" button to the sidebar
    def clear_history():
        st.session_state['chat_dialogue'] = []
    clear_chat_history_button = btn_col1.button("Clear History",
                                                use_container_width=True,
                                                on_click=clear_history)

    # add logout button (drops the Auth0 user info so the login screen shows again)
    def logout():
        del st.session_state['user_info']
    logout_button = btn_col2.button("Logout",
                                    use_container_width=True,
                                    on_click=logout)

    # add links to relevant resources for users to select
    st.sidebar.write(" ")

    text1 = 'Chatbot Demo Code'
    text2 = 'LLaMA2 70B Model on Replicate'
    text3 = 'LLaMa2 Cog Template'

    text1_link = "https://github.com/a16z-infra/llama2-chatbot"
    text2_link = "https://replicate.com/replicate/llama70b-v2-chat"
    text3_link = "https://github.com/a16z-infra/cog-llama-template"

    st.sidebar.markdown(
        "**Resources** \n"
        f" [{text2}]({text2_link}) \n"
        f" [{text1}]({text1_link}) \n"
        f" [{text3}]({text3_link})",
        unsafe_allow_html=True)

    st.sidebar.write(" ")
    st.sidebar.markdown("*Made with ❤️ by a16z Infra and Replicate. Not associated with Meta Platforms, Inc.*")

    # Display chat messages from history on app rerun
    for message in st.session_state.chat_dialogue:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("Type your question here to talk to LLaMA2"):
        # Add user message to chat history
        st.session_state.chat_dialogue.append({"role": "user", "content": prompt})
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # Rebuild the whole transcript: system pre-prompt followed by
            # alternating "User:"/"Assistant:" turns.
            string_dialogue = st.session_state['pre_prompt']
            for dict_message in st.session_state.chat_dialogue:
                if dict_message["role"] == "user":
                    string_dialogue = string_dialogue + "User: " + dict_message["content"] + "\n\n"
                else:
                    string_dialogue = string_dialogue + "Assistant: " + dict_message["content"] + "\n\n"
            print(string_dialogue)
            # The trailing "Assistant: " tag cues the model to answer next.
            output = debounce_replicate_run(st.session_state['llm'], string_dialogue + "Assistant: ", st.session_state['max_seq_len'], st.session_state['temperature'], st.session_state['top_p'], REPLICATE_API_TOKEN)
            # Stream tokens into the placeholder, with a cursor while generating.
            for item in output:
                full_response += item
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
            # Add assistant response to chat history
            st.session_state.chat_dialogue.append({"role": "assistant", "content": full_response})


# Gate the app behind Auth0 login; login_button stores the user info in
# session state once authentication succeeds.
if 'user_info' in st.session_state:
    render_app()
else:
    st.write("Please login to use the app. This is just to prevent abuse, we're not charging for usage.")
    st.session_state['user_info'] = login_button(AUTH0_CLIENTID, domain=AUTH0_DOMAIN)