├── .gitignore
├── requirements.txt
├── .gitattributes
├── lib
│   └── chat.py
├── README.md
└── main.py

/.gitignore:
--------------------------------------------------------------------------------
.venv

__pycache__/
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
foundry-local-sdk
streamlit
PyPDF2
openai
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/lib/chat.py:
--------------------------------------------------------------------------------
from foundry_local import FoundryLocalManager
from openai import OpenAI

# Choose any model alias available in your Foundry Local deployment
model_alias = "phi-3.5-mini"

# The manager attaches to the local Foundry service and exposes an
# OpenAI-compatible endpoint plus an API key for it
manager = FoundryLocalManager(model_alias)
endpoint = manager.endpoint
api_key = manager.api_key

client = OpenAI(
    api_key=api_key,
    base_url=endpoint,
)

# Resolve the alias to the concrete model id; fall back to the alias itself
model_info = manager.get_model_info(model_alias)
model_id = model_info.id if model_info else model_alias

# Module-level history, shared with the Streamlit app (main.py imports it)
conversation_history = []


def handle_message(
    user_message: str,
    full_doc_text: str,
):
    # Use the full document text as context (no chunking or retrieval)
    context_text = full_doc_text or ""
    cleaned_text = context_text.replace('\n', ' ')

    # Seed the system prompt on the first turn. The list is mutated in place,
    # never rebound, so the reference imported by main.py stays valid.
    if not conversation_history:
        system_prompt = (
            "You are a helpful assistant answering questions about a document. "
            "Use ONLY the provided document content. If the answer is not "
            "present, say: 'Information not found in the provided document.' "
            "Do not invent details. Only respond in English (UK). "
            f"{cleaned_text}"
        )
        conversation_history.append({"role": "system", "content": system_prompt})

    conversation_history.append({"role": "user", "content": user_message})

    print("Messages to model:", conversation_history)  # debug logging

    response = client.chat.completions.create(
        model=model_id,
        messages=conversation_history,
        stream=False,
    )
    assistant_text = response.choices[0].message.content

    # Store the assistant reply so later turns keep the full context
    conversation_history.append({
        "role": "assistant",
        "content": assistant_text,
    })

    return assistant_text
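

# ---------------------------------------------------------------------------
# Minimal manual smoke test: a sketch, not used by the Streamlit app.
# It assumes Foundry Local is running and that the alias above resolves to a
# deployed model; the sample document text is invented purely for
# illustration. Run from the repo root with: python -m lib.chat
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    sample_doc = "Project Orion launched in 2019 and is maintained by the tools team."
    print(handle_message("When did the project launch?", full_doc_text=sample_doc))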
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Document Chat AI (Local)

A Streamlit app that runs entirely on your machine: upload a PDF and chat with its contents using a locally hosted AI model (Foundry Local). The app extracts text from the PDF and sends each user message, together with the document content, to a local LLM via the Foundry Local manager.

Credit: Tom Shaw

## ⚠️ Important Requirements

### Windows AI Foundry Local (required)
This project expects a Foundry Local deployment on your machine. Install Windows AI Foundry Local before running the app:

- Source / repo: https://aka.ms/Git/FoundryLocal

Ensure Foundry Local is running and reachable from the machine where you run the Streamlit app.
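
If the Foundry Local CLI is on your PATH, you can sanity-check the service before launching the app. These command names reflect the Foundry Local CLI at the time of writing; verify them against your installed version with `foundry --help`:
```powershell
foundry service status   # confirm the local service is running
foundry model list       # confirm your chosen model (e.g. phi-3.5-mini) is available
```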

### Other requirements
- Python 3.8+
- Streamlit
- PyPDF2 (the app starts without it, but PDF text extraction requires it)
- foundry-local-sdk and openai (Foundry Local exposes an OpenAI-compatible endpoint, so this project talks to it through the standard OpenAI client)

Verify Python:
```powershell
python --version
```

Install PyPDF2 if you want PDF extraction to work:
```powershell
pip install PyPDF2
```

## 🚀 Features

- Upload a PDF and extract text locally
- Chat UI (Streamlit) that sends the full document to a local LLM
- Uses the Foundry Local manager to obtain the model endpoint and API key
- Keeps conversation history during the Streamlit session

## 📋 Prerequisites

- Foundry Local running on your machine (see link above)
- An appropriate model deployed to Foundry Local
- Python environment with the project dependencies installed

## ⚙️ Environment Setup

Create a Python virtual environment and install dependencies:
```powershell
python -m venv .venv
.\.venv\Scripts\activate
pip install -r requirements.txt
```

If you don't have a requirements.txt, at minimum install the same packages it lists:
```powershell
pip install foundry-local-sdk streamlit PyPDF2 openai
```
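
To confirm the environment reaches Foundry Local end to end before starting Streamlit, you can run a small connection check. This is a minimal sketch distilled from lib/chat.py; the file name connection_check.py is only a suggestion, and phi-3.5-mini is simply the default alias this project uses:
```python
# connection_check.py - hypothetical helper, distilled from lib/chat.py
from foundry_local import FoundryLocalManager
from openai import OpenAI

alias = "phi-3.5-mini"  # must match a model available in Foundry Local
manager = FoundryLocalManager(alias)

# Foundry Local serves an OpenAI-compatible API, so the standard client works
client = OpenAI(api_key=manager.api_key, base_url=manager.endpoint)

model_info = manager.get_model_info(alias)
response = client.chat.completions.create(
    model=model_info.id if model_info else alias,
    messages=[{"role": "user", "content": "Reply with the single word: ready"}],
)
print(response.choices[0].message.content)
```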

## 🔧 Installation & Run (Windows)

1. Clone or copy this repository to your machine.

2. Ensure Foundry Local is installed and running (see link above).

3. Create and activate a venv, then install dependencies (see the previous section).

4. Run the Streamlit app:
```powershell
streamlit run main.py
```

5. Open your browser to the Streamlit URL (typically http://localhost:8501).

## 🗂 Project Structure

- main.py — Streamlit UI, PDF upload, session state
- lib/chat.py — Foundry Local manager usage, message handling, calls to the local model
- requirements.txt — Python dependencies

## 🛠 Usage

1. Start Foundry Local and ensure a model (e.g., phi-3.5-mini) is deployed.
2. Start the Streamlit app: `streamlit run main.py`
3. Upload a PDF in the sidebar.
4. Ask questions in the chat input; the assistant will answer using only the provided document content.

## 🔍 Troubleshooting

- "PyPDF2 not installed" — install PyPDF2 to enable PDF extraction.
- Foundry Local connection errors — confirm Foundry Local is running and that the endpoint/API key returned by FoundryLocalManager are valid.
- Model not found — verify that the model alias in lib/chat.py matches a model deployed in Foundry Local.
- Streamlit issues — check the console/terminal for error tracebacks.

## Contributing

- Fork the repo, create a branch, make your changes, and submit a PR.
- Keep Foundry Local-related configuration minimal and document any environment settings.

## Support & Notes

- This is a local/demo project intended for use with a local Foundry deployment.
- For Foundry Local installation and troubleshooting, consult the official Windows AI Foundry Local docs linked above.

---

## 📝 License

This project is licensed under the MIT License.

---

Built with ❤️ by [Tom Shaw](https://tomshaw.dev)
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import streamlit as st
import uuid
import io
from lib.chat import handle_message, conversation_history

# PyPDF2 is optional; without it the app still runs but cannot extract PDFs
try:
    import PyPDF2
    HAS_PYPDF2 = True
except Exception:
    HAS_PYPDF2 = False

st.set_page_config(
    page_title="Document Chat AI (Local)",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Session state
if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())
if "doc_text" not in st.session_state:
    st.session_state.doc_text = ""
if "doc_name" not in st.session_state:
    st.session_state.doc_name = None

# Keep the session copy of messages in sync with the module-level history
if "messages" not in st.session_state:
    st.session_state.messages = list(conversation_history)
elif len(st.session_state.messages) != len(conversation_history):
    st.session_state.messages = list(conversation_history)


def extract_pdf_text(file_bytes: bytes) -> str:
    if not HAS_PYPDF2:
        return "(PyPDF2 not installed)"
    reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    pages = []
    for page in reader.pages:
        try:
            pages.append(page.extract_text() or "")
        except Exception:
            # Some pages (e.g. scanned images) yield no extractable text
            pages.append("")
    return "\n".join(pages)


# Sidebar
uploaded_file = st.sidebar.file_uploader("Select PDF Document", type=["pdf"])

# Process a newly uploaded file (skip re-extraction if the name is unchanged)
if uploaded_file is not None and st.session_state.doc_name != uploaded_file.name:
    file_bytes = uploaded_file.read()
    with st.spinner("Extracting text from PDF..."):
        st.session_state.doc_text = extract_pdf_text(file_bytes)
    st.session_state.doc_name = uploaded_file.name
    st.sidebar.success(f"Loaded '{uploaded_file.name}'")

if st.sidebar.button("Clear Document", type="secondary"):
    st.session_state.doc_text = ""
    st.session_state.doc_name = None
    st.sidebar.info("Document cleared.")

st.sidebar.markdown("---")
st.sidebar.markdown(f"**Session ID:** `{st.session_state.session_id[:8]}...`")
st.sidebar.markdown(f"**Messages:** {len(st.session_state.messages)}")
if st.session_state.doc_name:
    st.sidebar.markdown(f"**Document:** {st.session_state.doc_name}")

if st.sidebar.button("Clear Chat", type="secondary"):
    conversation_history.clear()
    st.session_state.messages = []
    st.rerun()

# Main
st.title("📄 Document Chat AI (Local Mode)")
if st.session_state.doc_name:
    st.caption(f"Chatting about: {st.session_state.doc_name}")
else:
    st.caption("Upload a PDF to enable grounded chat.")

chat_container = st.container()
with chat_container:
    for idx, message in enumerate(st.session_state.messages):
        # Skip the system prompt at index 0; it carries the raw document text
        if idx == 0:
            continue
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

# The chat input is always shown; the placeholder tells the user whether a
# document is available to ground the answers
if not st.session_state.doc_name:
    st.info("Please upload a PDF in the sidebar to start chatting.")
    prompt = st.chat_input("Ask (no document context will be used)...")
else:
    prompt = st.chat_input("Ask something about the document...")

if prompt:
    # Show the user message immediately
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Generating response..."):
            # No retrieval step: the full document text is passed as context
            response = handle_message(
                user_message=prompt,
                full_doc_text=st.session_state.doc_text,
            )
        st.markdown(response)

    # handle_message already appended both turns to conversation_history,
    # so mirror it into session state rather than appending again
    st.session_state.messages = list(conversation_history)
    st.rerun()
--------------------------------------------------------------------------------