├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt ├── sample.txt └── streamlit_editor.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .streamlit/ 163 | .hide/ 164 | *.db 165 | *.bin 166 | *.pickle 167 | *.sqlite3 168 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Kevin Chin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM-Powered Document Editor: DSPy & LangChain Integration for Intelligent Writing (OpenRouter/OpenAI/Deepseek/Gemini/Github/Ollama) 2 | 3 | [](https://x.com/firstoryapp) 4 | [](https://doc-editor.streamlit.app) 5 | 6 | **An intelligent writing assistant with multi-LLM integration for enhanced content creation and editing.** 7 | 8 | [](https://github.com/user-attachments/assets/f37c4dd5-423c-4406-a08d-51f67942ac7b) 9 | 10 | Leverage DSPy's LLM orchestration and LangChain's document processing to create, refine, and manage content with unprecedented efficiency. Ideal for technical writers, content creators, and knowledge workers seeking intelligent document editing. 11 | 12 | ## 📚 Table of Contents 13 | - [LLM-Powered Document Editor: DSPy \& LangChain Integration for Intelligent Writing (OpenRouter/OpenAI/Deepseek/Gemini/Github/Ollama)](#llm-powered-document-editor-dspy--langchain-integration-for-intelligent-writing-openrouteropenaideepseekgeminigithubollama) 14 | - [📚 Table of Contents](#-table-of-contents) 15 | - [🚀 Quick Start](#-quick-start) 16 | - [✨ Intelligent Document Workflows](#-intelligent-document-workflows) 17 | - [1. Content Creation Phase](#1-content-creation-phase) 18 | - [2. AI Collaboration Phase](#2-ai-collaboration-phase) 19 | - [3. Finalization \& Management](#3-finalization--management) 20 | - [⚙️ System Architecture](#️-system-architecture) 21 | - [🔧 Technical Stack](#-technical-stack) 22 | - [📄 License](#-license) 23 | 24 | ## 🚀 Quick Start 25 | 26 | Try the live demo immediately: 27 | [](https://doc-editor.streamlit.app) 28 | 29 | 1. Clone repository: 30 | ``` 31 | git clone https://github.com/clchinkc/streamlit-editor.git 32 | python -m venv venv 33 | source venv/bin/activate # Unix/MacOS 34 | # .\venv\Scripts\activate # Windows 35 | ``` 36 | 37 | 2. 
Install dependencies: 38 | ``` 39 | pip install -r requirements.txt 40 | ``` 41 | 42 | 3. Configure Streamlit secrets: 43 | ```bash 44 | mkdir -p .streamlit 45 | touch .streamlit/secrets.toml 46 | ``` 47 | 48 | Add the following to `.streamlit/secrets.toml`: 49 | ```toml 50 | # API Keys (at least one required) 51 | [openrouter] 52 | OPENROUTER_API_KEY = "your_openrouter_api_key" 53 | OPENROUTER_MODEL = "your_openrouter_model" 54 | 55 | [openai] 56 | OPENAI_API_KEY = "your_openai_api_key" 57 | 58 | [deepseek] 59 | DEEPSEEK_API_KEY = "your_deepseek_api_key" 60 | 61 | [gemini] 62 | GEMINI_API_KEY = "your_gemini_api_key" 63 | 64 | [github] 65 | GITHUB_TOKEN = "your_github_token" 66 | 67 | [ollama] 68 | OLLAMA_MODEL = "your_ollama_model" 69 | ``` 70 | 71 | 4. (If you want to use Ollama) Setup Ollama: 72 | 73 | First, install [Ollama](https://ollama.com/download). 74 | 75 | Then start Ollama server with the specified model: 76 | ``` 77 | ollama run your_ollama_model 78 | ``` 79 | 80 | 5. Launch application: 81 | ``` 82 | streamlit run streamlit_editor.py 83 | ``` 84 | And the app will be running on http://localhost:8501. 85 | 86 | ## ✨ Intelligent Document Workflows 87 | 88 | **Combined Features & User Processes** 89 | 90 | ### 1. Content Creation Phase 91 | - **Multi-format Editing Suite** 92 | - ✍️ Dual-mode editor (Editor + Markdown Preview) 93 | - 📥 File ingestion: Drag-and-drop `.md`/`.txt` support 94 | - 📤 Export flexibility: Download markdown or clipboard copy 95 | 96 | - **Structural Tools** 97 | - 🗂️ LangChain-powered document chunking 98 | - 📚 Section-level editing 99 | 100 | ### 2. 
AI Collaboration Phase 101 | - **Context-Aware Assistance** 102 | - 🤖 DSPy-powered feedback suggestions (general or specific to reference text) 103 | - 📑 Automated section summarization 104 | - 🧩 LLM-driven content regeneration 105 | - 📝 Review and compare AI-generated changes 106 | 107 | - **Quality Control** 108 | - 🔍 Semantic feedback tracking with source references 109 | - 📊 Real-time feedback dashboard 110 | - ✅ Accept/reject AI suggestions with diff view 111 | 112 | ### 3. Finalization & Management 113 | - **Output Optimization** 114 | - 🧮 Batch operation processing for bulk edits 115 | 116 | - **Advanced Orchestration** 117 | - 🚦 DSPy-managed suggestion pipeline 118 | - 📜 Version history tracking 119 | - 🌐 Multi-modal previews (raw + rendered views) 120 | - 📄 Pagination for summaries 121 | 122 | ## ⚙️ System Architecture 123 | 124 | [](https://mermaid.live/edit#pako:eNqdVV1r2zAU_SvCpWODmGUrpKkfBknsjMEKZW4oDL8o9o0tYusaSW4a2v73XVv-iLvuYdGTPu45urrnSHp2YkzA8Zxdjoc448qwez-SjNrlJbvlQrIVFiVKkEbb-Y0G9TFydtzbcbeiAXtQwoCKnE82IEiEwSEEaMhCo4AXOfU2P_rAB1R7XfIY-tiEG77lGoYlQnIDPcQP74599FbV-dVT7E5hDFrjkMVPLtNVRgF9eC7kfphmYUnpnOa9Bki2PN73gBiLoj53v8LCozZQ9IiwKgquhoR2IgeX56ZbYIFMhRyyD55KVGY4LR5kjjxp59ktJlVuo3sJViglxEagPCk_c91vLx0LVwoPrhJpZqwS-nOtgI6cl1aKU1lGUM0fgS0qg01PM4M1qK_9G5XGUPIKsHvaNgXV7NXo8GEo8BsZRui4Mrb-tCc8GfYoeE3Rlm2QegQqeCpi9h0kKLJEs2cnzFjAEaqSCbJNmXSQN6frlBplh-WRfIfqXcQ_6nGUMfsFO8Jk8Fft38c0dgGrvbWA7iphhyc-CM2R_JuygzAZOxBdfWFdXUIsdlSVGHNUrUM0hUInN-2Rexc30yC4mU60UbgH7-Lq6qrtuweRmMz7Wj5NGgrvYtq0U6YheUs2v14FwfJMskZVy7NeL2erL2fyDL7qyBbT68WZZL1xLJfvL6a-fyZXZ6g2rWkwm6_OpGqfhVbC-Xp5Mz-TqXk0LE8wC2br_ymUM3EKUAUXCX0TzzVr5JgMCnqoPOomXO0jJ5KvFMfpKQnpIjieURVMHIVVmjlk9VzTqGouoS94qnjRz5Zc_kYcxtD49tb-Ss3n9PoHw9cvYQ) 125 | 126 | ## 🔧 Technical Stack 127 | 128 | | Component | Technology | Purpose | 129 | |----------------|-------------------|----------------------------| 130 | | AI Framework | DSPy | LLM operations management | 131 | | Text Processing| LangChain | Document chunking 
| 132 | | UI Framework | Streamlit | Web interface | 133 | | Visualization | Streamlit Mermaid| Document flow diagrams | 134 | 135 | ## 📄 License 136 | 137 | MIT Licensed - See [LICENSE](LICENSE) for details. 138 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.4.4 2 | aiohttp==3.11.11 3 | aiosignal==1.3.2 4 | alembic==1.14.1 5 | altair 6 | annotated-types==0.7.0 7 | anyio==4.8.0 8 | APScheduler==3.11.0 9 | asyncer==0.0.8 10 | attrs==24.3.0 11 | backoff==2.2.1 12 | blinker==1.9.0 13 | cachetools==5.5.1 14 | certifi==2024.12.14 15 | cffi==1.17.1 16 | charset-normalizer==3.4.1 17 | click==8.1.8 18 | cloudpickle==3.1.1 19 | colorlog==6.9.0 20 | cryptography==42.0.8 21 | datasets==3.2.0 22 | dill==0.3.8 23 | diskcache==5.6.3 24 | distro==1.9.0 25 | dnspython==2.7.0 26 | dspy==2.5.43 27 | dspy-ai==2.5.43 28 | email_validator==2.2.0 29 | entrypoints==0.4 30 | fastapi==0.111.1 31 | fastapi-cli==0.0.7 32 | fastapi-sso==0.10.0 33 | filelock==3.17.0 34 | frozenlist==1.5.0 35 | fsspec==2024.9.0 36 | gitdb==4.0.12 37 | GitPython==3.1.44 38 | google-api-core==2.24.1 39 | google-auth==2.38.0 40 | google-cloud-core==2.4.1 41 | google-cloud-firestore==2.20.0 42 | googleapis-common-protos==1.66.0 43 | grpcio==1.70.0 44 | grpcio-status==1.70.0 45 | gunicorn==22.0.0 46 | h11==0.14.0 47 | httpcore==1.0.7 48 | httptools==0.6.4 49 | httpx==0.27.2 50 | huggingface-hub==0.27.1 51 | idna==3.10 52 | importlib_metadata==8.6.1 53 | Jinja2==3.1.5 54 | jiter==0.8.2 55 | joblib==1.4.2 56 | json_repair==0.35.0 57 | jsonpatch==1.33 58 | jsonpointer==3.0.0 59 | jsonschema==4.23.0 60 | jsonschema-specifications==2024.10.1 61 | langchain-core==0.3.31 62 | langchain-text-splitters==0.3.5 63 | langsmith==0.3.1 64 | litellm==1.53.7 65 | magicattr==0.1.6 66 | Mako==1.3.8 67 | markdown-it-py==3.0.0 68 | MarkupSafe==3.0.2 69 | mdurl==0.1.2 70 | 
multidict==6.1.0 71 | multiprocess==0.70.16 72 | narwhals==1.23.0 73 | numpy==2.2.2 74 | oauthlib==3.2.2 75 | openai==1.60.1 76 | optuna==4.2.0 77 | orjson==3.10.15 78 | packaging==24.2 79 | pandas==2.2.3 80 | pillow==11.1.0 81 | propcache==0.2.1 82 | proto-plus==1.26.0 83 | protobuf==5.29.3 84 | pyarrow==19.0.0 85 | pyasn1==0.6.1 86 | pyasn1_modules==0.4.1 87 | pycparser==2.22 88 | pydantic==2.10.6 89 | pydantic_core==2.27.2 90 | pydeck==0.9.1 91 | Pygments==2.19.1 92 | PyJWT==2.10.1 93 | PyNaCl==1.5.0 94 | python-dateutil==2.9.0.post0 95 | python-dotenv==1.0.1 96 | python-multipart==0.0.9 97 | pytz==2024.2 98 | PyYAML==6.0.2 99 | ratelimit==2.2.1 100 | redis==5.2.1 101 | referencing==0.36.1 102 | regex==2024.11.6 103 | requests==2.32.3 104 | requests-toolbelt==1.0.0 105 | rich==13.9.4 106 | rich-toolkit==0.13.2 107 | rpds-py==0.22.3 108 | rq==2.1.0 109 | rsa==4.9 110 | setuptools==75.8.0 111 | shellingham==1.5.4 112 | six==1.17.0 113 | smmap==5.0.2 114 | sniffio==1.3.1 115 | SQLAlchemy==2.0.37 116 | starlette==0.37.2 117 | streamlit==1.41.1 118 | streamlit-analytics 119 | streamlit-mermaid 120 | streamlit-quill==0.0.3 121 | streamlit_analytics2 122 | tenacity==9.0.0 123 | tiktoken==0.8.0 124 | tokenizers==0.21.0 125 | toml==0.10.2 126 | toolz==1.0.0 127 | tornado==6.4.2 128 | tqdm==4.67.1 129 | typer==0.15.1 130 | typing_extensions==4.12.2 131 | tzdata==2025.1 132 | tzlocal==5.2 133 | ujson==5.10.0 134 | urllib3==2.3.0 135 | uvicorn==0.22.0 136 | uvloop==0.21.0 137 | watchdog==6.0.0 138 | watchfiles==1.0.4 139 | websockets==14.2 140 | xxhash==3.5.0 141 | yarl==1.18.3 142 | zipp==3.21.0 143 | zstandard==0.23.0 144 | -------------------------------------------------------------------------------- /sample.txt: -------------------------------------------------------------------------------- 1 | # The Whispering Forest 2 | 3 | Once upon a time in the small village of Eldoria, nestled between rolling hills and dense woods, there lived a curious young girl named Luna. 
Luna had always been fascinated by the mysteries that lay beyond the village boundaries, especially the ancient forest known as the Whispering Woods. 4 | 5 | Every evening, as the sun dipped below the horizon, Luna would sit by her window and listen to the trees sway in the wind. Her grandmother had told her stories of the forest spirits and the hidden secrets that awaited those brave enough to explore its depths. 6 | 7 | One crisp autumn morning, Luna decided it was time to uncover the forest's mysteries. With a backpack filled with essentials and a heart full of courage, she ventured into the Whispering Woods. The forest greeted her with a symphony of rustling leaves and distant bird songs. 8 | 9 | As Luna delved deeper, the trees seemed to lean in closer, their branches forming intricate patterns against the sky. Suddenly, she stumbled upon a clearing bathed in ethereal light. In the center stood an ancient oak tree with a door carved into its trunk. 10 | 11 | Gathering her bravery, Luna approached the door and gently knocked. To her surprise, the door creaked open, revealing a spiral staircase that descended into the earth. Without hesitation, she stepped inside, embarking on a journey that would change her life forever. 12 | 13 | Beneath the forest, Luna discovered a hidden realm inhabited by magical creatures and wise guardians. She learned that the Whispering Woods were a bridge between the human world and the realm of enchantment. Over time, Luna became the forest's protector, ensuring harmony between both worlds. 14 | 15 | Years later, Luna returned to Eldoria as a wise and respected leader, her heart forever connected to the Whispering Forest. The villagers would often see her walking through the woods, sharing tales of magic and wonder, inspiring generations to come. 16 | 17 | And so, the bond between Luna, Eldoria, and the Whispering Forest endured, a testament to the courage and curiosity of one young girl who listened to the whispers of the trees. 
-------------------------------------------------------------------------------- /streamlit_editor.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from io import BytesIO 3 | import os 4 | import pickle 5 | from streamlit_quill import st_quill 6 | from dataclasses import dataclass, field 7 | from typing import Optional 8 | import dspy 9 | import requests 10 | from os import getenv 11 | from dsp import LM 12 | from ratelimit import limits 13 | from datetime import datetime 14 | from langchain_text_splitters import RecursiveCharacterTextSplitter 15 | import streamlit_analytics2 as streamlit_analytics 16 | import streamlit_mermaid as stmd 17 | from dotenv import load_dotenv 18 | 19 | load_dotenv() 20 | 21 | class OpenRouterClient(LM): 22 | RL_CALLS=40 23 | RL_PERIOD_SECONDS=60 24 | def __init__(self, api_key=None, base_url="https://openrouter.ai/api/v1", model="meta-llama/llama-3-8b-instruct:free", extra_headers=None, **kwargs): 25 | self.api_key = api_key or getenv("OPENROUTER_API_KEY") 26 | self.base_url = base_url 27 | self.model = model 28 | self.extra_headers = extra_headers or {} 29 | self.history = [] 30 | self.provider = "openai" 31 | self.kwargs = {'temperature': 0.0, 32 | 'max_tokens': 150, 33 | 'top_p': 1, 34 | 'frequency_penalty': 0, 35 | 'presence_penalty': 0, 36 | 'n': 1} 37 | self.kwargs.update(kwargs) 38 | 39 | def _get_choice_text(choice): 40 | return choice["message"]["content"] 41 | 42 | def _get_headers(self): 43 | headers = { 44 | "Authorization": f"Bearer {self.api_key}", 45 | "Content-Type": "application/json" 46 | } 47 | headers.update(self.extra_headers) 48 | return headers 49 | 50 | @limits(calls=RL_CALLS, period=RL_PERIOD_SECONDS) 51 | def basic_request(self, prompt: str, **kwargs): 52 | headers = self._get_headers() 53 | data = { 54 | "model": self.model, 55 | "messages": [ 56 | {"role": "user", "content": prompt} 57 | ], 58 | **kwargs 59 | } 60 | 61 | response = 
requests.post(f"{self.base_url}/chat/completions", headers=headers, json=data) 62 | response_data = response.json() 63 | print(response_data) 64 | 65 | self.history.append({ 66 | "prompt": prompt, 67 | "response": response_data, 68 | "kwargs": kwargs, 69 | }) 70 | 71 | return response_data 72 | 73 | def __call__(self, prompt, **kwargs): 74 | req_kwargs = self.kwargs 75 | 76 | if not kwargs: 77 | req_kwargs.update(kwargs) 78 | 79 | response_data = self.basic_request(prompt, **req_kwargs) 80 | completions = [choice["message"]["content"] for choice in response_data.get("choices", [])] 81 | return completions 82 | 83 | # Dictionary to store available LM configurations 84 | lm_configs = {} 85 | 86 | # Configure OpenRouter LM if API key available in secrets 87 | if "openrouter" in st.secrets: 88 | lm_configs['openrouter'] = OpenRouterClient( 89 | model=st.secrets.openrouter.get("model", "meta-llama/llama-3.3-70b-instruct:free"), 90 | api_key=st.secrets.openrouter.api_key, 91 | api_base=st.secrets.openrouter.get("api_base", "https://openrouter.ai/api/v1") 92 | ) 93 | 94 | # Configure OpenAI LM if API key available in secrets 95 | if "openai" in st.secrets: 96 | lm_configs['openai'] = dspy.LM( 97 | model=st.secrets.openai.get("model", "openai/gpt-4o-mini"), 98 | api_key=st.secrets.openai.api_key 99 | ) 100 | 101 | # Configure Deepseek LM if API key available in secrets 102 | if "deepseek" in st.secrets: 103 | lm_configs['deepseek'] = dspy.LM( 104 | model=st.secrets.deepseek.get("model", "deepseek-chat"), 105 | api_key=st.secrets.deepseek.api_key 106 | ) 107 | 108 | # Configure Gemini LM if API key available in secrets 109 | if "gemini" in st.secrets: 110 | lm_configs['gemini'] = dspy.LM( 111 | model=st.secrets.gemini.get("model", "gemini/gemini-2.0-flash-exp"), 112 | api_key=st.secrets.gemini.api_key 113 | ) 114 | 115 | # Configure GitHub LM if credentials available in secrets 116 | if "github" in st.secrets: 117 | lm_configs['github'] = dspy.LM( 118 | 
model=st.secrets.github.get("model", "openai/gpt-4o-mini"), 119 | api_base=st.secrets.github.get("api_base", "https://models.inference.ai.azure.com"), 120 | api_key=st.secrets.github.api_key 121 | ) 122 | 123 | # Configure Ollama LM if configured in secrets 124 | if "ollama" in st.secrets: 125 | lm_configs['ollama'] = dspy.LM( 126 | model=f"ollama_chat/{st.secrets.ollama.get('model', 'llama3')}", 127 | api_base=st.secrets.ollama.get("api_base", "http://localhost:11434"), 128 | api_key=st.secrets.ollama.get("api_key", "") 129 | ) 130 | 131 | # Select default LM based on availability 132 | default_lm = None 133 | for lm_name in ['openrouter', 'openai', 'deepseek', 'gemini', 'github', 'ollama']: # Updated priority order 134 | if lm_name in lm_configs: 135 | default_lm = lm_configs[lm_name] 136 | break 137 | 138 | # Ensure LM is loaded at application start 139 | if not dspy.settings.lm: 140 | if default_lm: 141 | dspy.settings.configure( 142 | lm=default_lm, 143 | max_requests_per_minute=15, 144 | trace=[] 145 | ) 146 | else: 147 | st.error("No LLM configuration available. 
Please check your environment variables.") 148 | 149 | # --- Data Models --- 150 | @dataclass 151 | class FeedbackItem: 152 | content: str 153 | reference_text: Optional[str] = None 154 | 155 | @dataclass 156 | class SummaryItem: 157 | title: str 158 | summary: str 159 | original_text: str 160 | start_index: int 161 | end_index: int 162 | 163 | @dataclass 164 | class WorkspaceStats: 165 | """Statistics for workspace analytics""" 166 | word_count: int = 0 167 | section_count: int = 0 168 | last_updated: datetime = field(default_factory=datetime.now) 169 | 170 | @dataclass 171 | class Workspace: 172 | doc_content: Optional[str] = None 173 | ai_modified_text: Optional[str] = None 174 | feedback_items: list[FeedbackItem] = field(default_factory=list) 175 | document_summaries: list[SummaryItem] = field(default_factory=list) 176 | created_at: datetime = field(default_factory=datetime.now) 177 | last_modified: datetime = field(default_factory=datetime.now) 178 | name: Optional[str] = None 179 | description: Optional[str] = None 180 | stats: WorkspaceStats = field(default_factory=WorkspaceStats) 181 | 182 | # --- Workspace Management Functions --- 183 | def create_new_workspace(name: Optional[str] = None) -> str: 184 | """Create a new workspace with a unique ID""" 185 | workspace_id = str(len(st.session_state.workspaces) + 1) 186 | workspace_name = name or f"Workspace {workspace_id}" 187 | 188 | new_workspace = Workspace( 189 | name=workspace_name, 190 | doc_content="", 191 | ai_modified_text=None, 192 | feedback_items=[], 193 | document_summaries=[] 194 | ) 195 | 196 | st.session_state.workspaces[workspace_id] = new_workspace 197 | st.session_state.current_workspace_id = workspace_id 198 | save_state_to_disk() 199 | return workspace_id 200 | 201 | def delete_workspace(workspace_id: str): 202 | """Delete a workspace""" 203 | if workspace_id in st.session_state.workspaces: 204 | del st.session_state.workspaces[workspace_id] 205 | if st.session_state.current_workspace_id == 
workspace_id: 206 | st.session_state.current_workspace_id = None 207 | save_state_to_disk() 208 | 209 | def get_current_workspace() -> Optional[Workspace]: 210 | """Get the current workspace""" 211 | if st.session_state.current_workspace_id: 212 | return st.session_state.workspaces.get(st.session_state.current_workspace_id) 213 | return None 214 | 215 | def switch_workspace(workspace_id: str): 216 | """Switch to a different workspace""" 217 | if workspace_id in st.session_state.workspaces: 218 | st.session_state.current_workspace_id = workspace_id 219 | save_state_to_disk() 220 | 221 | # --- LLM Signatures --- 222 | class ContentReviser(dspy.Signature): 223 | """Signature for content revision task""" 224 | context = dspy.InputField(desc="Optional context or theme for revision") 225 | guidelines = dspy.InputField(desc="Optional guidelines for revision") 226 | text = dspy.InputField(desc="Text to be revised") 227 | revised_content = dspy.OutputField(desc="Revised version of the input text") 228 | 229 | class FeedbackGenerator(dspy.Signature): 230 | """Signature for feedback generation task""" 231 | text = dspy.InputField(desc="Text to generate feedback for") 232 | reference_text = dspy.InputField(desc="Optional specific text to focus feedback on") 233 | feedback = dspy.OutputField(desc="Generated feedback") 234 | 235 | class SummaryGenerator(dspy.Signature): 236 | """Signature for summary generation task""" 237 | text = dspy.InputField(desc="Text to summarize") 238 | title = dspy.OutputField(desc="Section title") 239 | summary = dspy.OutputField(desc="Generated summary") 240 | 241 | # --- LLM Functions --- 242 | def generate_content_revision(text: str, context: Optional[str] = None, guidelines: Optional[str] = None) -> str: 243 | """Generate revised content using LLM""" 244 | try: 245 | # Ensure LM is loaded 246 | if not dspy.settings.lm: 247 | dspy.settings.configure(lm=default_lm) # Use the configured default LM 248 | 249 | reviser = dspy.Predict(ContentReviser) 
250 | result = reviser( 251 | text=text, 252 | context=context or "", 253 | guidelines=guidelines or "" 254 | ) 255 | return result.revised_content 256 | except Exception as e: 257 | print(f"Error in content revision: {str(e)}") 258 | return text # Return original text on error 259 | 260 | def generate_feedback_revision(text: str, feedback_list: list[str]) -> tuple[str, str]: 261 | """Generate revised content based on feedback items, returns both original and revised text""" 262 | try: 263 | # Ensure LM is loaded 264 | if not dspy.settings.lm: 265 | dspy.settings.configure(lm=default_lm) 266 | 267 | # Combine feedback into guidelines 268 | guidelines = "\n".join([f"- {item}" for item in feedback_list]) 269 | reviser = dspy.Predict(ContentReviser) 270 | result = reviser( 271 | text=text, 272 | context="Revise based on feedback", 273 | guidelines=guidelines 274 | ) 275 | return text, result.revised_content 276 | except Exception as e: 277 | print(f"Error in feedback revision: {str(e)}") 278 | return text, text 279 | 280 | def get_feedback_item(reference_text: Optional[str] = None) -> FeedbackItem: 281 | """Generate a feedback item using LLM""" 282 | try: 283 | # Ensure LM is loaded 284 | if not dspy.settings.lm: 285 | dspy.settings.configure(lm=default_lm) # Use the configured default LM 286 | 287 | # Get current workspace 288 | current_workspace = get_current_workspace() 289 | if not current_workspace or not current_workspace.doc_content: 290 | return FeedbackItem( 291 | content="Unable to generate feedback: No document content available.", 292 | reference_text=reference_text 293 | ) 294 | 295 | # Create the generator with the current LM 296 | generator = dspy.Predict(FeedbackGenerator) 297 | 298 | # Generate appropriate prompt based on whether reference text is provided 299 | if reference_text: 300 | result = generator( 301 | text=f"Generate a modification suggestion specifically for the following text within the full document context:\n\nFull 
Document:\n{current_workspace.doc_content}\n\nSelected Text:\n{reference_text}\n\nMake sure to only provide one concise modification suggestion without actual text, no original text.", 302 | reference_text=reference_text 303 | ) 304 | else: 305 | result = generator( 306 | text=f"Generate a general document modification suggestion for the following text:\n\n{current_workspace.doc_content}\n\nMake sure to only provide one concise modification suggestion without actual text, no original text.", 307 | reference_text="" 308 | ) 309 | 310 | # Create and return the feedback item 311 | return FeedbackItem( 312 | content=result.feedback, 313 | reference_text=reference_text 314 | ) 315 | except Exception as e: 316 | print(f"Error generating feedback: {str(e)}") 317 | return FeedbackItem( 318 | content=f"Unable to generate feedback at this time. Error generating feedback: {str(e)}", 319 | reference_text=reference_text 320 | ) 321 | 322 | def generate_document_summary(text: str) -> list[SummaryItem]: 323 | """Generate document summaries using LLM""" 324 | try: 325 | # Split text into sections (simplified version) 326 | sections = split_into_sections(text) 327 | summaries = [] 328 | 329 | generator = dspy.Predict(SummaryGenerator) 330 | 331 | for i, section_text in enumerate(sections): 332 | result = generator(text=section_text) 333 | start_idx = len(''.join(sections[:i])) 334 | end_idx = start_idx + len(section_text) 335 | 336 | summaries.append(SummaryItem( 337 | title=result.title, 338 | summary=result.summary, 339 | original_text=section_text, 340 | start_index=start_idx, 341 | end_index=end_idx 342 | )) 343 | 344 | # Update section count in the current workspace 345 | current_workspace = get_current_workspace() 346 | if current_workspace: 347 | current_workspace.document_summaries = summaries 348 | update_workspace_stats(current_workspace) 349 | 350 | return summaries 351 | except Exception as e: 352 | print(f"Error generating summaries: {str(e)}") 353 | return [] 354 | 
355 | def regenerate_summary(text: str) -> str: 356 | """Regenerate a single summary using LLM""" 357 | try: 358 | generator = dspy.Predict(SummaryGenerator) 359 | result = generator(text=text) 360 | return result.summary 361 | except Exception as e: 362 | print(f"Error regenerating summary: {str(e)}") 363 | return f"Error generating summary: {str(e)}" 364 | 365 | def split_into_sections(text: str, min_section_length: int = 500) -> list[str]: 366 | """Split text into logical sections using Langchain's RecursiveCharacterTextSplitter""" 367 | # Initialize the recursive splitter 368 | splitter = RecursiveCharacterTextSplitter( 369 | chunk_size=min_section_length, 370 | chunk_overlap=0, 371 | separators=["\n## ", "\n# ", "\n### ", "\n#### ", "\n##### ", "\n###### ", "\n\n", "\n", " ", ""] 372 | ) 373 | 374 | # First try to split by markdown headers 375 | sections = splitter.split_text(text) 376 | 377 | # If no sections were created (or only one large section), fall back to character-based splitting 378 | if len(sections) <= 1: 379 | splitter = RecursiveCharacterTextSplitter( 380 | chunk_size=min_section_length, 381 | chunk_overlap=0, 382 | separators=["\n\n", "\n", " ", ""] 383 | ) 384 | sections = splitter.split_text(text) 385 | 386 | # Ensure we always return at least one section 387 | return sections if sections else [text] 388 | 389 | # --- App Configuration --- 390 | if 'read_mode' not in st.session_state: 391 | st.session_state.read_mode = False 392 | 393 | st.set_page_config( 394 | page_title="Document Management with Generative AI", 395 | layout="wide", 396 | initial_sidebar_state="collapsed" if st.session_state.get('read_mode', False) else "expanded", 397 | menu_items={ 398 | 'Get Help': 'https://x.com/StockchatEditor', 399 | 'Report a bug': "https://x.com/StockchatEditor", 400 | } 401 | ) 402 | 403 | streamlit_analytics.start_tracking(load_from_json=".streamlit/analytics.json") 404 | 405 | # --- Helper Functions --- 406 | def load_document(file): 407 | 
"""Load markdown document with empty document handling""" 408 | try: 409 | content = file.read().decode('utf-8') 410 | return content 411 | except Exception as e: 412 | st.error(f"Error loading document: {str(e)}") 413 | return "" # Return empty string as fallback 414 | 415 | def save_document(content: str) -> BytesIO: 416 | """Save document content to bytes""" 417 | byte_io = BytesIO() 418 | byte_io.write(content.encode('utf-8')) 419 | byte_io.seek(0) 420 | return byte_io 421 | 422 | def validate_text_selection(full_text: str, selected_text: str) -> bool: 423 | """ 424 | Validates if the selected text is actually part of the full document 425 | """ 426 | return selected_text.strip() in full_text 427 | 428 | def update_workspace_stats(workspace: Workspace) -> None: 429 | """Update workspace statistics""" 430 | if not workspace.doc_content: 431 | workspace.stats = WorkspaceStats() 432 | return 433 | 434 | # Calculate word count 435 | words = workspace.doc_content.split() 436 | word_count = len(words) 437 | 438 | # Get section count 439 | section_count = len(workspace.document_summaries) 440 | 441 | # Update stats 442 | workspace.stats = WorkspaceStats( 443 | word_count=word_count, 444 | section_count=section_count, 445 | last_updated=datetime.now() 446 | ) 447 | 448 | def save_state_to_disk(): 449 | """Save all workspaces to disk""" 450 | state_data = { 451 | 'workspaces': { 452 | workspace_id: { 453 | 'doc_content': workspace.doc_content, 454 | 'ai_modified_text': workspace.ai_modified_text, 455 | 'feedback_items': [ 456 | {'content': item.content, 'reference_text': item.reference_text} 457 | for item in workspace.feedback_items 458 | ], 459 | 'document_summaries': [ 460 | { 461 | 'title': item.title, 462 | 'summary': item.summary, 463 | 'original_text': item.original_text, 464 | 'start_index': item.start_index, 465 | 'end_index': item.end_index 466 | } 467 | for item in workspace.document_summaries 468 | ], 469 | 'name': workspace.name, 470 | 'description': 
                    workspace.description,
                'created_at': workspace.created_at.isoformat(),
                'last_modified': workspace.last_modified.isoformat(),
                'stats': {
                    'word_count': workspace.stats.word_count,
                    'section_count': workspace.stats.section_count,
                    'last_updated': workspace.stats.last_updated.isoformat()
                }
            }
            for workspace_id, workspace in st.session_state.workspaces.items()
        },
        'current_workspace_id': st.session_state.current_workspace_id
    }

    os.makedirs('.streamlit', exist_ok=True)
    # NOTE(review): state is persisted via pickle; acceptable for a
    # self-written local file, but pickles from untrusted sources must
    # never be loaded.
    with open('.streamlit/doc_state.pkl', 'wb') as f:
        pickle.dump(state_data, f)

def load_state_from_disk():
    """Load workspaces from disk.

    Rebuilds Workspace/FeedbackItem/SummaryItem objects from the pickled
    plain-dict snapshot written by save_state_to_disk. On a missing file
    (first run) or any load error, resets session state to empty.
    """
    try:
        with open('.streamlit/doc_state.pkl', 'rb') as f:
            state_data = pickle.load(f)

        workspaces = {}
        for workspace_id, workspace_data in state_data.get('workspaces', {}).items():
            stats_data = workspace_data.get('stats', {})
            workspaces[workspace_id] = Workspace(
                doc_content=workspace_data.get('doc_content'),
                ai_modified_text=workspace_data.get('ai_modified_text'),
                feedback_items=[
                    FeedbackItem(
                        content=item['content'],
                        reference_text=item.get('reference_text')
                    )
                    for item in workspace_data.get('feedback_items', [])
                ],
                document_summaries=[
                    SummaryItem(
                        title=item['title'],
                        summary=item['summary'],
                        original_text=item['original_text'],
                        start_index=item['start_index'],
                        end_index=item['end_index']
                    )
                    for item in workspace_data.get('document_summaries', [])
                ],
                name=workspace_data.get('name'),
                description=workspace_data.get('description'),
                created_at=datetime.fromisoformat(workspace_data.get('created_at')),
                last_modified=datetime.fromisoformat(workspace_data.get('last_modified')),
                stats=WorkspaceStats(
                    word_count=stats_data.get('word_count', 0),
                    section_count=stats_data.get('section_count', 0),
                    # Older snapshots may lack last_updated; fall back to the
                    # workspace creation time.
                    last_updated=datetime.fromisoformat(stats_data.get('last_updated', workspace_data.get('created_at')))
                )
            )

        st.session_state.workspaces = workspaces
        st.session_state.current_workspace_id = state_data.get('current_workspace_id')
    except FileNotFoundError:
        # First run: no saved state yet.
        st.session_state.workspaces = {}
        st.session_state.current_workspace_id = None
    except Exception as e:
        # Corrupt/incompatible snapshot: report and start clean.
        st.error(f"Error loading state: {str(e)}")
        st.session_state.workspaces = {}
        st.session_state.current_workspace_id = None

def regenerate_document_from_summaries(summaries: list[SummaryItem]) -> str:
    """Reconstruct document from summaries by re-joining their original
    section texts with blank lines (summaries themselves are not used)."""
    return '\n\n'.join(item.original_text for item in summaries)

@st.fragment
def ai_assistant_column():
    # Render the AI Assistant panel for the current workspace; no-op when
    # no workspace is selected.
    current_workspace = get_current_workspace()
    if not current_workspace:
        return

    st.title("AI Assistant")
    tab1, tab2, tab3 = st.tabs(["All Feedback", "Custom Feedback", "Flowchart"])

    with tab1:
        # Display existing feedback items
        if current_workspace.feedback_items:
            for idx, item in enumerate(current_workspace.feedback_items):
                cols = st.columns([0.1, 0.75, 0.15])

                # Checkbox column
                with cols[0]:
                    checkbox_key = f"feedback_checkbox_{idx}"
                    if checkbox_key not in st.session_state:
                        st.session_state[checkbox_key] = False

                    # Update checkbox state
                    st.session_state[checkbox_key] = st.checkbox(
                        "Select feedback",
                        value=st.session_state[checkbox_key],
                        key=f"checkbox_display_{idx}",
                        label_visibility="collapsed"
                    )

                # Content column
                with cols[1]:
                    st.markdown(f"**{item.content}**")

                # Delete button column
                with cols[2]:
                    if st.button("🗑️", key=f"delete_feedback_{idx}"):
                        current_workspace.feedback_items.pop(idx)
                        # Clean up checkbox
state 580 | if f"feedback_checkbox_{idx}" in st.session_state: 581 | del st.session_state[f"feedback_checkbox_{idx}"] 582 | save_state_to_disk() 583 | st.rerun() 584 | 585 | # Reference text expander 586 | if item.reference_text: 587 | with st.expander("📌 View referenced text", expanded=False): 588 | st.markdown(f"{item.reference_text}") 589 | else: 590 | st.info("No feedback items yet. Add feedback using the Custom Feedback tab.") 591 | 592 | # Get currently selected feedback items 593 | currently_selected_feedback = [ 594 | item.content for idx, item in enumerate(current_workspace.feedback_items) 595 | if st.session_state.get(f"feedback_checkbox_{idx}", False) 596 | ] 597 | 598 | # Apply feedback button 599 | st.markdown("