├── src ├── __init__.py ├── rag.py └── data_pipeline.py ├── tests ├── __init__.py └── test_rag.py ├── frontend ├── .vite │ └── deps_temp_bb8246a7 │ │ └── package.json ├── postcss.config.cjs ├── src │ ├── main.tsx │ ├── App.tsx │ ├── index.css │ └── components │ │ └── GitHubChat.tsx ├── tailwind.config.js ├── tsconfig.node.json ├── vite.config.ts ├── index.html ├── tsconfig.json ├── package.json └── pnpm-lock.yaml ├── pyproject.toml ├── .gitignore ├── config.py ├── README.md ├── app.py └── api.py /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/.vite/deps_temp_bb8246a7/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "module" 3 | } 4 | -------------------------------------------------------------------------------- /frontend/postcss.config.cjs: -------------------------------------------------------------------------------- 1 | const tailwindcss = require('tailwindcss'); 2 | const autoprefixer = require('autoprefixer'); 3 | 4 | module.exports = { 5 | plugins: [ 6 | tailwindcss, 7 | autoprefixer, 8 | ], 9 | } -------------------------------------------------------------------------------- /frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import App from './App'; 4 | import './index.css'; 5 | 6 | ReactDOM.createRoot(document.getElementById('root')!).render( 7 | 8 | 9 | 10 | ); -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: [ 4 | "./index.html", 5 | "./src/**/*.{js,ts,jsx,tsx}", 6 | ], 7 | theme: { 8 | extend: {}, 9 | }, 10 | plugins: [ 11 | require('@tailwindcss/typography'), 12 | ], 13 | } -------------------------------------------------------------------------------- /frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "ESNext", 6 | "moduleResolution": "bundler", 7 | "allowSyntheticDefaultImports": true, 8 | "strict": true 9 | }, 10 | "include": ["vite.config.ts"] 11 | } -------------------------------------------------------------------------------- /frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite'; 2 | import react from '@vitejs/plugin-react'; 3 | import path from 'path'; 4 | 5 | export default defineConfig({ 6 | plugins: [react()], 7 | resolve: { 8 | alias: { 9 | '@': path.resolve(__dirname, './src'), 10 | }, 11 | }, 12 | server: { 13 | port: 3000, 14 | }, 15 | }); -------------------------------------------------------------------------------- /frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Toaster } from 'react-hot-toast'; 3 | import GitHubChat from './components/GitHubChat'; 4 | 5 | const App: React.FC = () => { 6 | return ( 7 |
8 | 9 | 10 |
11 | ); 12 | }; 13 | 14 | export default App; -------------------------------------------------------------------------------- /frontend/src/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; 7 | line-height: 1.5; 8 | font-weight: 400; 9 | 10 | font-synthesis: none; 11 | text-rendering: optimizeLegibility; 12 | -webkit-font-smoothing: antialiased; 13 | -moz-osx-font-smoothing: grayscale; 14 | } -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | GitHub Chat 8 | 9 | 10 |
11 | 12 | 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "github-chat" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Li Yin "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.12" 10 | 11 | adalflow = { version = ">=1.0.0", extras = ["openai"] } 12 | 13 | faiss-cpu = "^1.9.0.post1" 14 | streamlit = "^1.31.1" 15 | fastapi = "^0.109.2" 16 | uvicorn = "^0.27.1" 17 | 18 | 19 | [build-system] 20 | requires = ["poetry-core"] 21 | build-backend = "poetry.core.masonry.api" 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | 3 | # Streamlit secrets 4 | .streamlit/secrets.toml 5 | node_modules/ 6 | # Python 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # Virtual Environment 29 | .env 30 | .venv 31 | env/ 32 | venv/ 33 | ENV/ 34 | 35 | # IDE 36 | .idea/ 37 | .vscode/ 38 | *.swp 39 | *.swo 40 | 41 | # OS 42 | .DS_Store 43 | Thumbs.db 44 | __pycache__/ 45 | 46 | # ignore adalflow cache 47 | /adalflow 48 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | "moduleResolution": "bundler", 9 | "allowImportingTsExtensions": true, 10 | "resolveJsonModule": true, 11 | "isolatedModules": true, 12 | "noEmit": true, 13 | "jsx": "react-jsx", 14 | "strict": true, 15 | "noUnusedLocals": true, 16 | "noUnusedParameters": true, 17 | "noFallthroughCasesInSwitch": true, 18 | "baseUrl": ".", 19 | "paths": { 20 | "@/*": ["./src/*"] 21 | } 22 | }, 23 | "include": ["src"], 24 | "references": [{ "path": "./tsconfig.node.json" }] 25 | } -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | from adalflow import OpenAIClient 2 | 3 | 4 | configs = { 5 | "embedder": { 6 | "batch_size": 100, 7 | "model_client": OpenAIClient, # make sure to initialize the model client later 8 | "model_kwargs": { 9 | "model": "text-embedding-3-small", 10 | "dimensions": 256, 11 | "encoding_format": "float", 12 | }, 13 | }, 14 | "retriever": { 15 | "top_k": 20, 16 | }, 17 | "generator": { 18 | "model_client": OpenAIClient, 19 | "model_kwargs": { 20 | "model": "gpt-4o-mini", 21 | "temperature": 0.3, 22 | "stream": False, 23 | }, 24 | }, 25 | "text_splitter": { 26 | "split_by": "word", 27 | "chunk_size": 400, 28 | "chunk_overlap": 100, 29 | }, 30 | } 31 | 32 | DEFAULT_GITHUB_REPO = "https://github.com/SylphAI-Inc/AdalFlow" 33 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "github-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "type": "module", 6 | "dependencies": { 7 | "@tailwindcss/typography": "^0.5.10", 8 | "@types/node": "^20.0.0", 9 | "@types/react": "^18.0.0", 10 | "@types/react-dom": "^18.0.0", 11 | "lucide-react": "^0.358.0", 12 | "react": "^18.2.0", 13 | "react-dom": "^18.2.0", 14 | "react-hot-toast": "^2.4.1", 15 | "react-markdown": "^9.0.1", 16 | "tailwindcss": "^3.4.0", 17 | "typescript": "^5.0.0" 18 | }, 19 | "scripts": { 20 | "dev": "vite", 21 | "build": "tsc && vite build", 22 | "serve": "vite preview" 23 | }, 24 | "devDependencies": { 25 | "@types/babel__generator": "^7.6.8", 26 | "@types/babel__template": "^7.4.4", 27 | "@types/babel__traverse": "^7.20.6", 28 | "@types/estree": "^1.0.6", 29 | "@types/prop-types": "^15.7.14", 30 | "@vitejs/plugin-react": "^4.2.0", 31 | "autoprefixer": "^10.4.17", 32 | "postcss": "^8.4.35", 33 | "vite": "^5.0.0" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /tests/test_rag.py: -------------------------------------------------------------------------------- 1 | import os 2 | from src.rag import RAG 3 | import adalflow as adal 4 | from adalflow.utils import get_adalflow_default_root_path 5 | import tempfile 6 | from src.data_pipeline import ( 7 | create_sample_documents, 8 | transform_documents_and_save_to_db, 9 | ) 10 | 11 | 12 | def initialize_test_database(db_path: str): 13 | """Initialize database with sample documents.""" 14 | documents = create_sample_documents() 15 | transform_documents_and_save_to_db(documents, db_path) 16 | return db_path 17 | 18 | 19 | def main(): 20 | 21 | temp_dir = tempfile.mkdtemp() 22 | db_path = os.path.join(temp_dir, "test_db") 23 | 24 | initialize_test_database(db_path) 25 | 26 | # Create RAG instance 27 | rag = RAG(index_path=db_path) 28 | 29 | # Test conversation flow with memory 30 | test_conversation = [ 31 | "Who is Alice and what does she do?", 32 | "What about Bob? What's his expertise?", 33 | "Can you tell me more about what the previous person works on?", 34 | "What was her favorite project?", # Tests memory of Alice 35 | "Between these two people, who has more experience with RAG systems?", # Tests memory of both 36 | "Do they ever meet? Where might they have lunch together?", # Tests memory and context combination 37 | ] 38 | 39 | print("Starting conversation test with memory...\n") 40 | for i, query in enumerate(test_conversation, 1): 41 | print(f"\n----- Query {i} -----") 42 | print(f"User: {query}") 43 | try: 44 | # Get conversation history before the response 45 | print("\nCurrent Conversation History:") 46 | history = rag.memory() 47 | if history: 48 | print(history) 49 | else: 50 | print("(No history yet)") 51 | 52 | response, docs = rag(query) 53 | print(f"\nAssistant: {response}") 54 | 55 | # Show most relevant document used 56 | if docs: 57 | most_relevant = docs[0].documents[0].text.strip() 58 | print(f"\nMost relevant context used: \n{most_relevant[:200]}...") 59 | 60 | except Exception as e: 61 | print(f"Error: {e}") 62 | print("\n" + "=" * 50) 63 | 64 | 65 | if __name__ == "__main__": 66 | from adalflow.utils import setup_env 67 | 68 | setup_env() 69 | main() 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GithubChat 2 | 3 | A RAG assistant to allow you to chat with any github repo. 4 | Learn fast. The default repo is AdalFlow github repo. 5 | 6 | [![Watch the video](https://img.youtube.com/vi/PvZTSmTK8b0/maxresdefault.jpg)](https://youtu.be/PvZTSmTK8b0) 7 | *Click the image above to watch the demo video* 8 | 9 | 10 | 11 | ## Project Structure 12 | ``` 13 | . 14 | ├── frontend/ # React frontend application 15 | ├── src/ # Python backend code 16 | ├── api.py # FastAPI server 17 | ├── app.py # Streamlit application 18 | └── pyproject.toml # Python dependencies 19 | ``` 20 | 21 | ## Backend Setup 22 | 23 | 1. Install dependencies: 24 | ```bash 25 | poetry install 26 | ``` 27 | 28 | 2. Set up OpenAI API key: 29 | 30 | Create a `.streamlit/secrets.toml` file in your project root: 31 | ```bash 32 | mkdir -p .streamlit 33 | touch .streamlit/secrets.toml 34 | ``` 35 | 36 | Add your OpenAI API key to `.streamlit/secrets.toml`: 37 | ```toml 38 | OPENAI_API_KEY = "your-openai-api-key-here" 39 | ``` 40 | 41 | ## Running the Applications 42 | 43 | ### Streamlit UI 44 | Run the streamlit app: 45 | ```bash 46 | poetry run streamlit run app.py 47 | ``` 48 | 49 | ### FastAPI Backend 50 | Run the API server: 51 | ```bash 52 | poetry run uvicorn api:app --reload 53 | ``` 54 | The API will be available at http://localhost:8000 55 | 56 | ### React Frontend 57 | 1. Navigate to the frontend directory: 58 | ```bash 59 | cd frontend 60 | ``` 61 | 62 | 2. Install Node.js dependencies: 63 | ```bash 64 | pnpm install 65 | ``` 66 | 67 | 68 | 3. Start the development server: 69 | ```bash 70 | pnpm run dev 71 | ``` 72 | The frontend will be available at http://localhost:3000 73 | 74 | ## API Endpoints 75 | 76 | ### POST /query 77 | Analyzes a GitHub repository based on a query. 78 | ```json 79 | // Request 80 | { 81 | "repo_url": "https://github.com/username/repo", 82 | "query": "What does this repository do?" 83 | } 84 | 85 | // Response 86 | { 87 | "rationale": "Analysis rationale...", 88 | "answer": "Detailed answer...", 89 | "contexts": [...] 90 | } 91 | ``` 92 | 93 | ## ROADMAP 94 | - [x] Clearly structured RAG that can prepare a repo, persit from reloading, and answer questions. 95 | - `DatabaseManager` in `src/data_pipeline.py` to manage the database. 96 | - `RAG` class in `src/rag.py` to manage the whole RAG lifecycle. 97 | 98 | ### On the RAG backend 99 | - [ ] Conditional retrieval. Sometimes users just want to clarify a past conversation, no extra context needed. 100 | - [ ] Create an evaluation dataset 101 | - [ ] Evaluate the RAG performance on the dataset 102 | - [ ] Auto-optimize the RAG model 103 | 104 | ### On the React frontend 105 | 106 | - [ ] Support the display of the whole conversation history instead of just the last message. 107 | - [ ] Support the management of multiple conversations. 108 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | from src.rag import RAG 4 | 5 | from typing import List 6 | 7 | from config import DEFAULT_GITHUB_REPO 8 | 9 | 10 | def init_rag(repo_path_or_url: str): 11 | 12 | # from adalflow.utils import setup_env 13 | 14 | os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"] 15 | 16 | rag = RAG() 17 | print(f"Loading repository from: {repo_path_or_url}") 18 | rag.prepare_retriever(repo_url_or_path=repo_path_or_url) 19 | return rag 20 | 21 | 22 | st.title("GithubChat") 23 | st.caption("Learn a repo with RAG assistant") 24 | 25 | repo_path = st.text_input( 26 | "Repository Path", 27 | value=DEFAULT_GITHUB_REPO, 28 | help="Github repo URL", 29 | ) 30 | 31 | if "messages" not in st.session_state: 32 | st.session_state.messages = [] 33 | if "rag" not in st.session_state: 34 | st.session_state.rag = None 35 | 36 | if st.button("Initialize local RAG"): 37 | try: 38 | st.session_state.rag = init_rag(repo_path) 39 | if st.session_state.rag: 40 | st.toast("Repository loaded successfully!") 41 | except Exception as e: 42 | st.toast(f"Load failed for repository at: {repo_path}") 43 | 44 | # TODO: Better reset the conversation 45 | if st.button("Clear Chat"): 46 | st.session_state.messages = [] 47 | if st.session_state.rag: 48 | st.session_state.rag.memory.current_conversation.dialog_turns.clear() 49 | 50 | 51 | def display_messages(): 52 | for message in st.session_state.messages: 53 | with st.chat_message(message["role"]): 54 | st.write("Assistant:") 55 | if "rationale" in message: 56 | st.write(message["rationale"]) 57 | st.write(message["content"]) 58 | if "context" in message: 59 | with st.expander("View context"): 60 | for doc in message["context"]: 61 | st.write( 62 | f"file_path: {doc.meta_data.get('file_path', 'unknown')}" 63 | ) 64 | st.write(f"language: {doc.meta_data.get('type', 'unknown')}") 65 | language = doc.meta_data.get("type", "python") 66 | if language == "py": 67 | st.code(doc.text, language="python") 68 | else: 69 | st.write(doc.text) 70 | 71 | 72 | from adalflow.core.types import Document 73 | 74 | 75 | def form_context(context: List[Document]): 76 | formatted_context = "" 77 | for doc in context: 78 | formatted_context += "" 79 | f"file_path: {doc.meta_data.get('file_path', 'unknown')} \n" 80 | f"language: {doc.meta_data.get('type', 'python')} \n" 81 | f"content: {doc.text} \n" 82 | return formatted_context 83 | 84 | 85 | if st.session_state.rag and ( 86 | query := st.chat_input( 87 | "Ask about the code (e.g., 'Show me the implementation of the RAG class', 'How is memory handled?')" 88 | ) 89 | ): 90 | st.session_state.messages.append({"role": "user", "content": query}) 91 | 92 | with st.chat_message("user"): 93 | st.write(query) 94 | 95 | with st.chat_message("assistant"): 96 | with st.spinner("Analyzing code..."): 97 | 98 | st.write(f"memory: {st.session_state.rag.memory()}") 99 | response, docs = st.session_state.rag(query) 100 | 101 | # Show relevant context first, then the explanation 102 | if docs and docs[0].documents: 103 | context = docs[0].documents 104 | 105 | # Add to chat history 106 | st.write(f"add to history") 107 | st.session_state.messages.append( 108 | { 109 | "role": "assistant", 110 | "rationale": ( 111 | response.rationale 112 | if hasattr(response, "rationale") 113 | else None 114 | ), 115 | "content": ( 116 | response.answer 117 | if hasattr(response, "answer") 118 | else response.raw_response 119 | ), 120 | "context": context, 121 | } 122 | ) 123 | else: 124 | st.write(response) 125 | st.session_state.messages.append( 126 | {"role": "assistant", "content": response} 127 | ) 128 | elif not st.session_state.rag: 129 | st.info("Please load a repository first!") 130 | 131 | # Finally, call display_messages *after* everything is appended 132 | display_messages() 133 | -------------------------------------------------------------------------------- /api.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, HTTPException 2 | from fastapi.middleware.cors import CORSMiddleware 3 | from pydantic import BaseModel 4 | import adalflow as adal 5 | from src.rag import RAG 6 | from typing import List, Optional 7 | import os 8 | from dotenv import load_dotenv 9 | from datetime import datetime, timezone 10 | 11 | def load_environment(): 12 | """Load environment variables from .env file if available, otherwise use system environment variables.""" 13 | try: 14 | # Try to load from .env file for local development 15 | load_dotenv() 16 | print("Loaded environment variables from .env file") 17 | except FileNotFoundError: 18 | # In production, env variables should be set in the environment 19 | print("No .env file found, using system environment variables") 20 | except Exception as e: 21 | print(f"Note: Error loading .env file: {e}") 22 | 23 | # Load environment variables 24 | load_environment() 25 | 26 | # Check for required environment variables 27 | openai_api_key = os.getenv("OPENAI_API_KEY") 28 | if not openai_api_key: 29 | raise ValueError( 30 | "OPENAI_API_KEY environment variable is required. Either:\n" 31 | "1. Create a .env file with OPENAI_API_KEY=your-key-here (for local development)\n" 32 | "2. Set the environment variable in your deployment platform (for production)" 33 | ) 34 | 35 | # Initialize FastAPI app 36 | app = FastAPI( 37 | title="GithubChat API", 38 | description="API for querying GitHub repositories using RAG", 39 | version="1.0.0" 40 | ) 41 | 42 | # Add CORS middleware 43 | app.add_middleware( 44 | CORSMiddleware, 45 | allow_origins=["*"], # In production, replace with specific origins 46 | allow_credentials=True, 47 | allow_methods=["*"], 48 | allow_headers=["*"], 49 | ) 50 | 51 | # Initialize RAG component 52 | try: 53 | # Set up adalflow environment 54 | os.environ["OPENAI_API_KEY"] = openai_api_key 55 | adal.setup_env() 56 | rag = RAG() 57 | print("Successfully initialized RAG component") 58 | except Exception as e: 59 | print(f"Error initializing RAG component: {e}") 60 | raise RuntimeError(f"Failed to initialize RAG component: {e}") 61 | 62 | class QueryRequest(BaseModel): 63 | repo_url: str 64 | query: str 65 | 66 | class DocumentMetadata(BaseModel): 67 | file_path: str 68 | type: str 69 | is_code: bool = False 70 | is_implementation: bool = False 71 | title: str = "" 72 | 73 | class Document(BaseModel): 74 | text: str 75 | meta_data: DocumentMetadata 76 | 77 | class QueryResponse(BaseModel): 78 | rationale: str 79 | answer: str 80 | contexts: List[Document] 81 | 82 | @app.post("/query", response_model=QueryResponse) 83 | async def query_repository(request: QueryRequest): 84 | """ 85 | Query a GitHub repository with RAG 86 | 87 | Args: 88 | request: QueryRequest containing repo_url and query 89 | 90 | Returns: 91 | QueryResponse containing the answer, rationale, and relevant contexts 92 | """ 93 | try: 94 | # Prepare retriever for the repository 95 | rag.prepare_retriever(request.repo_url) 96 | 97 | # Get response and retrieved documents 98 | response, retrieved_documents = rag(request.query) 99 | 100 | # Format response 101 | return QueryResponse( 102 | rationale=response.rationale if hasattr(response, 'rationale') else "", 103 | answer=response.answer if hasattr(response, 'answer') else response.raw_response, 104 | contexts=[ 105 | Document( 106 | text=doc.text, 107 | meta_data=DocumentMetadata( 108 | file_path=doc.meta_data.get('file_path', ''), 109 | type=doc.meta_data.get('type', ''), 110 | is_code=doc.meta_data.get('is_code', False), 111 | is_implementation=doc.meta_data.get('is_implementation', False), 112 | title=doc.meta_data.get('title', '') 113 | ) 114 | ) 115 | for doc in retrieved_documents[0].documents 116 | ] if retrieved_documents and retrieved_documents[0].documents else [] 117 | ) 118 | except Exception as e: 119 | error_msg = f"Error processing query: {str(e)}" 120 | print(error_msg) # Log the error 121 | raise HTTPException(status_code=500, detail=error_msg) 122 | 123 | @app.get("/health") 124 | async def health_check(): 125 | """Health check endpoint to verify API is running""" 126 | return { 127 | "status": "healthy", 128 | "timestamp": datetime.now(timezone.utc).isoformat(), 129 | "version": "1.0.0" 130 | } 131 | 132 | @app.get("/") 133 | async def root(): 134 | """Root endpoint with API information""" 135 | return { 136 | "name": "GithubChat API", 137 | "description": "API for querying GitHub repositories using RAG", 138 | "version": "1.0.0", 139 | "documentation": "/docs", 140 | "health_check": "/health" 141 | } 142 | 143 | if __name__ == "__main__": 144 | import uvicorn 145 | uvicorn.run(app, host="0.0.0.0", port=8000) -------------------------------------------------------------------------------- /src/rag.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | from uuid import uuid4 3 | 4 | import adalflow as adal 5 | from adalflow.core.types import ( 6 | Conversation, 7 | DialogTurn, 8 | UserQuery, 9 | AssistantResponse, 10 | ) 11 | from adalflow.components.retriever.faiss_retriever import FAISSRetriever 12 | from adalflow.components.data_process import ( 13 | RetrieverOutputToContextStr, 14 | ) 15 | from adalflow.core.component import DataComponent 16 | from config import configs 17 | from src.data_pipeline import DatabaseManager 18 | from adalflow.utils import printc 19 | 20 | 21 | class Memory(DataComponent): 22 | """Simple conversation management with a list of dialog turns.""" 23 | 24 | def __init__(self): 25 | super().__init__() 26 | self.current_conversation = Conversation() 27 | 28 | def call(self) -> List[DialogTurn]: 29 | 30 | all_dialog_turns = self.current_conversation.dialog_turns 31 | 32 | return all_dialog_turns 33 | 34 | def add_dialog_turn(self, user_query: str, assistant_response: str): 35 | dialog_turn = DialogTurn( 36 | id=str(uuid4()), 37 | user_query=UserQuery(query_str=user_query), 38 | assistant_response=AssistantResponse(response_str=assistant_response), 39 | ) 40 | 41 | self.current_conversation.append_dialog_turn(dialog_turn) 42 | 43 | 44 | system_prompt = r""" 45 | You are a code assistant which answer's user question on a Github Repo. 46 | You will receive user query, relevant context, and past conversation history. 47 | Think step by step.""" 48 | 49 | # history is a list of dialog turns 50 | RAG_TEMPLATE = r""" 51 | {{system_prompt}} 52 | {{output_format_str}} 53 | 54 | {# OrderedDict of DialogTurn #} 55 | {% if conversation_history %} 56 | 57 | {% for key, dialog_turn in conversation_history.items() %} 58 | {{key}}. 59 | User: {{dialog_turn.user_query.query_str}} 60 | You: {{dialog_turn.assistant_response.response_str}} 61 | {% endfor %} 62 | 63 | {% endif %} 64 | {% if contexts %} 65 | 66 | {% for context in contexts %} 67 | {{loop.index }}. 68 | File Path: {{context.meta_data.get('file_path', 'unknown')}} 69 | Content: {{context.text}} 70 | {% endfor %} 71 | 72 | {% endif %} 73 | 74 | {{input_str}} 75 | 76 | """ 77 | 78 | from dataclasses import dataclass, field 79 | 80 | 81 | @dataclass 82 | class RAGAnswer(adal.DataClass): 83 | rationale: str = field(default="", metadata={"desc": "Rationale for the answer."}) 84 | answer: str = field(default="", metadata={"desc": "Answer to the user query."}) 85 | 86 | __output_fields__ = ["rationale", "answer"] 87 | 88 | 89 | class RAG(adal.Component): 90 | __doc__ = """RAG with one repo. 91 | If you want to load a new repo. You need to call prepare_retriever(repo_url_or_path) first.""" 92 | 93 | def __init__(self): 94 | 95 | super().__init__() 96 | 97 | # Initialize embedder, generator, and db_manager 98 | self.memory = Memory() 99 | 100 | self.embedder = adal.Embedder( 101 | model_client=configs["embedder"]["model_client"](), 102 | model_kwargs=configs["embedder"]["model_kwargs"], 103 | ) 104 | 105 | self.initialize_db_manager() 106 | 107 | # Get the appropriate prompt template 108 | data_parser = adal.DataClassParser(data_class=RAGAnswer, return_data_class=True) 109 | 110 | self.generator = adal.Generator( 111 | template=RAG_TEMPLATE, 112 | prompt_kwargs={ 113 | "output_format_str": data_parser.get_output_format_str(), 114 | "conversation_history": self.memory(), 115 | "system_prompt": system_prompt, 116 | "contexts": None, 117 | }, 118 | model_client=configs["generator"]["model_client"](), 119 | model_kwargs=configs["generator"]["model_kwargs"], 120 | output_processors=data_parser, 121 | ) 122 | 123 | def initialize_db_manager(self): 124 | self.db_manager = DatabaseManager() 125 | self.transformed_docs = [] 126 | 127 | def prepare_retriever(self, repo_url_or_path: str): 128 | r"""Run prepare_retriever once for each repo.""" 129 | self.initialize_db_manager() 130 | self.transformed_docs = self.db_manager.prepare_database(repo_url_or_path) 131 | print(f"len(self.transformed_docs): {len(self.transformed_docs)}") 132 | self.retriever = FAISSRetriever( 133 | **configs["retriever"], 134 | embedder=self.embedder, 135 | documents=self.transformed_docs, 136 | document_map_func=lambda doc: doc.vector, 137 | ) 138 | 139 | def call(self, query: str) -> Any: 140 | 141 | retrieved_documents = self.retriever(query) 142 | 143 | # fill in the document 144 | retrieved_documents[0].documents = [ 145 | self.transformed_docs[doc_index] 146 | for doc_index in retrieved_documents[0].doc_indices 147 | ] 148 | 149 | printc(f"retrieved_documents: {retrieved_documents[0].documents}") 150 | printc(f"memory: {self.memory()}") 151 | 152 | prompt_kwargs = { 153 | "input_str": query, 154 | "contexts": retrieved_documents[0].documents, 155 | "conversation_history": self.memory(), 156 | } 157 | response = self.generator( 158 | prompt_kwargs=prompt_kwargs, 159 | ) 160 | 161 | # for debug 162 | prompt_str = self.generator.get_prompt(**prompt_kwargs) 163 | printc(f"prompt_str: {prompt_str}") 164 | 165 | final_response = response.data 166 | 167 | self.memory.add_dialog_turn(user_query=query, assistant_response=final_response) 168 | 169 | return final_response, retrieved_documents 170 | 171 | 172 | if __name__ == "__main__": 173 | from adalflow.utils import get_logger 174 | 175 | adal.setup_env() 176 | # repo_url = "https://github.com/SylphAI-Inc/AdalFlow" 177 | repo_url = "https://github.com/SylphAI-Inc/GithubChat" 178 | rag = RAG() 179 | rag.prepare_retriever(repo_url) 180 | print( 181 | f"RAG component initialized for repo: {repo_url}. Type your query below or type 'exit' to quit." 182 | ) 183 | 184 | while True: 185 | # Get user input 186 | 187 | query = input("Enter your query (or type 'exit' to stop): ") 188 | 189 | # Exit condition 190 | if query.lower() in ["exit", "quit", "stop"]: 191 | print("Exiting RAG component. Goodbye!") 192 | break 193 | 194 | # Process the query 195 | try: 196 | response, retrieved_documents = rag(query) 197 | rag.memory.add_dialog_turn(user_query=query, assistant_response=response) 198 | print(f"\nResponse:\n{response}\n") 199 | print(f"Retrieved Documents:\n{retrieved_documents}\n") 200 | except Exception as e: 201 | print(f"An error occurred while processing the query: {e}") 202 | -------------------------------------------------------------------------------- /frontend/src/components/GitHubChat.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useCallback } from "react"; 2 | import ReactMarkdown from "react-markdown"; 3 | import { Loader2, ChevronDown, ChevronRight, Github } from "lucide-react"; 4 | import { toast } from "react-hot-toast"; 5 | 6 | interface DocumentMetadata { 7 | file_path: string; 8 | type: string; 9 | is_code: boolean; 10 | is_implementation: boolean; 11 | title: string; 12 | } 13 | 14 | interface Document { 15 | text: string; 16 | meta_data: DocumentMetadata; 17 | } 18 | 19 | interface QueryResponse { 20 | rationale: string; 21 | answer: string; 22 | contexts: Document[]; 23 | } 24 | 25 | const GitHubChat: React.FC = () => { 26 | const [repoUrl, setRepoUrl] = useState(""); 27 | const [query, setQuery] = useState(""); 28 | const [isProcessing, setIsProcessing] = useState(false); 29 | const [response, setResponse] = useState(null); 30 | const [expandedContexts, setExpandedContexts] = useState<{ 31 | [key: number]: boolean; 32 | }>({}); 33 | 34 | const analyzeRepo = useCallback(async () => { 35 | if (!repoUrl.trim() || !query.trim()) return; 36 | 37 | setIsProcessing(true); 38 | try { 39 | const response = await fetch("http://localhost:8000/query", { 40 | method: "POST", 41 | headers: { 42 | "Content-Type": "application/json", 43 | }, 44 | body: JSON.stringify({ 45 | repo_url: repoUrl, 46 | query: query, 47 | }), 48 | }); 49 | 50 | if (!response.ok) { 51 | throw new Error("Failed to analyze repository"); 52 | } 53 | 54 | const result = await response.json(); 55 | setResponse(result); 56 | toast.success("Analysis complete!"); 57 | } catch (error) { 58 | console.error("Error:", error); 59 | const errorMessage = 60 | error instanceof Error ? error.message : "Failed to analyze repository"; 61 | toast.error(errorMessage); 62 | } finally { 63 | setIsProcessing(false); 64 | } 65 | }, [repoUrl, query]); 66 | 67 | const toggleContext = (index: number) => { 68 | setExpandedContexts((prev) => ({ 69 | ...prev, 70 | [index]: !prev[index], 71 | })); 72 | }; 73 | 74 | return ( 75 |
76 |
77 |
78 |
79 | 80 |

GitHubChat

81 |
82 |

Chat with any Github Repo!

83 |
84 | 85 |
86 |
87 |
88 | 94 |
95 | setRepoUrl(e.target.value)} 100 | className="block w-full rounded-md border-gray-300 shadow-sm focus:border-black focus:ring-black sm:text-sm px-4 py-2 border" 101 | placeholder="https://github.com/username/repository" 102 | /> 103 |
104 |
105 | 106 |
107 | 113 |
114 |