├── .github └── workflows │ └── ruff-action.yml ├── .gitignore ├── 1.Streaming - Memory - Sources ├── .chainlit │ ├── config.toml │ ├── configOld.toml │ └── translations │ │ ├── de.json │ │ ├── en-US.json │ │ └── pt-BR.json ├── .env.example ├── README.md ├── chainlit.md ├── images │ └── RAG.gif ├── ingest.py ├── main.py └── requirements.txt ├── 2.Pinecone - HybridRetriever - Adv.Ingestion ├── .chainlit │ ├── config.toml │ └── translations │ │ ├── en-US.json │ │ └── pt-BR.json ├── .env.example ├── README.md ├── chainlit.md ├── ingest.py ├── main.py └── requirements.txt ├── 3.Reranker - Q.Transformation - Res.Synthesis ├── .chainlit │ ├── config.toml │ └── translations │ │ ├── en-US.json │ │ └── pt-BR.json ├── .env.example ├── README.md ├── chainlit.md ├── ingest.py ├── main.py └── requirements.txt ├── 4.Evaluation - Generation - Optimization ├── .chainlit │ ├── config.toml │ └── translations │ │ ├── en-US.json │ │ └── pt-BR.json ├── .env.example ├── README.md ├── _evaluations.json ├── benchmark.csv ├── chainlit.md ├── evaluation.py ├── generation.py ├── ingest.py ├── main.py ├── optimization.py ├── rag_evaluator_pack │ ├── README.md │ ├── llama_index │ │ └── packs │ │ │ └── rag_evaluator │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ └── base.py │ └── pyproject.toml └── requirements.txt ├── 5.Intent Detection Agent ├── .chainlit │ ├── config.toml │ └── translations │ │ ├── en-US.json │ │ └── pt-BR.json ├── .env.example ├── README.md ├── chainlit.md ├── images │ └── RAGSources.png ├── ingest.py ├── main.py └── requirements.txt ├── LICENSE.md ├── README.md └── ruff.toml /.github/workflows/ruff-action.yml: -------------------------------------------------------------------------------- 1 | name: Ruff 2 | on: [ push, pull_request ] 3 | jobs: 4 | ruff: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 8 | - uses: chartboost/ruff-action@v1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.env 2 | *requirements_plain.txt 3 | *__pycache__ 4 | **/storage/ 5 | *.gitattributes 6 | **/data/* 7 | !**/data/ 8 | *.files 9 | 6.* 10 | **/output/* 11 | !**/output/ 12 | *questions.json 13 | *_evaluations.json 14 | *benchmark.csv -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = true 4 | 5 | 6 | # List of environment variables to be provided by each user to use the app. 7 | user_env = [] 8 | 9 | # Duration (in seconds) during which the session is saved when the connection is lost 10 | session_timeout = 3600 11 | 12 | # Enable third parties caching (e.g LangChain cache) 13 | cache = false 14 | 15 | # Authorized origins 16 | allow_origins = ["*"] 17 | 18 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 19 | # follow_symlink = false 20 | 21 | [features] 22 | # Show the prompt playground 23 | prompt_playground = true 24 | 25 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 26 | unsafe_allow_html = false 27 | 28 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 
29 | latex = false 30 | 31 | # Authorize users to upload files with messages 32 | [features.multi_modal] 33 | enabled = false 34 | accept = ["*/*"] 35 | max_files = 20 36 | max_size_mb = 500 37 | 38 | # Allows user to use speech to text 39 | [features.speech_to_text] 40 | enabled = false 41 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 42 | # language = "en-US" 43 | 44 | [UI] 45 | # Name of the app and chatbot. 46 | name = "Chatbot" 47 | 48 | # Show the readme while the thread is empty. 49 | show_readme_as_default = false 50 | 51 | # Description of the app and chatbot. This is used for HTML tags. 52 | # description = "" 53 | 54 | # Large size content are by default collapsed for a cleaner ui 55 | default_collapse_content = true 56 | 57 | # The default value for the expand messages settings. 58 | default_expand_messages = false 59 | 60 | # Hide the chain of thought details from the user in the UI. 61 | hide_cot = false 62 | 63 | # Link to your github repo. This will add a github button in the UI's header. 64 | github = "https://github.com/felipearosr/GPT-Documents" 65 | 66 | # Specify a CSS file that can be used to customize the user interface. 67 | # The CSS file can be served from the public directory or via an external link. 68 | # custom_css = "/public/test.css" 69 | 70 | # Specify a Javascript file that can be used to customize the user interface. 71 | # The Javascript file can be served from the public directory. 72 | # custom_js = "/public/test.js" 73 | 74 | # Specify a custom font url. 75 | # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap" 76 | 77 | # Specify a custom build directory for the frontend. 78 | # This can be used to customize the frontend code. 79 | # Be careful: If this is a relative path, it should not start with a slash. 80 | # custom_build = "./public/build" 81 | 82 | # Override default MUI light theme. (Check theme.ts) 83 | [UI.theme] 84 | #font_family = "Inter, sans-serif" 85 | [UI.theme.light] 86 | #background = "#FAFAFA" 87 | #paper = "#FFFFFF" 88 | 89 | [UI.theme.light.primary] 90 | #main = "#F80061" 91 | #dark = "#980039" 92 | #light = "#FFE7EB" 93 | 94 | # Override default MUI dark theme. (Check theme.ts) 95 | [UI.theme.dark] 96 | #background = "#000212" 97 | #paper = "#00031E" 98 | 99 | [UI.theme.dark.primary] 100 | #main = "#FFFFFF" #"#F80061" 101 | #dark = "#FFFFFF" #"#980039" 102 | #light = "#FFFFFF" #"#00031E" 103 | 104 | 105 | [meta] 106 | generated_by = "1.0.500" 107 | -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/.chainlit/configOld.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = true 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Process and display HTML in messages. 
This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 22 | unsafe_allow_html = false 23 | 24 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 25 | latex = false 26 | 27 | # Authorize users to upload files with messages 28 | multi_modal = false 29 | 30 | # Allows user to use speech to text 31 | [features.speech_to_text] 32 | enabled = false 33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 34 | # language = "en-US" 35 | 36 | [UI] 37 | # Name of the app and chatbot. 38 | name = "Chatbot" 39 | 40 | # Show the readme while the thread is empty. 41 | show_readme_as_default = false 42 | 43 | # Description of the app and chatbot. This is used for HTML tags. 44 | # description = "" 45 | 46 | # Large size content are by default collapsed for a cleaner ui 47 | default_collapse_content = true 48 | 49 | # The default value for the expand messages settings. 50 | default_expand_messages = false 51 | 52 | # Hide the chain of thought details from the user in the UI. 53 | hide_cot = false 54 | 55 | # Link to your github repo. This will add a github button in the UI's header. 56 | github = "https://github.com/felipearosr/GPT-Documents" 57 | 58 | # Specify a CSS file that can be used to customize the user interface. 59 | # The CSS file can be served from the public directory or via an external link. 60 | # custom_css = "/public/test.css" 61 | 62 | # Override default MUI light theme. (Check theme.ts) 63 | [UI.theme.light] 64 | #background = "#FAFAFA" 65 | #paper = "#FFFFFF" 66 | 67 | [UI.theme.light.primary] 68 | #main = "#F80061" 69 | #dark = "#980039" 70 | #light = "#FFE7EB" 71 | 72 | # Override default MUI dark theme. 
(Check theme.ts) 73 | [UI.theme.dark] 74 | #background = "#FAFAFA" 75 | #paper = "#FFFFFF" 76 | 77 | [UI.theme.dark.primary] 78 | #main = "#F80061" 79 | #dark = "#980039" 80 | #light = "#FFE7EB" 81 | 82 | 83 | [meta] 84 | generated_by = "1.0.101" 85 | -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/.chainlit/translations/de.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Einstellungen", 8 | "settingsKey": "S", 9 | "APIKeys": "API-Schl\u00fcssel", 10 | "logout": "Abmelden" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Neuer Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Aufgabenliste", 22 | "loading": "L\u00e4dt...", 23 | "error": "Ein Fehler ist aufgetreten" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Upload abbrechen", 28 | "removeAttachment": "Anhang entfernen" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Neuen Chat erstellen?", 32 | "clearChat": "Dies wird die aktuellen Nachrichten l\u00f6schen und einen neuen Chat starten.", 33 | "cancel": "Abbrechen", 34 | "confirm": "Best\u00e4tigen" 35 | }, 36 | "settingsModal": { 37 | "settings": "Einstellungen", 38 | "expandMessages": "Nachrichten ausklappen", 39 | "hideChainOfThought": "Zwischenschritte verbergen", 40 | "darkMode": "Dunkelmodus" 41 | } 42 | }, 43 | "organisms": { 44 | "chat": { 45 | "history": { 46 | "index": { 47 | "showHistory": "Zeige Chatverlauf", 48 | "lastInputs": "Letzte Eingaben", 49 | "noInputs": "Leer...", 50 | "loading": "L\u00e4dt..." 51 | } 52 | }, 53 | "inputBox": { 54 | "input": { 55 | "placeholder": "Nachricht eingeben..." 
56 | }, 57 | "speechButton": { 58 | "start": "Aufnahme starten", 59 | "stop": "Aufnahme stoppen" 60 | }, 61 | "SubmitButton": { 62 | "sendMessage": "Nachricht senden", 63 | "stopTask": "Aufgabe stoppen" 64 | }, 65 | "UploadButton": { 66 | "attachFiles": "Dateien anh\u00e4ngen" 67 | }, 68 | "waterMark": { 69 | "text": "Erstellt mit" 70 | } 71 | }, 72 | "Messages": { 73 | "index": { 74 | "running": "L\u00e4uft", 75 | "executedSuccessfully": "erfolgreich ausgef\u00fchrt", 76 | "failed": "fehlgeschlagen", 77 | "feedbackUpdated": "Feedback aktualisiert", 78 | "updating": "Aktualisiert" 79 | } 80 | }, 81 | "dropScreen": { 82 | "dropYourFilesHere": "Ziehe deine Dateien hierher" 83 | }, 84 | "index": { 85 | "failedToUpload": "Upload fehlgeschlagen", 86 | "cancelledUploadOf": "Upload abgebrochen von", 87 | "couldNotReachServer": "Konnte den Server nicht erreichen", 88 | "continuingChat": "Vorherigen Chat fortsetzen" 89 | }, 90 | "settings": { 91 | "settingsPanel": "Einstellungsfenster", 92 | "reset": "Zur\u00fccksetzen", 93 | "cancel": "Abbrechen", 94 | "confirm": "Best\u00e4tigen" 95 | } 96 | }, 97 | "threadHistory": { 98 | "sidebar": { 99 | "filters": { 100 | "FeedbackSelect": { 101 | "feedbackAll": "Feedback: Alle", 102 | "feedbackPositive": "Feedback: Positiv", 103 | "feedbackNegative": "Feedback: Negativ" 104 | }, 105 | "SearchBar": { 106 | "search": "Suche" 107 | } 108 | }, 109 | "DeleteThreadButton": { 110 | "confirmMessage": "Dies wird den Thread sowie seine Nachrichten und Elemente l\u00f6schen.", 111 | "cancel": "Abbrechen", 112 | "confirm": "Best\u00e4tigen", 113 | "deletingChat": "Chat wird gel\u00f6scht", 114 | "chatDeleted": "Chat gel\u00f6scht" 115 | }, 116 | "index": { 117 | "pastChats": "Vergangene Chats" 118 | }, 119 | "ThreadList": { 120 | "empty": "Leer...", 121 | "today": "Heute", 122 | "yesterday": "Gestern", 123 | "previous7days": "Vor 7 Tagen", 124 | "previous30days": "Vor 30 Tagen" 125 | }, 126 | "TriggerButton": { 127 | "closeSidebar": "Seitenleiste schlie\u00dfen", 128 | "openSidebar": "Seitenleiste \u00f6ffnen" 129 | } 130 | }, 131 | "Thread": { 132 | "backToChat": "Zur\u00fcck zum Chat", 133 | "chatCreatedOn": "Dieser Chat wurde erstellt am" 134 | } 135 | }, 136 | "header": { 137 | "chat": "Chat", 138 | "readme": "Liesmich" 139 | } 140 | } 141 | }, 142 | "hooks": { 143 | "useLLMProviders": { 144 | "failedToFetchProviders": "Anbieter konnten nicht geladen werden:" 145 | } 146 | }, 147 | "pages": { 148 | "Design": {}, 149 | "Env": { 150 | "savedSuccessfully": "Erfolgreich gespeichert", 151 | "requiredApiKeys": "Ben\u00f6tigte API-Schl\u00fcssel", 152 | "requiredApiKeysInfo": "Um diese App zu nutzen, werden die folgenden API-Schl\u00fcssel ben\u00f6tigt. Die Schl\u00fcssel werden im lokalen Speicher Ihres Ger\u00e4ts gespeichert." 153 | }, 154 | "Page": { 155 | "notPartOfProject": "Sie sind nicht Teil dieses Projekts." 
156 | }, 157 | "ResumeButton": { 158 | "resumeChat": "Chat fortsetzen" 159 | } 160 | } 161 | } -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "settings": "Settings", 38 | "expandMessages": "Expand Messages", 39 | "hideChainOfThought": "Hide Chain of Thought", 40 | "darkMode": "Dark Mode" 41 | }, 42 | "detailsButton": { 43 | "using": "Using", 44 | "running": "Running", 45 | "took_one": "Took {{count}} step", 46 | "took_other": "Took {{count}} steps" 47 | }, 48 | "auth": { 49 | "authLogin": { 50 | "title": "Login to access the app.", 51 | "form": { 52 | "email": "Email address", 53 | "password": "Password", 54 | "noAccount": "Don't have an account?", 55 | "alreadyHaveAccount": "Already have an account?", 56 | "signup": "Sign Up", 57 | "signin": "Sign In", 58 | "or": "OR", 59 | "continue": "Continue", 60 | "forgotPassword": "Forgot password?", 61 | "passwordMustContain": "Your password must contain:", 62 | "emailRequired": "email is a required field", 63 | "passwordRequired": "password is a required field" 64 | }, 65 | "error": { 66 | "default": "Unable to sign in.", 67 | "signin": "Try signing in with a different account.", 68 | "oauthsignin": "Try signing in with a different account.", 69 | "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.", 70 | "oauthcallbackerror": "Try signing in with a different account.", 71 | "oauthcreateaccount": "Try signing in with a different account.", 72 | "emailcreateaccount": "Try signing in with a different account.", 73 | "callback": "Try signing in with a different account.", 74 | "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.", 75 | "emailsignin": "The e-mail could not be sent.", 76 | "emailverify": "Please verify your email, a new email has been sent.", 77 | "credentialssignin": "Sign in failed. Check the details you provided are correct.", 78 | "sessionrequired": "Please sign in to access this page." 79 | } 80 | }, 81 | "authVerifyEmail": { 82 | "almostThere": "You're almost there! 
We've sent an email to ", 83 | "verifyEmailLink": "Please click on the link in that email to complete your signup.", 84 | "didNotReceive": "Can't find the email?", 85 | "resendEmail": "Resend email", 86 | "goBack": "Go Back", 87 | "emailSent": "Email sent successfully.", 88 | "verifyEmail": "Verify your email address" 89 | }, 90 | "providerButton": { 91 | "continue": "Continue with {{provider}}", 92 | "signup": "Sign up with {{provider}}" 93 | }, 94 | "authResetPassword": { 95 | "newPasswordRequired": "New password is a required field", 96 | "passwordsMustMatch": "Passwords must match", 97 | "confirmPasswordRequired": "Confirm password is a required field", 98 | "newPassword": "New password", 99 | "confirmPassword": "Confirm password", 100 | "resetPassword": "Reset Password" 101 | }, 102 | "authForgotPassword": { 103 | "email": "Email address", 104 | "emailRequired": "email is a required field", 105 | "emailSent": "Please check the email address {{email}} for instructions to reset your password.", 106 | "enterEmail": "Enter your email address and we will send you instructions to reset your password.", 107 | "resendEmail": "Resend email", 108 | "continue": "Continue", 109 | "goBack": "Go Back" 110 | } 111 | } 112 | }, 113 | "organisms": { 114 | "chat": { 115 | "history": { 116 | "index": { 117 | "showHistory": "Show history", 118 | "lastInputs": "Last Inputs", 119 | "noInputs": "Such empty...", 120 | "loading": "Loading..." 121 | } 122 | }, 123 | "inputBox": { 124 | "input": { 125 | "placeholder": "Type your message here..." 126 | }, 127 | "speechButton": { 128 | "start": "Start recording", 129 | "stop": "Stop recording" 130 | }, 131 | "SubmitButton": { 132 | "sendMessage": "Send message", 133 | "stopTask": "Stop Task" 134 | }, 135 | "UploadButton": { 136 | "attachFiles": "Attach files" 137 | }, 138 | "waterMark": { 139 | "text": "Built with" 140 | } 141 | }, 142 | "Messages": { 143 | "index": { 144 | "running": "Running", 145 | "executedSuccessfully": "executed successfully", 146 | "failed": "failed", 147 | "feedbackUpdated": "Feedback updated", 148 | "updating": "Updating" 149 | } 150 | }, 151 | "dropScreen": { 152 | "dropYourFilesHere": "Drop your files here" 153 | }, 154 | "index": { 155 | "failedToUpload": "Failed to upload", 156 | "cancelledUploadOf": "Cancelled upload of", 157 | "couldNotReachServer": "Could not reach the server", 158 | "continuingChat": "Continuing previous chat" 159 | }, 160 | "settings": { 161 | "settingsPanel": "Settings panel", 162 | "reset": "Reset", 163 | "cancel": "Cancel", 164 | "confirm": "Confirm" 165 | } 166 | }, 167 | "threadHistory": { 168 | "sidebar": { 169 | "filters": { 170 | "FeedbackSelect": { 171 | "feedbackAll": "Feedback: All", 172 | "feedbackPositive": "Feedback: Positive", 173 | "feedbackNegative": "Feedback: Negative" 174 | }, 175 | "SearchBar": { 176 | "search": "Search" 177 | } 178 | }, 179 | "DeleteThreadButton": { 180 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 181 | "cancel": "Cancel", 182 | "confirm": "Confirm", 183 | "deletingChat": "Deleting chat", 184 | "chatDeleted": "Chat deleted" 185 | }, 186 | "index": { 187 | "pastChats": "Past Chats" 188 | }, 189 | "ThreadList": { 190 | "empty": "Empty...", 191 | "today": "Today", 192 | "yesterday": "Yesterday", 193 | "previous7days": "Previous 7 days", 194 | "previous30days": "Previous 30 days" 195 | }, 196 | "TriggerButton": { 197 | "closeSidebar": "Close sidebar", 198 | "openSidebar": "Open sidebar" 199 | } 200 | }, 201 | "Thread": { 202 | 
"backToChat": "Go back to chat", 203 | "chatCreatedOn": "This chat was created on" 204 | } 205 | }, 206 | "header": { 207 | "chat": "Chat", 208 | "readme": "Readme" 209 | } 210 | } 211 | }, 212 | "hooks": { 213 | "useLLMProviders": { 214 | "failedToFetchProviders": "Failed to fetch providers:" 215 | } 216 | }, 217 | "pages": { 218 | "Design": {}, 219 | "Env": { 220 | "savedSuccessfully": "Saved successfully", 221 | "requiredApiKeys": "Required API Keys", 222 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 223 | }, 224 | "Page": { 225 | "notPartOfProject": "You are not part of this project." 226 | }, 227 | "ResumeButton": { 228 | "resumeChat": "Resume Chat" 229 | } 230 | } 231 | } -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Configura\u00e7\u00f5es", 8 | "settingsKey": "S", 9 | "APIKeys": "Chaves de API", 10 | "logout": "Sair" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Nova Conversa" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas", 22 | "loading": "Carregando...", 23 | "error": "Ocorreu um erro" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancelar envio", 28 | "removeAttachment": "Remover anexo" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Criar novo chat?", 32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.", 33 | "cancel": "Cancelar", 34 | "confirm": "Confirmar" 35 | }, 36 | "settingsModal": { 37 | "settings": "Configura\u00e7\u00f5es", 38 | "expandMessages": "Expandir Mensagens", 39 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento", 40 | "darkMode": "Modo Escuro" 41 | } 42 | }, 43 | "organisms": { 44 | "chat": { 45 | "history": { 46 | "index": { 47 | "showHistory": "Mostrar hist\u00f3rico", 48 | "lastInputs": "\u00daltimas Entradas", 49 | "noInputs": "Vazio...", 50 | "loading": "Carregando..." 51 | } 52 | }, 53 | "inputBox": { 54 | "input": { 55 | "placeholder": "Digite sua mensagem aqui..." 
56 | }, 57 | "speechButton": { 58 | "start": "Iniciar grava\u00e7\u00e3o", 59 | "stop": "Parar grava\u00e7\u00e3o" 60 | }, 61 | "SubmitButton": { 62 | "sendMessage": "Enviar mensagem", 63 | "stopTask": "Parar Tarefa" 64 | }, 65 | "UploadButton": { 66 | "attachFiles": "Anexar arquivos" 67 | }, 68 | "waterMark": { 69 | "text": "Constru\u00eddo com" 70 | } 71 | }, 72 | "Messages": { 73 | "index": { 74 | "running": "Executando", 75 | "executedSuccessfully": "executado com sucesso", 76 | "failed": "falhou", 77 | "feedbackUpdated": "Feedback atualizado", 78 | "updating": "Atualizando" 79 | } 80 | }, 81 | "dropScreen": { 82 | "dropYourFilesHere": "Solte seus arquivos aqui" 83 | }, 84 | "index": { 85 | "failedToUpload": "Falha ao enviar", 86 | "cancelledUploadOf": "Envio cancelado de", 87 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor", 88 | "continuingChat": "Continuando o chat anterior" 89 | }, 90 | "settings": { 91 | "settingsPanel": "Painel de Configura\u00e7\u00f5es", 92 | "reset": "Redefinir", 93 | "cancel": "Cancelar", 94 | "confirm": "Confirmar" 95 | } 96 | }, 97 | "threadHistory": { 98 | "sidebar": { 99 | "filters": { 100 | "FeedbackSelect": { 101 | "feedbackAll": "Feedback: Todos", 102 | "feedbackPositive": "Feedback: Positivo", 103 | "feedbackNegative": "Feedback: Negativo" 104 | }, 105 | "SearchBar": { 106 | "search": "Buscar" 107 | } 108 | }, 109 | "DeleteThreadButton": { 110 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.", 111 | "cancel": "Cancelar", 112 | "confirm": "Confirmar", 113 | "deletingChat": "Deletando conversa", 114 | "chatDeleted": "Conversa deletada" 115 | }, 116 | "index": { 117 | "pastChats": "Conversas Anteriores" 118 | }, 119 | "ThreadList": { 120 | "empty": "Vazio..." 121 | }, 122 | "TriggerButton": { 123 | "closeSidebar": "Fechar barra lateral", 124 | "openSidebar": "Abrir barra lateral" 125 | } 126 | }, 127 | "Thread": { 128 | "backToChat": "Voltar para a conversa", 129 | "chatCreatedOn": "Esta conversa foi criada em" 130 | } 131 | }, 132 | "header": { 133 | "chat": "Conversa", 134 | "readme": "Leia-me" 135 | } 136 | }, 137 | "hooks": { 138 | "useLLMProviders": { 139 | "failedToFetchProviders": "Falha ao buscar provedores:" 140 | } 141 | }, 142 | "pages": { 143 | "Design": {}, 144 | "Env": { 145 | "savedSuccessfully": "Salvo com sucesso", 146 | "requiredApiKeys": "Chaves de API necess\u00e1rias", 147 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo." 148 | }, 149 | "Page": { 150 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto." 151 | }, 152 | "ResumeButton": { 153 | "resumeChat": "Continuar Conversa" 154 | } 155 | } 156 | } 157 | } -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="sk-..." -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/README.md: -------------------------------------------------------------------------------- 1 | # ChatBot with streaming, memory and sources 2 | 3 | Embarking on the creation of an advanced Retrieval-Augmented Generation (RAG) system marks a significant first step towards innovative chatbot development. 
This foundational version incorporates three critical features:
4 | 
5 | - **Streaming:** Enhance user experience with fast, real-time answers as the chatbot generates responses on-the-fly, reducing wait times.
6 | - **Memory:** Facilitate natural, conversational interactions by enabling the chatbot to recall previous parts of the conversation, adding context and relevance to the dialogue.
7 | - **Sources:** Increase transparency and trust by clearly indicating the origin of the chatbot's answers, allowing users to understand where the information is coming from.
8 | 
9 | These functionalities are powered by technologies like LlamaIndex and Chainlit, setting the stage for a more intuitive, responsive, and informed chatbot experience.
10 | 
11 | ![](https://github.com/felipearosr/GPT-Documents/blob/main/1.Streaming%20-%20Memory%20-%20Sources/images/RAG.gif)
12 | 
13 | 
14 | ## Table of Contents
15 | 
16 | 1. [Installation](#installation)
17 | 2. [Usage](#usage)
18 | 3. [Streaming](#streaming)
19 | 4. [Memory](#memory)
20 | 5. [Sources](#sources)
21 | 6. [Improvements](#improvements)
22 | 
23 | 
24 | ## Installation
25 | 
26 | Follow these steps to set up the GPT Documents chatbot on your local machine:
27 | 
28 | 1. Create a conda environment:
29 | 
30 | ```shell
31 | conda create -n rag python==3.11 -y && source activate rag
32 | ```
33 | 
34 | 2. Install the required dependencies:
35 | 
36 | ```shell
37 | pip install -r requirements.txt
38 | ```
39 | 
40 | 3. Load your documents into the vector store:
41 |    - Create a folder named `data`.
42 |    - Place your documents inside the `data` folder.
43 |    - Execute the `ingest.py` script to initiate the loading process.
44 | 
45 | ## Usage
46 | 
47 | Once the setup is complete, launch the Chainlit app using the following command:
48 | 
49 | ```shell
50 | chainlit run main.py
51 | ```
52 | 
53 | ## Streaming
54 | 
55 | ### Understanding Streaming in LLMs
56 | 
57 | Streaming is a feature that enables real-time delivery of responses from the large language model (LLM) as they are being generated. It significantly reduces response latency by displaying each part of the answer, token by token, as it is streamed from the LLM. Users do not have to wait for the entire response to be composed and sent before they start reading, which makes the interaction smoother and faster.
58 | 
59 | 
60 | ### How do we implement it?
61 | 
62 | ```python
63 | @cl.on_chat_start
64 | async def start():
65 |     # we simply add `streaming=True` to the OpenAI settings
66 |     Settings.llm = OpenAI(
67 |         model="gpt-3.5-turbo", temperature=0.1, max_tokens=1024, streaming=True
68 |     )
69 |     # ...
70 | ```
71 | 
72 | ```python
73 | @cl.on_message
74 | async def main(message: cl.Message):
75 |     # ...
76 |     # wrap the synchronous query call so it can be awaited without blocking
77 |     response = await cl.make_async(query_engine.query)(prompt_template)
78 | 
79 |     # now we stream the tokens into chainlit
80 |     for token in response.response_gen:
81 |         await response_message.stream_token(token)
82 |     if response.response_txt:
83 |         response_message.content = response.response_txt
84 |     await response_message.send()
85 |     # ...
86 | ```
87 | 
88 | 
89 | 
90 | ## Memory
91 | 
92 | ### Exploring Memory in LLMs
93 | 
94 | Memory in LLMs is a feature we integrate to enhance their ability to maintain and recall the history of interactions with users. This functionality enriches the conversational experience by allowing the model to reference previous exchanges and build on them, creating a more coherent and contextually relevant dialogue.
95 | 
96 | ### How do we implement it?
97 | ```python
98 | @cl.on_chat_start
99 | async def start():
100 |     # ...
101 |     # create an empty list to store the message history
102 |     message_history = []
103 |     # set message_history to user_session
104 |     cl.user_session.set("message_history", message_history)
105 |     # ...
106 | ```
107 | 
108 | ```python
109 | @cl.on_message
110 | async def main(message: cl.Message):
111 |     # get message_history from user_session
112 |     message_history = cl.user_session.get("message_history")
113 |     prompt_template = "Previous messages:\n"
114 |     # ...
115 |     user_message = message.content
116 | 
117 |     # fill the prompt with the previous messages
118 |     for message in message_history:
119 |         prompt_template += f"{message['author']}: {message['content']}\n"
120 |     prompt_template += f"Human: {user_message}"
121 |     # ...
122 |     # we add both the user_message and the response_message to the message_history
123 |     message_history.append({"author": "Human", "content": user_message})
124 |     message_history.append({"author": "AI", "content": response_message.content})
125 |     # limit the memory to only the last 2 queries and responses
126 |     message_history = message_history[-4:]
127 |     # finally we set the filled message_history into the user_session
128 |     cl.user_session.set("message_history", message_history)
129 |     # ...
130 | ```
131 | 
132 | ## Sources
133 | 
134 | ### What are Sources?
135 | 
136 | Sources refer to the documents or materials returned by the retrieval system, which provide the foundation for the answers to your queries. They offer a transparent way to verify the origin of the information used by the language model to generate its responses.
137 | 
138 | ### How do we implement it?
139 | 
140 | This is a basic implementation of sources; you can also separate them by file type using the metadata of the source_nodes.
141 | 
142 | ```python
143 | async def set_sources(response, response_message):
144 |     label_list = []
145 |     count = 1
146 | 
147 |     # we run through all the source_nodes of the response
148 |     for sr in response.source_nodes:
149 |         elements = [
150 |             # we put these sources into a Chainlit element, in this case Text;
151 |             # it can also be a PDF or any other element available in Chainlit.
152 |             cl.Text(
153 |                 name="S" + str(count),
154 |                 content=f"{sr.node.text}",
155 |                 display="side",
156 |                 size="small",
157 |             )
158 |         ]
159 |         response_message.elements = elements
160 |         label_list.append("S" + str(count))
161 |         await response_message.update()
162 |         count += 1
163 |     response_message.content += "\n\nSources: " + ", ".join(label_list)
164 |     # we update the response_message so that these sources are displayed in Chainlit
165 |     await response_message.update()
166 | ```
167 | 
168 | ## Improvements
169 | 
170 | Adding the callback manager from Chainlit. Right now it is broken because of the llama-index v0.10 update.
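For reference, this is roughly how the handler is wired into LlamaIndex's global `Settings` (a minimal sketch that mirrors what `main.py` already does; whether every intermediate step renders correctly still depends on the installed llama-index and Chainlit versions):

```python
import chainlit as cl

from llama_index.core import Settings
from llama_index.core.callbacks import CallbackManager


@cl.on_chat_start
async def start():
    # attach the Chainlit handler so LlamaIndex events (retrieval, LLM calls)
    # are surfaced as intermediate steps in the Chainlit UI
    Settings.callback_manager = CallbackManager([cl.LlamaIndexCallbackHandler()])
    # ... the rest of the setup (LLM, embeddings, query engine) stays unchanged
```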
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/chainlit.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/1.Streaming - Memory - Sources/chainlit.md
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/images/RAG.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/1.Streaming - Memory - Sources/images/RAG.gif
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/ingest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
4 | from dotenv import load_dotenv
5 | 
6 | load_dotenv()
7 | openai.api_key = os.environ.get("OPENAI_API_KEY")
8 | 
9 | documents = SimpleDirectoryReader("./data").load_data(show_progress=True)
10 | index = VectorStoreIndex.from_documents(documents)
11 | index.storage_context.persist()
12 | 
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import chainlit as cl
4 | 
5 | from llama_index.core import Settings, load_index_from_storage, StorageContext
6 | from llama_index.llms.openai import OpenAI
7 | from llama_index.core.callbacks import CallbackManager
8 | from llama_index.embeddings.openai import OpenAIEmbedding
9 | from llama_index.core.service_context import ServiceContext
10 | 
11 | openai.api_key = os.environ.get("OPENAI_API_KEY")
12 | 
13 | 
14 | @cl.cache
15 | def load_context():
16 |     storage_context = StorageContext.from_defaults(
17 |         persist_dir="./storage",
18 |     )
19 |     index = load_index_from_storage(storage_context)
20 |     return index
21 | 
22 | 
23 | @cl.on_chat_start
24 | async def start():
25 |     index = load_context()
26 | 
27 |     Settings.llm = OpenAI(
28 |         model="gpt-3.5-turbo", temperature=0.1, max_tokens=1024, streaming=True
29 |     )
30 |     Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
31 |     Settings.context_window = 4096
32 |     Settings.callback_manager = CallbackManager([cl.LlamaIndexCallbackHandler()])
33 | 
34 |     service_context = ServiceContext.from_defaults()
35 |     query_engine = index.as_query_engine(
36 |         streaming=True, similarity_top_k=2, service_context=service_context
37 |     )
38 |     cl.user_session.set("query_engine", query_engine)
39 | 
40 |     message_history = []
41 |     cl.user_session.set("message_history", message_history)
42 | 
43 |     await cl.Message(
44 |         author="Assistant", content="Hello! I'm an AI assistant. How may I help you?"
45 | ).send() 46 | 47 | 48 | async def set_sources(response, response_message): 49 | label_list = [] 50 | count = 1 51 | for sr in response.source_nodes: 52 | elements = [ 53 | cl.Text( 54 | name="S" + str(count), 55 | content=f"{sr.node.text}", 56 | display="side", 57 | size="small", 58 | ) 59 | ] 60 | response_message.elements = elements 61 | label_list.append("S" + str(count)) 62 | await response_message.update() 63 | count += 1 64 | response_message.content += "\n\nSources: " + ", ".join(label_list) 65 | await response_message.update() 66 | 67 | 68 | @cl.on_message 69 | async def main(message: cl.Message): 70 | query_engine = cl.user_session.get("query_engine") 71 | message_history = cl.user_session.get("message_history") 72 | prompt_template = "Previous messages:\n" 73 | 74 | response_message = cl.Message(content="", author="Assistant") 75 | 76 | user_message = message.content 77 | 78 | for message in message_history: 79 | prompt_template += f"{message['author']}: {message['content']}\n" 80 | prompt_template += f"Human: {user_message}" 81 | 82 | response = await cl.make_async(query_engine.query)(prompt_template) 83 | 84 | for token in response.response_gen: 85 | await response_message.stream_token(token) 86 | if response.response_txt: 87 | response_message.content = response.response_txt 88 | await response_message.send() 89 | 90 | message_history.append({"author": "Human", "content": user_message}) 91 | message_history.append({"author": "AI", "content": response_message.content}) 92 | message_history = message_history[-4:] 93 | cl.user_session.set("message_history", message_history) 94 | 95 | if response.source_nodes: 96 | await set_sources(response, response_message) 97 | -------------------------------------------------------------------------------- /1.Streaming - Memory - Sources/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohttp==3.9.3 3 | aiosignal==1.3.1 4 | annotated-types==0.6.0 5 | anyio==3.7.1 6 | asyncer==0.0.2 7 | attrs==23.2.0 8 | beautifulsoup4==4.12.3 9 | bidict==0.23.1 10 | certifi==2024.2.2 11 | chainlit==1.0.500 12 | charset-normalizer==3.3.2 13 | chevron==0.14.0 14 | click==8.1.7 15 | dataclasses-json==0.5.14 16 | Deprecated==1.2.14 17 | dirtyjson==1.0.8 18 | distro==1.9.0 19 | fastapi==0.108.0 20 | fastapi-socketio==0.0.10 21 | filetype==1.2.0 22 | frozenlist==1.4.1 23 | fsspec==2024.3.1 24 | googleapis-common-protos==1.63.0 25 | greenlet==3.0.3 26 | grpcio==1.62.1 27 | h11==0.14.0 28 | httpcore==1.0.5 29 | httpx==0.27.0 30 | idna==3.6 31 | importlib-metadata==7.0.0 32 | joblib==1.3.2 33 | Lazify==0.4.0 34 | literalai==0.0.401 35 | llama-index==0.10.26 36 | llama-index-agent-openai==0.2.1 37 | llama-index-cli==0.1.11 38 | llama-index-core==0.10.26 39 | llama-index-embeddings-openai==0.1.7 40 | llama-index-indices-managed-llama-cloud==0.1.5 41 | llama-index-legacy==0.9.48 42 | llama-index-llms-openai==0.1.14 43 | llama-index-multi-modal-llms-openai==0.1.4 44 | llama-index-program-openai==0.1.5 45 | llama-index-question-gen-openai==0.1.3 46 | llama-index-readers-file==0.1.13 47 | llama-index-readers-llama-parse==0.1.4 48 | llama-parse==0.4.0 49 | llamaindex-py-client==0.1.15 50 | marshmallow==3.21.1 51 | multidict==6.0.5 52 | mypy-extensions==1.0.0 53 | nest-asyncio==1.6.0 54 | networkx==3.2.1 55 | nltk==3.8.1 56 | numpy==1.26.4 57 | openai==1.16.1 58 | opentelemetry-api==1.24.0 59 | opentelemetry-exporter-otlp==1.24.0 60 | opentelemetry-exporter-otlp-proto-common==1.24.0 61 | 
opentelemetry-exporter-otlp-proto-grpc==1.24.0 62 | opentelemetry-exporter-otlp-proto-http==1.24.0 63 | opentelemetry-instrumentation==0.45b0 64 | opentelemetry-proto==1.24.0 65 | opentelemetry-sdk==1.24.0 66 | opentelemetry-semantic-conventions==0.45b0 67 | packaging==23.2 68 | pandas==2.2.1 69 | pillow==10.3.0 70 | protobuf==4.25.3 71 | pydantic==2.6.4 72 | pydantic_core==2.16.3 73 | PyJWT==2.8.0 74 | PyMuPDF==1.24.1 75 | PyMuPDFb==1.24.1 76 | pypdf==4.1.0 77 | python-dateutil==2.9.0.post0 78 | python-dotenv==1.0.1 79 | python-engineio==4.9.0 80 | python-graphql-client==0.4.3 81 | python-multipart==0.0.9 82 | python-socketio==5.11.2 83 | pytz==2024.1 84 | PyYAML==6.0.1 85 | regex==2023.12.25 86 | requests==2.31.0 87 | simple-websocket==1.0.0 88 | six==1.16.0 89 | sniffio==1.3.1 90 | soupsieve==2.5 91 | SQLAlchemy==2.0.29 92 | starlette==0.32.0.post1 93 | striprtf==0.0.26 94 | syncer==2.0.3 95 | tenacity==8.2.3 96 | tiktoken==0.6.0 97 | tomli==2.0.1 98 | tqdm==4.66.2 99 | typing-inspect==0.9.0 100 | typing_extensions==4.10.0 101 | tzdata==2024.1 102 | uptrace==1.22.0 103 | urllib3==2.2.1 104 | uvicorn==0.25.0 105 | watchfiles==0.20.0 106 | websockets==12.0 107 | wrapt==1.16.0 108 | wsproto==1.2.0 109 | yarl==1.9.4 110 | zipp==3.18.1 -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = true 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 22 | unsafe_allow_html = false 23 | 24 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 25 | latex = false 26 | 27 | # Authorize users to upload files with messages 28 | multi_modal = false 29 | 30 | # Allows user to use speech to text 31 | [features.speech_to_text] 32 | enabled = false 33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 34 | # language = "en-US" 35 | 36 | [UI] 37 | # Name of the app and chatbot. 38 | name = "Chatbot" 39 | 40 | # Show the readme while the thread is empty. 41 | show_readme_as_default = false 42 | 43 | # Description of the app and chatbot. This is used for HTML tags. 44 | # description = "" 45 | 46 | # Large size content are by default collapsed for a cleaner ui 47 | default_collapse_content = true 48 | 49 | # The default value for the expand messages settings. 50 | default_expand_messages = false 51 | 52 | # Hide the chain of thought details from the user in the UI. 53 | hide_cot = false 54 | 55 | # Link to your github repo. This will add a github button in the UI's header. 
56 | github = "https://github.com/felipearosr/GPT-Documents" 57 | 58 | # Specify a CSS file that can be used to customize the user interface. 59 | # The CSS file can be served from the public directory or via an external link. 60 | # custom_css = "/public/test.css" 61 | 62 | # Override default MUI light theme. (Check theme.ts) 63 | [UI.theme.light] 64 | #background = "#FAFAFA" 65 | #paper = "#FFFFFF" 66 | 67 | [UI.theme.light.primary] 68 | #main = "#F80061" 69 | #dark = "#980039" 70 | #light = "#FFE7EB" 71 | 72 | # Override default MUI dark theme. (Check theme.ts) 73 | [UI.theme.dark] 74 | #background = "#FAFAFA" 75 | #paper = "#FFFFFF" 76 | 77 | [UI.theme.dark.primary] 78 | #main = "#F80061" 79 | #dark = "#980039" 80 | #light = "#FFE7EB" 81 | 82 | 83 | [meta] 84 | generated_by = "1.0.101" 85 | -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expand Messages", 38 | "hideChainOfThought": "Hide Chain of Thought", 39 | "darkMode": "Dark Mode" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "Last Inputs", 47 | "noInputs": "Such empty...", 48 | "loading": "Loading..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Type your message here..." 
54 | }, 55 | "speechButton": { 56 | "start": "Start recording", 57 | "stop": "Stop recording" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Send message", 61 | "stopTask": "Stop Task" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Attach files" 65 | }, 66 | "waterMark": { 67 | "text": "Built with" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Running", 73 | "executedSuccessfully": "executed successfully", 74 | "failed": "failed", 75 | "feedbackUpdated": "Feedback updated", 76 | "updating": "Updating" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Drop your files here" 81 | }, 82 | "index": { 83 | "failedToUpload": "Failed to upload", 84 | "cancelledUploadOf": "Cancelled upload of", 85 | "couldNotReachServer": "Could not reach the server", 86 | "continuingChat": "Continuing previous chat" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Settings panel", 90 | "reset": "Reset", 91 | "cancel": "Cancel", 92 | "confirm": "Confirm" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: All", 100 | "feedbackPositive": "Feedback: Positive", 101 | "feedbackNegative": "Feedback: Negative" 102 | }, 103 | "SearchBar": { 104 | "search": "Search" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 109 | "cancel": "Cancel", 110 | "confirm": "Confirm", 111 | "deletingChat": "Deleting chat", 112 | "chatDeleted": "Chat deleted" 113 | }, 114 | "index": { 115 | "pastChats": "Past Chats" 116 | }, 117 | "ThreadList": { 118 | "empty": "Empty..." 119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Close sidebar", 122 | "openSidebar": "Open sidebar" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Go back to chat", 127 | "chatCreatedOn": "This chat was created on" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Chat", 132 | "readme": "Readme" 133 | } 134 | } 135 | }, 136 | "hooks": { 137 | "useLLMProviders": { 138 | "failedToFetchProviders": "Failed to fetch providers:" 139 | } 140 | }, 141 | "pages": { 142 | "Design": {}, 143 | "Env": { 144 | "savedSuccessfully": "Saved successfully", 145 | "requiredApiKeys": "Required API Keys", 146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 147 | }, 148 | "Page": { 149 | "notPartOfProject": "You are not part of this project." 
150 | }, 151 | "ResumeButton": { 152 | "resumeChat": "Resume Chat" 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Configura\u00e7\u00f5es", 8 | "settingsKey": "S", 9 | "APIKeys": "Chaves de API", 10 | "logout": "Sair" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Nova Conversa" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas", 22 | "loading": "Carregando...", 23 | "error": "Ocorreu um erro" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancelar envio", 28 | "removeAttachment": "Remover anexo" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Criar novo chat?", 32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.", 33 | "cancel": "Cancelar", 34 | "confirm": "Confirmar" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expandir Mensagens", 38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento", 39 | "darkMode": "Modo Escuro" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "\u00daltimas Entradas", 47 | "noInputs": "Vazio...", 48 | "loading": "Carregando..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Digite sua mensagem aqui..." 54 | }, 55 | "speechButton": { 56 | "start": "Iniciar grava\u00e7\u00e3o", 57 | "stop": "Parar grava\u00e7\u00e3o" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Enviar mensagem", 61 | "stopTask": "Parar Tarefa" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Anexar arquivos" 65 | }, 66 | "waterMark": { 67 | "text": "Constru\u00eddo com" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Executando", 73 | "executedSuccessfully": "executado com sucesso", 74 | "failed": "falhou", 75 | "feedbackUpdated": "Feedback atualizado", 76 | "updating": "Atualizando" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Solte seus arquivos aqui" 81 | }, 82 | "index": { 83 | "failedToUpload": "Falha ao enviar", 84 | "cancelledUploadOf": "Envio cancelado de", 85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor", 86 | "continuingChat": "Continuando o chat anterior" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es", 90 | "reset": "Redefinir", 91 | "cancel": "Cancelar", 92 | "confirm": "Confirmar" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: Todos", 100 | "feedbackPositive": "Feedback: Positivo", 101 | "feedbackNegative": "Feedback: Negativo" 102 | }, 103 | "SearchBar": { 104 | "search": "Buscar" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.", 109 | "cancel": "Cancelar", 110 | "confirm": "Confirmar", 111 | "deletingChat": "Deletando conversa", 112 | "chatDeleted": "Conversa deletada" 113 | }, 114 | "index": { 115 | "pastChats": "Conversas Anteriores" 116 | }, 117 | "ThreadList": { 118 | "empty": "Vazio..." 
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Fechar barra lateral",
122 | "openSidebar": "Abrir barra lateral"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Voltar para a conversa",
127 | "chatCreatedOn": "Esta conversa foi criada em"
128 | }
129 | },
130 | "header": {
131 | "chat": "Conversa",
132 | "readme": "Leia-me"
133 | }
134 | },
135 | "hooks": {
136 | "useLLMProviders": {
137 | "failedToFetchProviders": "Falha ao buscar provedores:"
138 | }
139 | },
140 | "pages": {
141 | "Design": {},
142 | "Env": {
143 | "savedSuccessfully": "Salvo com sucesso",
144 | "requiredApiKeys": "Chaves de API necess\u00e1rias",
145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
146 | },
147 | "Page": {
148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
149 | },
150 | "ResumeButton": {
151 | "resumeChat": "Continuar Conversa"
152 | }
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/.env.example:
--------------------------------------------------------------------------------
1 | # Mandatory
2 | OPENAI_API_KEY="sk-..."
3 | PINECONE_API_KEY="..." # for vector database
4 | LLAMA_PARSE_API_KEY="llx-..." # for pdf ingestion
5 | 
6 | # Optional
7 | MODEL="gpt-4-0125-preview"
8 | EMBEDDING="text-embedding-3-large"
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/README.md:
--------------------------------------------------------------------------------
1 | # Adding a vector DB, hybrid retriever and improved ingestion
2 | 
3 | In this updated version, we've added three major features to enhance the repository:
4 | 
5 | 1. **Vector DB Integration**: We've integrated Pinecone for efficient vector data handling, improving real-time analysis and insight extraction.
6 | 
7 | 2. **Hybrid Retriever**: Implements a blend of dense and sparse vector methods, enhancing search accuracy and relevance.
8 | 
9 | 3. **Advanced Ingestion**: Employs specialized techniques like Unstructured for general documents and Llama Parse for PDFs, plus metadata enhancement to improve document retrievability and context for LLMs.
10 | 
11 | ![](https://github.com/felipearosr/GPT-Documents/blob/main/1.Streaming%20-%20Memory%20-%20Sources/images/RAG.gif)
12 | 
13 | ## Table of Contents
14 | 
15 | 1. [Installation Instructions](#installation-instructions)
16 | 2. [Usage](#usage)
17 | 3. [Pinecone](#pinecone)
18 | 4. [Hybrid Retriever](#hybrid-retriever)
19 | 5. [Advanced Ingestion](#advanced-ingestion)
20 | 
21 | ## Installation Instructions
22 | 
23 | Follow these steps to set up the GPT Documents chatbot on your local machine:
24 | 
25 | 1. Create a conda environment:
26 | 
27 | ```shell
28 | conda create -n rag python==3.11 -y && source activate rag
29 | ```
30 | 
31 | 2. Install the required dependencies:
32 | 
33 | ```shell
34 | pip install -r requirements.txt
35 | ```
36 | 
37 | 3. Load your documents into the vector store:
38 |    - Create a folder named `data`.
39 |    - Place your documents inside the `data` folder.
40 |    - Execute the `ingest.py` script to initiate the loading process.
41 | 
42 | ## Usage
43 | 
44 | Once the setup is complete, launch the Chainlit app using the following command:
45 | 
46 | ```shell
47 | chainlit run -w main.py
48 | ```
49 | 
50 | Feel free to explore the functionalities and contribute to the development of this project. Your feedback and contributions are highly appreciated!
51 | 
52 | ## Pinecone
53 | 
54 | ### What is Pinecone?
55 | 
56 | Pinecone is a specialized vector database designed to optimize the storage and querying of vector embeddings. This capability enables efficient real-time analysis and extraction of insights from complex, large-scale data. Its architecture is specifically tuned for handling the intricacies of vector data, making it an ideal choice for applications requiring rapid retrieval and analysis of such information.
57 | 
58 | In the provided example, Pinecone is utilized to create a hybrid index, which is a critical component for a hybrid retriever system. This system leverages both textual and vector-based data to enhance search and retrieval capabilities. While Pinecone is highlighted for its effective handling of vector embeddings and support for hybrid indexing, it's worth noting that other vector databases offering similar types of indexing could also be considered based on project requirements and specific use cases.
59 | 
60 | By adopting Pinecone or a similar vector database, developers can implement advanced retrieval systems that combine the strengths of traditional and vector-based search methods, leading to more nuanced and efficient data handling and retrieval solutions.
61 | 
62 | ### How do we implement it?
63 | `main.py`
64 | ```python
65 | from pinecone import Pinecone
66 | from llama_index.vector_stores.pinecone import PineconeVectorStore
67 | 
68 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
69 | 
70 | @cl.cache
71 | def load_context():
72 |     pc = Pinecone(api_key=pinecone_api_key)
73 |     pinecone_index = pc.Index("pinecone-index")
74 |     vector_store = PineconeVectorStore(
75 |         pinecone_index=pinecone_index,
76 |     )
77 | 
78 |     index = VectorStoreIndex.from_vector_store(
79 |         vector_store=vector_store,
80 |     )
81 |     return index
82 | ```
83 | `ingest.py`
84 | ```python
85 | from pinecone import Pinecone, PodSpec
86 | from llama_index.vector_stores.pinecone import PineconeVectorStore
87 | 
88 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
89 | pc = Pinecone(api_key=pinecone_api_key)
90 | 
91 | def get_pinecone_index(pc, index_name):
92 |     pinecone_index = pc.Index(index_name)
93 |     return pinecone_index
94 | 
95 | 
96 | def get_pinecone_vector_store(pinecone_index):
97 |     vector_store = PineconeVectorStore(
98 |         pinecone_index=pinecone_index,
99 |         add_sparse_vector=True,
100 |     )
101 |     return vector_store
102 | ```
103 | Use the `--gen` flag to generate a Pinecone pod if you haven't created one already:
104 | `ingest.py --gen`
```python
105 | def create_pinecone_pod(pc, index_name):
106 |     print("Creating pinecone pod")
107 |     pc.create_index(
108 |         name=index_name,
109 |         dimension=3072,
110 |         metric="dotproduct",
111 |         spec=PodSpec(environment="gcp-starter"),
112 |     )
```
113 | 
114 | ## Hybrid Retriever
115 | 
116 | ### What is a Hybrid Retriever?
117 | 
118 | A hybrid retriever is a sophisticated tool used in information retrieval systems, combining the best features of both dense and sparse vector methods to enhance search results' accuracy and relevance. In the context of AI and data search, this means leveraging the strengths of both context-understanding capabilities (dense vectors) and keyword-matching skills (sparse vectors).
In the context of AI and data search, this means leveraging the strengths of both context-understanding capabilities (dense vectors) and keyword-matching skills (sparse vectors). 119 | 120 | Typically, dense vectors are excellent at grasping the overall context of a query but may miss out on important keyword-specific details. On the other hand, sparse vectors excel at identifying exact keyword matches but might lack in understanding the broader context. A hybrid retriever merges these approaches, providing a more balanced and effective retrieval mechanism. 121 | 122 | For instance, in the field of document retrieval, such as with academic papers or medical abstracts, a hybrid approach can be particularly beneficial. By combining the contextual understanding of dense vector models with the precision of sparse retrieval methods like BM25, a hybrid retrieval pipeline can significantly improve the relevance and accuracy of search results. 123 | 124 | In practical applications, hybrid retrievers involve creating and processing both sparse and dense vectors for documents and queries. This includes tokenization processes for sparse vectors and embedding generation for dense vectors, as well as the management of these vectors within a suitable database or search engine like Pinecone or Weaviate. The retrieval process then utilizes these vectors to deliver highly relevant search results, balancing the depth of context and specificity of keywords. 125 | 126 | 127 | ### How do we implement it? 128 | ```python 129 | @cl.on_chat_start 130 | async def start(): 131 | # ... 132 | # What is important here is adding `vector_store_query_mode="hybrid"` 133 | # Is also really important to change what type of index you have, make sure 134 | # that you read the ingestion part of this README. 135 | query_engine = index.as_query_engine( 136 | streaming=True, 137 | similarity_top_k=4, 138 | vector_store_query_mode="hybrid", # Added line of code 139 | ) 140 | # ... 141 | ``` 142 | ## Advanced Ingestion 143 | 144 | ### What is advanced ingestion? 145 | 146 | Advanced ingestion involves specialized methods to optimize documents for better retrieval by large language models (LLMs). We use two main approaches: 147 | 148 | 1. **Unstructured**: Applied for all document types except PDFs, enhancing data extraction and structuring to improve LLM readability. Explore various connectors from Llama Index for optimal results. More details [here](https://github.com/Unstructured-IO/unstructured). 149 | 150 | 2. **Llama Parse**: Specifically for processing PDFs, transforming them into a more LLM-friendly format. Check it out [here](https://github.com/run-llama/llama_parse). 151 | 152 | 3. **Metadata Enhancement**: We're incorporating metadata into the documents for enriched context and searchability. You have the option to exclude them as needed. However, be mindful that each piece of metadata incurs a processing cost by the LLM due to the additional analysis required. 153 | 154 | ### How do we implement it? 155 | 156 | ```unstructured``` 157 | ```python 158 | UnstructuredReader = download_loader("UnstructuredReader") 159 | 160 | file_extractor = { 161 | # ... 
162 | ".html": UnstructuredReader(), 163 | ".txt": UnstructuredReader(), 164 | } 165 | director_reader = SimpleDirectoryReader( 166 | input_dir=input_dir, file_extractor=file_extractor 167 | ) 168 | documents = director_reader.load_data(show_progress=True) 169 | ``` 170 | 171 | `llama parse` 172 | ```python 173 | llama_parser = LlamaParse(api_key=llama_parse_api_key, result_type="markdown", verbose=True) 174 | 175 | file_extractor = { 176 | ".pdf": llama_parser, 177 | # ... 178 | } 179 | director_reader = SimpleDirectoryReader( 180 | input_dir=input_dir, file_extractor=file_extractor 181 | ) 182 | documents = director_reader.load_data(show_progress=True) 183 | ``` 184 | 185 | `metadata enhancement` 186 | ```python 187 | pipeline = IngestionPipeline( 188 | transformations=[ 189 | SentenceSplitter(chunk_size=512, chunk_overlap=126), 190 | TitleExtractor(llm=llm, num_workers=num_workers), 191 | QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers), 192 | SummaryExtractor(summaries=["prev", "self"], llm=llm, num_workers=num_workers), 193 | KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers), 194 | OpenAIEmbedding(model=EMBEDDING) 195 | ], 196 | vector_store=vector_store, 197 | ) 198 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers) 199 | ``` 200 | -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/2.Pinecone - HybridRetriever - Adv.Ingestion/chainlit.md -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/ingest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import argparse 4 | 5 | from dotenv import load_dotenv 6 | from pinecone import Pinecone, ServerlessSpec 7 | 8 | from llama_parse import LlamaParse 9 | from llama_index.core import SimpleDirectoryReader 10 | from llama_index.llms.openai import OpenAI 11 | from llama_index.core.ingestion import IngestionPipeline 12 | from llama_index.core.node_parser import SentenceSplitter 13 | from llama_index.embeddings.openai import OpenAIEmbedding 14 | from llama_index.vector_stores.pinecone import PineconeVectorStore 15 | from llama_index.core.extractors import ( 16 | TitleExtractor, 17 | # QuestionsAnsweredExtractor, 18 | # SummaryExtractor, 19 | # KeywordExtractor, 20 | ) 21 | 22 | load_dotenv() 23 | openai.api_key = os.environ.get("OPENAI_API_KEY") 24 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 25 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY") 26 | 27 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 28 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 29 | 30 | 31 | def get_pinecone_index(pc, index_name): 32 | pinecone_index = pc.Index(index_name) 33 | return pinecone_index 34 | 35 | 36 | def get_pinecone_vector_store(pinecone_index): 37 | vector_store = PineconeVectorStore( 38 | pinecone_index=pinecone_index, 39 | add_sparse_vector=True, 40 | ) 41 | return vector_store 42 | 43 | 44 | def create_pinecone_serverless_index(pc, index_name): 45 | print("Creating pinecone serverless index") 46 | pc.create_index( 47 | name=index_name, 48 | dimension=3072, 49 | metric="dotproduct", 50 | spec=ServerlessSpec(cloud="aws", region="us-east-1"), 
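        # Note: 3072 is the output dimension of OpenAI's text-embedding-3-large (the default EMBEDDING above),
        # and Pinecone only supports the sparse+dense (hybrid) queries used here on indexes created with the "dotproduct" metric.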
51 | ) 52 | 53 | 54 | def get_documents(input_dir): 55 | llama_parser = LlamaParse( 56 | api_key=llama_parse_api_key, result_type="markdown", verbose=True 57 | ) 58 | 59 | file_extractor = { 60 | ".pdf": llama_parser, 61 | } 62 | print("Reading directory") 63 | director_reader = SimpleDirectoryReader( 64 | input_dir=input_dir, file_extractor=file_extractor 65 | ) 66 | print("Starting document reading") 67 | documents = director_reader.load_data(show_progress=True) 68 | return documents 69 | 70 | 71 | def run_pipeline(documents, vector_store, llm, num_workers): 72 | pipeline = IngestionPipeline( 73 | transformations=[ 74 | SentenceSplitter(chunk_size=512, chunk_overlap=126), 75 | TitleExtractor(llm=llm, num_workers=num_workers), 76 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers), 77 | # SummaryExtractor( 78 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers 79 | # ), 80 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers), 81 | OpenAIEmbedding(model=EMBEDDING), 82 | ], 83 | vector_store=vector_store, 84 | ) 85 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers) 86 | 87 | 88 | def main(): 89 | input_dir = "./data/" 90 | index_name = "rag-index" 91 | num_cores = os.cpu_count() 92 | num_workers = min(4, num_cores) 93 | pc = Pinecone(api_key=pinecone_api_key) 94 | parser = argparse.ArgumentParser(description="Process some integers.") 95 | parser.add_argument( 96 | "--gen", 97 | action="store_true", 98 | help="Generate new pinecone index", 99 | ) 100 | args = parser.parse_args() 101 | if args.gen: 102 | create_pinecone_serverless_index(pc, index_name) 103 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024) 104 | pinecone_index = get_pinecone_index(pc, index_name) 105 | vector_store = get_pinecone_vector_store(pinecone_index) 106 | documents = get_documents(input_dir) 107 | print("Starting ingestion pipeline") 108 | run_pipeline(documents, vector_store, llm, num_workers) 109 | 110 | 111 | if __name__ == "__main__": 112 | print("Starting ingestion") 113 | main() 114 | -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import chainlit as cl 4 | 5 | from pinecone import Pinecone 6 | from llama_index.core import Settings, VectorStoreIndex 7 | from llama_index.llms.openai import OpenAI 8 | from llama_index.embeddings.openai import OpenAIEmbedding 9 | from llama_index.vector_stores.pinecone import PineconeVectorStore 10 | 11 | openai.api_key = os.environ.get("OPENAI_API_KEY") 12 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 13 | 14 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 15 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 16 | 17 | 18 | @cl.cache 19 | def load_context(): 20 | Settings.llm = OpenAI(temperature=0.1, model=MODEL, streaming=True) 21 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1) 22 | Settings.num_output = 1024 23 | Settings.context_window = 128000 24 | pc = Pinecone(api_key=pinecone_api_key) 25 | pinecone_index = pc.Index("rag-index") 26 | vector_store = PineconeVectorStore( 27 | pinecone_index=pinecone_index, 28 | ) 29 | 30 | index = VectorStoreIndex.from_vector_store( 31 | vector_store=vector_store, 32 | ) 33 | return index 34 | 35 | 36 | @cl.on_chat_start 37 | async def start(): 38 | index = load_context() 39 | 40 | query_engine 
= index.as_query_engine( 41 | streaming=True, 42 | similarity_top_k=4, 43 | vector_store_query_mode="hybrid", 44 | ) 45 | cl.user_session.set("query_engine", query_engine) 46 | 47 | message_history = [] 48 | cl.user_session.set("message_history", message_history) 49 | 50 | await cl.Message( 51 | author="Assistant", content="Hello! Im an AI assistant. How may I help you?" 52 | ).send() 53 | 54 | 55 | async def set_sources(response, response_message): 56 | label_list = [] 57 | count = 1 58 | for sr in response.source_nodes: 59 | elements = [ 60 | cl.Text( 61 | name="S" + str(count), 62 | content=f"{sr.node.text}", 63 | display="side", 64 | size="small", 65 | ) 66 | ] 67 | response_message.elements = elements 68 | label_list.append("S" + str(count)) 69 | await response_message.update() 70 | count += 1 71 | response_message.content += "\n\nSources: " + ", ".join(label_list) 72 | await response_message.update() 73 | 74 | 75 | @cl.on_message 76 | async def main(message: cl.Message): 77 | query_engine = cl.user_session.get("query_engine") 78 | message_history = cl.user_session.get("message_history") 79 | prompt_template = "Previous messages:\n" 80 | 81 | response_message = cl.Message(content="", author="Assistant") 82 | 83 | user_message = message.content 84 | 85 | for message in message_history: 86 | prompt_template += f"{message['author']}: {message['content']}\n" 87 | prompt_template += f"Human: {user_message}" 88 | 89 | response = await cl.make_async(query_engine.query)(prompt_template) 90 | 91 | for token in response.response_gen: 92 | await response_message.stream_token(token) 93 | if response.response_txt: 94 | response_message.content = response.response_txt 95 | await response_message.send() 96 | 97 | message_history.append({"author": "Human", "content": user_message}) 98 | message_history.append({"author": "AI", "content": response_message.content}) 99 | message_history = message_history[-4:] 100 | cl.user_session.set("message_history", message_history) 101 | 102 | if response.source_nodes: 103 | await set_sources(response, response_message) 104 | -------------------------------------------------------------------------------- /2.Pinecone - HybridRetriever - Adv.Ingestion/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohttp==3.9.5 3 | aiosignal==1.3.1 4 | annotated-types==0.6.0 5 | anyio==3.7.1 6 | asyncer==0.0.2 7 | attrs==23.2.0 8 | backoff==2.2.1 9 | beautifulsoup4==4.12.3 10 | bidict==0.23.1 11 | certifi==2024.2.2 12 | chainlit==1.0.505 13 | chardet==5.2.0 14 | charset-normalizer==3.3.2 15 | chevron==0.14.0 16 | click==8.1.7 17 | dataclasses-json==0.5.14 18 | dataclasses-json-speakeasy==0.5.11 19 | Deprecated==1.2.14 20 | dirtyjson==1.0.8 21 | distro==1.9.0 22 | emoji==2.11.1 23 | fastapi==0.110.2 24 | fastapi-socketio==0.0.10 25 | filelock==3.13.4 26 | filetype==1.2.0 27 | frozenlist==1.4.1 28 | fsspec==2024.3.1 29 | googleapis-common-protos==1.63.0 30 | greenlet==3.0.3 31 | grpcio==1.62.2 32 | h11==0.14.0 33 | html2text==2024.2.26 34 | httpcore==1.0.5 35 | httpx==0.27.0 36 | huggingface-hub==0.22.2 37 | idna==3.7 38 | importlib-metadata==7.0.0 39 | joblib==1.4.0 40 | jsonpath-python==1.0.6 41 | langdetect==1.0.9 42 | Lazify==0.4.0 43 | literalai==0.0.507 44 | llama-hub==0.0.79.post1 45 | llama-index==0.10.32 46 | llama-index-agent-openai==0.2.3 47 | llama-index-cli==0.1.12 48 | llama-index-core==0.10.32 49 | llama-index-embeddings-openai==0.1.9 50 | llama-index-indices-managed-llama-cloud==0.1.6 51 | 
llama-index-legacy==0.9.48 52 | llama-index-llms-openai==0.1.16 53 | llama-index-multi-modal-llms-openai==0.1.5 54 | llama-index-program-openai==0.1.6 55 | llama-index-question-gen-openai==0.1.3 56 | llama-index-readers-file==0.1.19 57 | llama-index-readers-llama-parse==0.1.4 58 | llama-index-vector-stores-pinecone==0.1.6 59 | llama-parse==0.4.2 60 | llamaindex-py-client==0.1.19 61 | lxml==5.2.1 62 | marshmallow==3.21.1 63 | multidict==6.0.5 64 | mypy-extensions==1.0.0 65 | nest-asyncio==1.6.0 66 | networkx==3.3 67 | nltk==3.8.1 68 | numpy==1.26.4 69 | openai==1.23.6 70 | opentelemetry-api==1.24.0 71 | opentelemetry-exporter-otlp==1.24.0 72 | opentelemetry-exporter-otlp-proto-common==1.24.0 73 | opentelemetry-exporter-otlp-proto-grpc==1.24.0 74 | opentelemetry-exporter-otlp-proto-http==1.24.0 75 | opentelemetry-instrumentation==0.45b0 76 | opentelemetry-proto==1.24.0 77 | opentelemetry-sdk==1.24.0 78 | opentelemetry-semantic-conventions==0.45b0 79 | packaging==23.2 80 | pandas==2.2.2 81 | pillow==10.3.0 82 | pinecone-client==3.2.2 83 | protobuf==4.25.3 84 | psutil==5.9.8 85 | pyaml==23.12.0 86 | pydantic==2.7.1 87 | pydantic_core==2.18.2 88 | PyJWT==2.8.0 89 | pypdf==4.2.0 90 | python-dateutil==2.9.0.post0 91 | python-dotenv==1.0.1 92 | python-engineio==4.9.0 93 | python-graphql-client==0.4.3 94 | python-iso639==2024.2.7 95 | python-magic==0.4.27 96 | python-multipart==0.0.9 97 | python-socketio==5.11.2 98 | pytz==2024.1 99 | PyYAML==6.0.1 100 | rapidfuzz==3.8.1 101 | regex==2024.4.16 102 | requests==2.31.0 103 | retrying==1.3.4 104 | safetensors==0.4.3 105 | simple-websocket==1.0.0 106 | six==1.16.0 107 | sniffio==1.3.1 108 | soupsieve==2.5 109 | SQLAlchemy==2.0.29 110 | starlette==0.37.2 111 | striprtf==0.0.26 112 | syncer==2.0.3 113 | tabulate==0.9.0 114 | tenacity==8.2.3 115 | tiktoken==0.6.0 116 | tokenizer==3.4.3 117 | tokenizers==0.19.1 118 | tomli==2.0.1 119 | tqdm==4.66.2 120 | transformers==4.40.1 121 | typing-inspect==0.9.0 122 | typing_extensions==4.11.0 123 | tzdata==2024.1 124 | unstructured==0.13.4 125 | unstructured-client==0.18.0 126 | uptrace==1.24.0 127 | urllib3==2.2.1 128 | uvicorn==0.25.0 129 | watchfiles==0.20.0 130 | websockets==12.0 131 | wrapt==1.16.0 132 | wsproto==1.2.0 133 | yarl==1.9.4 134 | zipp==3.18.1 135 | -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = true 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 22 | unsafe_allow_html = false 23 | 24 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 
25 | latex = false 26 | 27 | # Authorize users to upload files with messages 28 | multi_modal = false 29 | 30 | # Allows user to use speech to text 31 | [features.speech_to_text] 32 | enabled = false 33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 34 | # language = "en-US" 35 | 36 | [UI] 37 | # Name of the app and chatbot. 38 | name = "Chatbot" 39 | 40 | # Show the readme while the thread is empty. 41 | show_readme_as_default = false 42 | 43 | # Description of the app and chatbot. This is used for HTML tags. 44 | # description = "" 45 | 46 | # Large size content are by default collapsed for a cleaner ui 47 | default_collapse_content = true 48 | 49 | # The default value for the expand messages settings. 50 | default_expand_messages = false 51 | 52 | # Hide the chain of thought details from the user in the UI. 53 | hide_cot = false 54 | 55 | # Link to your github repo. This will add a github button in the UI's header. 56 | github = "https://github.com/felipearosr/GPT-Documents" 57 | 58 | # Specify a CSS file that can be used to customize the user interface. 59 | # The CSS file can be served from the public directory or via an external link. 60 | # custom_css = "/public/test.css" 61 | 62 | # Override default MUI light theme. (Check theme.ts) 63 | [UI.theme.light] 64 | #background = "#FAFAFA" 65 | #paper = "#FFFFFF" 66 | 67 | [UI.theme.light.primary] 68 | #main = "#F80061" 69 | #dark = "#980039" 70 | #light = "#FFE7EB" 71 | 72 | # Override default MUI dark theme. (Check theme.ts) 73 | [UI.theme.dark] 74 | #background = "#FAFAFA" 75 | #paper = "#FFFFFF" 76 | 77 | [UI.theme.dark.primary] 78 | #main = "#F80061" 79 | #dark = "#980039" 80 | #light = "#FFE7EB" 81 | 82 | 83 | [meta] 84 | generated_by = "1.0.101" 85 | -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expand Messages", 38 | "hideChainOfThought": "Hide Chain of Thought", 39 | "darkMode": "Dark Mode" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "Last Inputs", 47 | "noInputs": "Such empty...", 48 | "loading": "Loading..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Type your message here..." 
54 | }, 55 | "speechButton": { 56 | "start": "Start recording", 57 | "stop": "Stop recording" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Send message", 61 | "stopTask": "Stop Task" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Attach files" 65 | }, 66 | "waterMark": { 67 | "text": "Built with" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Running", 73 | "executedSuccessfully": "executed successfully", 74 | "failed": "failed", 75 | "feedbackUpdated": "Feedback updated", 76 | "updating": "Updating" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Drop your files here" 81 | }, 82 | "index": { 83 | "failedToUpload": "Failed to upload", 84 | "cancelledUploadOf": "Cancelled upload of", 85 | "couldNotReachServer": "Could not reach the server", 86 | "continuingChat": "Continuing previous chat" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Settings panel", 90 | "reset": "Reset", 91 | "cancel": "Cancel", 92 | "confirm": "Confirm" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: All", 100 | "feedbackPositive": "Feedback: Positive", 101 | "feedbackNegative": "Feedback: Negative" 102 | }, 103 | "SearchBar": { 104 | "search": "Search" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 109 | "cancel": "Cancel", 110 | "confirm": "Confirm", 111 | "deletingChat": "Deleting chat", 112 | "chatDeleted": "Chat deleted" 113 | }, 114 | "index": { 115 | "pastChats": "Past Chats" 116 | }, 117 | "ThreadList": { 118 | "empty": "Empty..." 119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Close sidebar", 122 | "openSidebar": "Open sidebar" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Go back to chat", 127 | "chatCreatedOn": "This chat was created on" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Chat", 132 | "readme": "Readme" 133 | } 134 | } 135 | }, 136 | "hooks": { 137 | "useLLMProviders": { 138 | "failedToFetchProviders": "Failed to fetch providers:" 139 | } 140 | }, 141 | "pages": { 142 | "Design": {}, 143 | "Env": { 144 | "savedSuccessfully": "Saved successfully", 145 | "requiredApiKeys": "Required API Keys", 146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 147 | }, 148 | "Page": { 149 | "notPartOfProject": "You are not part of this project." 
150 | }, 151 | "ResumeButton": { 152 | "resumeChat": "Resume Chat" 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Configura\u00e7\u00f5es", 8 | "settingsKey": "S", 9 | "APIKeys": "Chaves de API", 10 | "logout": "Sair" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Nova Conversa" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas", 22 | "loading": "Carregando...", 23 | "error": "Ocorreu um erro" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancelar envio", 28 | "removeAttachment": "Remover anexo" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Criar novo chat?", 32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.", 33 | "cancel": "Cancelar", 34 | "confirm": "Confirmar" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expandir Mensagens", 38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento", 39 | "darkMode": "Modo Escuro" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "\u00daltimas Entradas", 47 | "noInputs": "Vazio...", 48 | "loading": "Carregando..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Digite sua mensagem aqui..." 54 | }, 55 | "speechButton": { 56 | "start": "Iniciar grava\u00e7\u00e3o", 57 | "stop": "Parar grava\u00e7\u00e3o" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Enviar mensagem", 61 | "stopTask": "Parar Tarefa" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Anexar arquivos" 65 | }, 66 | "waterMark": { 67 | "text": "Constru\u00eddo com" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Executando", 73 | "executedSuccessfully": "executado com sucesso", 74 | "failed": "falhou", 75 | "feedbackUpdated": "Feedback atualizado", 76 | "updating": "Atualizando" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Solte seus arquivos aqui" 81 | }, 82 | "index": { 83 | "failedToUpload": "Falha ao enviar", 84 | "cancelledUploadOf": "Envio cancelado de", 85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor", 86 | "continuingChat": "Continuando o chat anterior" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es", 90 | "reset": "Redefinir", 91 | "cancel": "Cancelar", 92 | "confirm": "Confirmar" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: Todos", 100 | "feedbackPositive": "Feedback: Positivo", 101 | "feedbackNegative": "Feedback: Negativo" 102 | }, 103 | "SearchBar": { 104 | "search": "Buscar" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.", 109 | "cancel": "Cancelar", 110 | "confirm": "Confirmar", 111 | "deletingChat": "Deletando conversa", 112 | "chatDeleted": "Conversa deletada" 113 | }, 114 | "index": { 115 | "pastChats": "Conversas Anteriores" 116 | }, 117 | "ThreadList": { 118 | "empty": "Vazio..." 
119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Fechar barra lateral", 122 | "openSidebar": "Abrir barra lateral" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Voltar para a conversa", 127 | "chatCreatedOn": "Esta conversa foi criada em" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Conversa", 132 | "readme": "Leia-me" 133 | } 134 | }, 135 | "hooks": { 136 | "useLLMProviders": { 137 | "failedToFetchProviders": "Falha ao buscar provedores:" 138 | } 139 | }, 140 | "pages": { 141 | "Design": {}, 142 | "Env": { 143 | "savedSuccessfully": "Salvo com sucesso", 144 | "requiredApiKeys": "Chaves de API necess\u00e1rias", 145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo." 146 | }, 147 | "Page": { 148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto." 149 | }, 150 | "ResumeButton": { 151 | "resumeChat": "Continuar Conversa" 152 | } 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/.env.example: -------------------------------------------------------------------------------- 1 | # Mandatory 2 | OPENAI_API_KEY="sk-..." 3 | COHERE_API_KEY="..." # for the reranker 4 | PINECONE_API_KEY="..." # for vector database 5 | LLAMA_PARSE_API_KEY="llx-..." # for pdf ingestion 6 | 7 | # Optional 8 | MODEL="gpt-4-0125-preview" 9 | EMBEDDING="text-embedding-3-large" -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/README.md: -------------------------------------------------------------------------------- 1 | # Adding reranker, query transformations and response synthesis. 2 | 3 | This repository leverages the synergy between Cohere reranker and a hybrid retriever to merge the strengths of keyword and vector-based searches with sophisticated semantic reranking. This innovative approach guarantees not only the retrieval of a wide array of pertinent documents but also organizes them in a manner that aligns seamlessly with the user's intentions. 4 | 5 | To enhance query processing, we implement two additional methods: 6 | 7 | 1. **Multi-Step Transformations**: This method deconstructs intricate queries into simpler, more manageable subquestions, each of which is then executed against the database. The responses obtained from these subquestions guide the construction and execution of follow-up queries, ensuring a comprehensive and detailed exploration of the user's original inquiry. 8 | 9 | 2. **Refine**: This approach methodically processes each piece of retrieved text, making individual calls to the Large Language Model (LLM) for each text chunk. This sequential refinement enables a progressive enhancement of the answer, ensuring that each chunk contributes to a more complete and accurate response. 10 | 11 | By incorporating these methods, the repository not only improves the precision and relevance of search results but also ensures a deeper, more nuanced understanding and response to complex queries, enhancing overall search performance and user experience. 12 | 13 | ![](https://github.com/felipearosr/GPT-Documents/blob/main/1.Streaming%20-%20Memory%20-%20Sources/images/RAG.gif) 14 | 15 | ## Table of Contents 16 | 17 | 1. [Installation Instructions](#installation-instructions) 18 | 2. [Usage](#usage) 19 | 3. [Reranker](#reranker) 20 | 4. 
[Query Transformations](#query-transformations) 21 | 5. [Response Synthesis](#response-synthesis) 22 | 23 | 24 | ## Installation Instructions 25 | 26 | Follow these steps to set up the GPT Documents chatbot on your local machine: 27 | 28 | 1. Create a conda environment: 29 | 30 | ```shell 31 | conda create -n rag python==3.11 -y && source activate rag 32 | ``` 33 | 34 | 2. Install the required dependencies: 35 | 36 | ```shell 37 | pip install -r requirements.txt 38 | ``` 39 | 40 | 3. Load your documents into the vector store by: 41 | - Create a folder named 'data'. 42 | - Place your documents inside the 'data' folder. 43 | - Execute the 'ingest.py' script to initiate the loading process. 44 | 45 | ## Usage 46 | 47 | Once the setup is complete, launch the chainlit app using the following command: 48 | 49 | ```shell 50 | chainlit run -w main.py 51 | ``` 52 | 53 | Feel free to explore the functionalities and contribute to the development of this project. Your feedback and contributions are highly appreciated! 54 | 55 | ## Reranker 56 | 57 | ### What is a reranker and why do we use it? 58 | 59 | The Cohere reranker is a tool that enhances search quality by semantically reranking documents to align more closely with user queries, without needing major changes to existing systems. It's easy to implement and can be customized for specific needs. When combined with a hybrid retriever, which merges keyword and vector search benefits, the Cohere reranker ensures documents are not just relevant but also correctly prioritized according to the query's semantic intent, thus boosting search accuracy and user satisfaction. 60 | 61 | ### How do we implement it? 62 | 63 | ```python 64 | from llama_index.postprocessor.cohere_rerank import CohereRerank 65 | 66 | # make sure you add your cohere key to your .env file 67 | cohere_api_key = os.environ.get("COHERE_API_KEY") 68 | 69 | @cl.on_chat_start 70 | async def start(): 71 | # ... 72 | reranker = CohereRerank(api_key=cohere_api_key, top_n=3) 73 | 74 | query_engine = index.as_query_engine( 75 | streaming=True, 76 | similarity_top_k=6, 77 | node_postprocessors=[reranker], # add this line 78 | vector_store_query_mode="hybrid", 79 | query_transform=step_decompose_transform, 80 | response_synthesizer_mode=ResponseMode.REFINE, 81 | ) 82 | # ... 83 | ``` 84 | 85 | ## Query Transformations 86 | 87 | ### What are query transformations? 88 | 89 | LlamaIndex facilitates query transformations, allowing the conversion of a query into a different form for improved processing. These transformations can be single-step, where the transformation occurs once before execution, or multi-step, involving sequential transformation and execution phases with responses influencing subsequent queries. 90 | 91 | Use Cases: 92 | 93 | - **HyDE**: This technique generates a hypothetical answer document from a natural language query for more effective embedding lookup. 94 | - **Multi-Step Transformations**: Involves breaking down a complex query into smaller, manageable subquestions, executing each against the index, and using the responses to inform follow-up questions. 95 | 96 | In this case we implement the multi-step transformation, feel free to play around with other techniques. 97 | 98 | ### How do we implement it? 
99 | 100 | ```python 101 | from llama_index.core.indices.query.query_transform.base import StepDecomposeQueryTransform 102 | 103 | @cl.on_chat_start 104 | async def start(): 105 | step_decompose_transform = StepDecomposeQueryTransform(llm=MODEL, verbose=True) 106 | 107 | query_engine = index.as_query_engine( 108 | streaming=True, 109 | similarity_top_k=6, 110 | node_postprocessors=[reranker], 111 | vector_store_query_mode="hybrid", 112 | query_transform=step_decompose_transform, # add this line 113 | response_synthesizer_mode=ResponseMode.REFINE, 114 | ) 115 | ``` 116 | 117 | ## Response Synthesis 118 | 119 | ### What are the different response modes? 120 | 121 | The system supports various response modes for processing and refining answers from retrieved text chunks: 122 | 123 | 1. **Refine**: Processes each retrieved text chunk sequentially, making separate LLM calls for each, refining the answer progressively with each chunk. 124 | 125 | 2. **Compact (default)**: Similar to refine, but compacts all chunks before processing, reducing the number of LLM calls needed by concatenating chunks to fit within context windows. 126 | 127 | 3. **Tree Summarize**: Uses a summary template for all chunks and recursively condenses responses into a single final answer, ideal for summarization with multiple rounds of LLM queries. 128 | 129 | 4. **Simple Summarize**: Truncates text chunks to fit a single LLM prompt for a quick summary, potentially losing details due to truncation. 130 | 131 | 5. **No Text**: Fetches nodes without sending them to the LLM, allowing for manual inspection of retrieved chunks. 132 | 133 | 6. **Accumulate**: Applies the query to each text chunk separately, accumulating responses into an array, useful for separate detailed queries. 134 | 135 | 7. **Compact Accumulate**: A combination of compact and accumulate, compacting prompts before applying the same query to each, for efficiency with detail. 136 | 137 | Each mode is designed for different levels of detail and summarization needs. 138 | 139 | For more information visit this [link](https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/response_modes.html). 140 | 141 | ### How do we implement it? 
142 | 143 | ```python 144 | from llama_index.core.response_synthesizers import ResponseMode 145 | 146 | @cl.on_chat_start 147 | async def start(): 148 | query_engine = index.as_query_engine( 149 | streaming=True, 150 | similarity_top_k=6, 151 | node_postprocessors=[reranker], 152 | vector_store_query_mode="hybrid", 153 | query_transform=step_decompose_transform, 154 | response_synthesizer_mode=ResponseMode.REFINE, # add this line 155 | ) 156 | ``` 157 | -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/3.Reranker - Q.Transformation - Res.Synthesis/chainlit.md -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/ingest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import asyncio 4 | import argparse 5 | 6 | from dotenv import load_dotenv 7 | from pinecone import Pinecone, PodSpec 8 | 9 | from llama_index.core import SimpleDirectoryReader 10 | from llama_index.llms.openai import OpenAI 11 | from llama_index.readers.file import UnstructuredReader 12 | from llama_index.core.ingestion import IngestionPipeline 13 | from llama_index.core.node_parser import SentenceSplitter 14 | from llama_index.embeddings.openai import OpenAIEmbedding 15 | from llama_index.vector_stores.pinecone import PineconeVectorStore 16 | from llama_index.core.extractors import ( 17 | TitleExtractor, 18 | # QuestionsAnsweredExtractor, 19 | # SummaryExtractor, 20 | # KeywordExtractor, 21 | ) 22 | from llama_parse import LlamaParse 23 | 24 | load_dotenv() 25 | openai.api_key = os.environ.get("OPENAI_API_KEY") 26 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 27 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY") 28 | 29 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 30 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 31 | 32 | 33 | def get_pinecone_index(pc, index_name): 34 | pinecone_index = pc.Index(index_name) 35 | return pinecone_index 36 | 37 | 38 | def get_pinecone_vector_store(pinecone_index): 39 | vector_store = PineconeVectorStore( 40 | pinecone_index=pinecone_index, 41 | add_sparse_vector=True, 42 | ) 43 | return vector_store 44 | 45 | 46 | def create_pinecone_pod(pc, index_name): 47 | print("Creating pinecone pod") 48 | pc.create_index( 49 | name=index_name, 50 | dimension=3072, 51 | metric="dotproduct", 52 | spec=PodSpec(environment="gcp-starter"), 53 | ) 54 | 55 | 56 | def get_documents(input_dir): 57 | llama_parser = LlamaParse( 58 | api_key=llama_parse_api_key, result_type="markdown", verbose=True 59 | ) 60 | 61 | file_extractor = { 62 | ".pdf": llama_parser, 63 | ".html": UnstructuredReader(), 64 | ".txt": UnstructuredReader(), 65 | } 66 | print("Reading directory") 67 | director_reader = SimpleDirectoryReader( 68 | input_dir=input_dir, file_extractor=file_extractor 69 | ) 70 | print("Starting document reading") 71 | documents = director_reader.load_data(show_progress=True) 72 | return documents 73 | 74 | 75 | def run_pipeline(documents, vector_store, llm, num_workers): 76 | pipeline = IngestionPipeline( 77 | transformations=[ 78 | SentenceSplitter(chunk_size=512, chunk_overlap=126), 79 | TitleExtractor(llm=llm, num_workers=num_workers), 80 | # 
QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers), 81 | # SummaryExtractor( 82 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers 83 | # ), 84 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers), 85 | OpenAIEmbedding(model=EMBEDDING), 86 | ], 87 | vector_store=vector_store, 88 | ) 89 | for doc in documents: # Small patch to remove last_accessed_date from metadata 90 | k = vars(doc) 91 | del k["metadata"]["last_accessed_date"] 92 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers) 93 | 94 | 95 | async def main(): 96 | print("Starting ingestion") 97 | input_dir = "./data/source_files/" 98 | index_name = "rag-index" 99 | num_cores = os.cpu_count() 100 | num_workers = min(4, num_cores) 101 | pc = Pinecone(api_key=pinecone_api_key) 102 | parser = argparse.ArgumentParser(description="Process some integers.") 103 | parser.add_argument( 104 | "--gen", 105 | action="store_true", 106 | help="Generate new pinecone index", 107 | ) 108 | args = parser.parse_args() 109 | if args.gen: 110 | create_pinecone_pod(pc, index_name) 111 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024) 112 | pinecone_index = get_pinecone_index(pc, index_name) 113 | vector_store = get_pinecone_vector_store(pinecone_index) 114 | documents = get_documents(input_dir) 115 | print("Starting ingestion pipeline") 116 | run_pipeline(documents, vector_store, llm, num_workers) 117 | 118 | 119 | if __name__ == "__main__": 120 | asyncio.run(main()) 121 | -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import chainlit as cl 4 | 5 | from pinecone import Pinecone 6 | from llama_index.core import Settings, VectorStoreIndex 7 | from llama_index.llms.openai import OpenAI 8 | from llama_index.embeddings.openai import OpenAIEmbedding 9 | from llama_index.vector_stores.pinecone import PineconeVectorStore 10 | from llama_index.core.response_synthesizers import ResponseMode 11 | from llama_index.postprocessor.cohere_rerank import CohereRerank 12 | from llama_index.core.indices.query.query_transform.base import ( 13 | StepDecomposeQueryTransform, 14 | ) 15 | 16 | openai.api_key = os.environ.get("OPENAI_API_KEY") 17 | cohere_api_key = os.environ.get("COHERE_API_KEY") 18 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 19 | 20 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 21 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 22 | 23 | print(MODEL) 24 | 25 | 26 | @cl.cache 27 | def load_context(): 28 | Settings.llm = OpenAI(temperature=0.1, model=MODEL, streaming=True) 29 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1) 30 | Settings.num_output = 1024 31 | Settings.context_window = 128000 32 | pc = Pinecone(api_key=pinecone_api_key) 33 | pinecone_index = pc.Index("rag-index") 34 | vector_store = PineconeVectorStore( 35 | pinecone_index=pinecone_index, 36 | ) 37 | 38 | index = VectorStoreIndex.from_vector_store( 39 | vector_store=vector_store, 40 | ) 41 | return index 42 | 43 | 44 | @cl.on_chat_start 45 | async def start(): 46 | index = load_context() 47 | 48 | reranker = CohereRerank(api_key=cohere_api_key, top_n=3) 49 | step_decompose_transform = StepDecomposeQueryTransform(llm=MODEL, verbose=True) 50 | 51 | query_engine = index.as_query_engine( 52 | streaming=True, 53 | similarity_top_k=6, 54 | 
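        # similarity_top_k=6 pulls six candidate chunks from the hybrid index; the CohereRerank
        # post-processor configured above then keeps only the three most relevant ones (top_n=3).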
node_postprocessors=[reranker], 55 | vector_store_query_mode="hybrid", 56 | query_transform=step_decompose_transform, 57 | response_synthesizer_mode=ResponseMode.REFINE, 58 | ) 59 | cl.user_session.set("query_engine", query_engine) 60 | 61 | message_history = [] 62 | cl.user_session.set("message_history", message_history) 63 | 64 | await cl.Message( 65 | author="Assistant", content="Hello! Im an AI assistant. How may I help you?" 66 | ).send() 67 | 68 | 69 | async def set_sources(response, response_message): 70 | label_list = [] 71 | count = 1 72 | for sr in response.source_nodes: 73 | elements = [ 74 | cl.Text( 75 | name="S" + str(count), 76 | content=f"{sr.node.text}", 77 | display="side", 78 | size="small", 79 | ) 80 | ] 81 | response_message.elements = elements 82 | label_list.append("S" + str(count)) 83 | await response_message.update() 84 | count += 1 85 | response_message.content += "\n\nSources: " + ", ".join(label_list) 86 | await response_message.update() 87 | 88 | 89 | @cl.on_message 90 | async def main(message: cl.Message): 91 | query_engine = cl.user_session.get("query_engine") 92 | message_history = cl.user_session.get("message_history") 93 | prompt_template = "Previous messages:\n" 94 | 95 | response_message = cl.Message(content="", author="Assistant") 96 | 97 | user_message = message.content 98 | 99 | for message in message_history: 100 | prompt_template += f"{message['author']}: {message['content']}\n" 101 | prompt_template += f"Human: {user_message}" 102 | 103 | response = await cl.make_async(query_engine.query)(prompt_template) 104 | 105 | for token in response.response_gen: 106 | await response_message.stream_token(token) 107 | if response.response_txt: 108 | response_message.content = response.response_txt 109 | await response_message.send() 110 | 111 | message_history.append({"author": "Human", "content": user_message}) 112 | message_history.append({"author": "AI", "content": response_message.content}) 113 | message_history = message_history[-6:] 114 | cl.user_session.set("message_history", message_history) 115 | 116 | if response.source_nodes: 117 | await set_sources(response, response_message) 118 | -------------------------------------------------------------------------------- /3.Reranker - Q.Transformation - Res.Synthesis/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | cohere 3 | chainlit 4 | llama-index 5 | pinecone-client 6 | 7 | llama-index-vector-stores-pinecone 8 | torch 9 | pypdf 10 | llmsherpa 11 | llama-hub 12 | transformers 13 | llama-index-postprocessor-cohere-rerank -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = true 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Process and display HTML in messages. 
This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 22 | unsafe_allow_html = false 23 | 24 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 25 | latex = false 26 | 27 | # Authorize users to upload files with messages 28 | multi_modal = false 29 | 30 | # Allows user to use speech to text 31 | [features.speech_to_text] 32 | enabled = false 33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 34 | # language = "en-US" 35 | 36 | [UI] 37 | # Name of the app and chatbot. 38 | name = "Chatbot" 39 | 40 | # Show the readme while the thread is empty. 41 | show_readme_as_default = false 42 | 43 | # Description of the app and chatbot. This is used for HTML tags. 44 | # description = "" 45 | 46 | # Large size content are by default collapsed for a cleaner ui 47 | default_collapse_content = true 48 | 49 | # The default value for the expand messages settings. 50 | default_expand_messages = false 51 | 52 | # Hide the chain of thought details from the user in the UI. 53 | hide_cot = false 54 | 55 | # Link to your github repo. This will add a github button in the UI's header. 56 | # github = "" 57 | 58 | # Specify a CSS file that can be used to customize the user interface. 59 | # The CSS file can be served from the public directory or via an external link. 60 | # custom_css = "/public/test.css" 61 | 62 | # Override default MUI light theme. (Check theme.ts) 63 | [UI.theme.light] 64 | #background = "#FAFAFA" 65 | #paper = "#FFFFFF" 66 | 67 | [UI.theme.light.primary] 68 | #main = "#F80061" 69 | #dark = "#980039" 70 | #light = "#FFE7EB" 71 | 72 | # Override default MUI dark theme. (Check theme.ts) 73 | [UI.theme.dark] 74 | #background = "#FAFAFA" 75 | #paper = "#FFFFFF" 76 | 77 | [UI.theme.dark.primary] 78 | #main = "#F80061" 79 | #dark = "#980039" 80 | #light = "#FFE7EB" 81 | 82 | 83 | [meta] 84 | generated_by = "1.0.101" 85 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expand Messages", 38 | "hideChainOfThought": "Hide Chain of Thought", 39 | "darkMode": "Dark Mode" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "Last Inputs", 47 | "noInputs": "Such empty...", 48 | "loading": "Loading..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Type your message here..." 
54 | }, 55 | "speechButton": { 56 | "start": "Start recording", 57 | "stop": "Stop recording" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Send message", 61 | "stopTask": "Stop Task" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Attach files" 65 | }, 66 | "waterMark": { 67 | "text": "Built with" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Running", 73 | "executedSuccessfully": "executed successfully", 74 | "failed": "failed", 75 | "feedbackUpdated": "Feedback updated", 76 | "updating": "Updating" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Drop your files here" 81 | }, 82 | "index": { 83 | "failedToUpload": "Failed to upload", 84 | "cancelledUploadOf": "Cancelled upload of", 85 | "couldNotReachServer": "Could not reach the server", 86 | "continuingChat": "Continuing previous chat" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Settings panel", 90 | "reset": "Reset", 91 | "cancel": "Cancel", 92 | "confirm": "Confirm" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: All", 100 | "feedbackPositive": "Feedback: Positive", 101 | "feedbackNegative": "Feedback: Negative" 102 | }, 103 | "SearchBar": { 104 | "search": "Search" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 109 | "cancel": "Cancel", 110 | "confirm": "Confirm", 111 | "deletingChat": "Deleting chat", 112 | "chatDeleted": "Chat deleted" 113 | }, 114 | "index": { 115 | "pastChats": "Past Chats" 116 | }, 117 | "ThreadList": { 118 | "empty": "Empty..." 119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Close sidebar", 122 | "openSidebar": "Open sidebar" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Go back to chat", 127 | "chatCreatedOn": "This chat was created on" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Chat", 132 | "readme": "Readme" 133 | } 134 | } 135 | }, 136 | "hooks": { 137 | "useLLMProviders": { 138 | "failedToFetchProviders": "Failed to fetch providers:" 139 | } 140 | }, 141 | "pages": { 142 | "Design": {}, 143 | "Env": { 144 | "savedSuccessfully": "Saved successfully", 145 | "requiredApiKeys": "Required API Keys", 146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 147 | }, 148 | "Page": { 149 | "notPartOfProject": "You are not part of this project." 
150 | }, 151 | "ResumeButton": { 152 | "resumeChat": "Resume Chat" 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Configura\u00e7\u00f5es", 8 | "settingsKey": "S", 9 | "APIKeys": "Chaves de API", 10 | "logout": "Sair" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Nova Conversa" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas", 22 | "loading": "Carregando...", 23 | "error": "Ocorreu um erro" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancelar envio", 28 | "removeAttachment": "Remover anexo" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Criar novo chat?", 32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.", 33 | "cancel": "Cancelar", 34 | "confirm": "Confirmar" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expandir Mensagens", 38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento", 39 | "darkMode": "Modo Escuro" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "\u00daltimas Entradas", 47 | "noInputs": "Vazio...", 48 | "loading": "Carregando..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Digite sua mensagem aqui..." 54 | }, 55 | "speechButton": { 56 | "start": "Iniciar grava\u00e7\u00e3o", 57 | "stop": "Parar grava\u00e7\u00e3o" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Enviar mensagem", 61 | "stopTask": "Parar Tarefa" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Anexar arquivos" 65 | }, 66 | "waterMark": { 67 | "text": "Constru\u00eddo com" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Executando", 73 | "executedSuccessfully": "executado com sucesso", 74 | "failed": "falhou", 75 | "feedbackUpdated": "Feedback atualizado", 76 | "updating": "Atualizando" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Solte seus arquivos aqui" 81 | }, 82 | "index": { 83 | "failedToUpload": "Falha ao enviar", 84 | "cancelledUploadOf": "Envio cancelado de", 85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor", 86 | "continuingChat": "Continuando o chat anterior" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es", 90 | "reset": "Redefinir", 91 | "cancel": "Cancelar", 92 | "confirm": "Confirmar" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: Todos", 100 | "feedbackPositive": "Feedback: Positivo", 101 | "feedbackNegative": "Feedback: Negativo" 102 | }, 103 | "SearchBar": { 104 | "search": "Buscar" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.", 109 | "cancel": "Cancelar", 110 | "confirm": "Confirmar", 111 | "deletingChat": "Deletando conversa", 112 | "chatDeleted": "Conversa deletada" 113 | }, 114 | "index": { 115 | "pastChats": "Conversas Anteriores" 116 | }, 117 | "ThreadList": { 118 | "empty": "Vazio..." 
119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Fechar barra lateral", 122 | "openSidebar": "Abrir barra lateral" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Voltar para a conversa", 127 | "chatCreatedOn": "Esta conversa foi criada em" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Conversa", 132 | "readme": "Leia-me" 133 | } 134 | }, 135 | "hooks": { 136 | "useLLMProviders": { 137 | "failedToFetchProviders": "Falha ao buscar provedores:" 138 | } 139 | }, 140 | "pages": { 141 | "Design": {}, 142 | "Env": { 143 | "savedSuccessfully": "Salvo com sucesso", 144 | "requiredApiKeys": "Chaves de API necess\u00e1rias", 145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo." 146 | }, 147 | "Page": { 148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto." 149 | }, 150 | "ResumeButton": { 151 | "resumeChat": "Continuar Conversa" 152 | } 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="sk-..." 2 | PINECONE_API_KEY= 3 | COHERE_API_KEY= 4 | 5 | MODEL= 6 | EMBEDDING= -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/README.md: -------------------------------------------------------------------------------- 1 | # Evaluation, generation and optimization 2 | 3 | > [!IMPORTANT] 4 | > Optimization is untested and likely unfinished. 5 | 6 | ## Table of Contents 7 | 8 | 1. [Installation Instructions](#installation-instructions) 9 | 2. [Usage](#usage) 10 | 3. [Generation](#generation) 11 | 4. [Evaluation](#evaluation) 12 | 10. [Testing](#tested) 13 | 14 | ## Installation Instructions 15 | 16 | Follow these steps to set up the GPT Documents chatbot on your local machine: 17 | 18 | 1. Create a conda environment: 19 | 20 | ```shell 21 | conda create -n rag python==3.11 -y && source activate rag 22 | ``` 23 | 24 | 2. Install the required dependencies: 25 | 26 | ```shell 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | 3. Load your documents into the vector store by: 31 | - Create a folder named 'data'. 32 | - Place your documents inside the 'data' folder. 33 | - Execute the 'ingest.py' script to initiate the loading process. 34 | 35 | ## Usage 36 | 37 | Once the setup is complete, launch the chainlit app using the following command: 38 | 39 | ```shell 40 | chainlit run -w main.py 41 | ``` 42 | 43 | Feel free to explore the functionalities and contribute to the development of this project. Your feedback and contributions are highly appreciated! 44 | 45 | ## Generation 46 | 47 | ### Why is it important? 48 | 49 | The creation of a dataset is crucial for developing and refining a RAG. These systems combine the capabilities of information retrieval and generative language models to provide answers that are both relevant and contextually accurate. By generating and using a labeled dataset, we can train, test, and improve the RAG models more effectively, ensuring that they provide high-quality, contextually relevant answers based on the information retrieved. 50 | 51 | ### What are we generating? 52 | 53 | We are generating a `LabelledRagDataset`. This dataset is designed to train and test Retriever-Augmented Generation systems. 
You can generate this dataset by hand or with the help of a large language model (LLM), such as GPT-4. For the purposes of this example, we are generating it with `gpt-4`. 54 | 55 | The dataset consists of the following structured data: 56 | 57 | ```json 58 | { 59 | "query": "Query", 60 | "query_by": { 61 | "model_name": "gpt-4", 62 | "type": "ai" 63 | }, 64 | "reference_contexts": [ 65 | "context_1", 66 | "context_2" 67 | ], 68 | "reference_answer": "answer", 69 | "reference_answer_by": { 70 | "model_name": "gpt-4", 71 | "type": "ai" 72 | } 73 | }, 74 | ``` 75 | 76 | Each entry in the dataset includes: 77 | 78 | - `query`: The question or prompt that the RAG system needs to respond to. 79 | - `query_by`: Information about how or by whom the query was generated, it can be a model or a person. 80 | - `reference_contexts`: An array of texts that provide relevant information or context to the query. These are the pieces of information that the retriever component is expected to fetch, which will aid the generator in crafting a coherent and contextually appropriate response. 81 | - `reference_answer`: The correct or expected answer to the query, which will be used as the ground truth for evaluating the RAG system's performance. 82 | - `reference_answer_by`: Information about how or by whom the reference answer was generated. This could be a human annotator or an AI model. 83 | 84 | This structure allows for the comprehensive training and evaluation of RAG systems, ensuring they can effectively retrieve relevant information and generate appropriate responses. 85 | 86 | ### How do we implement it? 87 | 88 | The implementation is super easy thanks to LlamaIndex. 89 | 90 | ```python 91 | dataset_generator = RagDatasetGenerator.from_documents( 92 | documents, 93 | llm=llm, 94 | num_questions_per_chunk=1, 95 | show_progress=True, 96 | ) 97 | 98 | rag_dataset = dataset_generator.generate_dataset_from_nodes() 99 | ``` 100 | 101 | ## Evaluation 102 | 103 | ### Why is evaluation important? 104 | 105 | Evaluation is crucial for measuring the performance and guiding the improvement of Retriever-Augmented Generation (RAG) systems. It ensures that these systems produce accurate, relevant, and contextually appropriate responses. By evaluating various metrics, we can identify areas of strength and weakness, benchmark against other systems, and refine our approach for better outcomes. 106 | 107 | We assess the following metrics: 108 | 109 | - **Mean Correctness Score**: Accuracy of the generated answers. 110 | - **Mean Relevancy Score**: Relevance of the retrieved documents to the query. 111 | - **Mean Faithfulness Score**: Adherence of the responses to the retrieved information. 112 | - **Mean Context Similarity Score**: Similarity of the responses to the query and document context. 113 | 114 | These metrics collectively help ensure the RAG system meets quality standards and user needs. 115 | 116 | ### Notes 117 | 118 | In the process of adjusting my pipeline within `main.py`, several modifications were necessary. Notably, due to the limitations of using a trial API key for Cohere, I was unable to perform evaluations as initially intended. This constraint has influenced the functionality of the RAG, rendering my current results less applicable. If you have access to a full API key and can conduct the evaluation, I encourage you to share your findings through a pull request. 119 | 120 | Additionally, I disabled streaming functionality to accommodate these changes. 
This decision may affect the overall operation and efficiency of the system. 121 | 122 | I acknowledge that my current implementation might not be optimal. I am planning to revisit and refine it to enhance performance and reliability. Your feedback and contributions are highly welcomed to help improve this project. 123 | 124 | --- 125 | 126 | ## Testing 127 | | Tested | Function | Last Time Tested | Notes | 128 | |:-------------|:----------------|:-----------------|:---------------------------| 129 | | ✅ | Generation | 2023-03-14 | | 130 | | ✅ | Evaluation | 2023-03-14 | Had to change some things in main.py; see the [notes](#notes) above. | 131 | | ❌ | Optimization | Untested | | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/benchmark.csv: -------------------------------------------------------------------------------- 1 | base_rag 2 | 2.6136363636363638 3 | 0.0 4 | 0.11363636363636363 5 | 0.7192798337295863 6 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/4.Evaluation - Generation - Optimization/chainlit.md -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import asyncio 4 | 5 | from llama_index.core import Settings 6 | from llama_index.llms.openai import OpenAI 7 | from llama_index.core.llama_pack import download_llama_pack 8 | from llama_index.core.llama_dataset import LabelledRagDataset 9 | from main import load_query_engine, load_index 10 | 11 | openai.api_key = os.getenv("OPENAI_API_KEY") 12 | model = os.getenv("MODEL", "gpt-4-0125-preview") 13 | print(f"model = {model}") 14 | Settings.llm = OpenAI(model=model) 15 | 16 | 17 | async def evaluate(): 18 | rag_dataset = LabelledRagDataset.from_json("./data/rag_dataset.json") 19 | print("Rag dataset loaded") 20 | index = load_index() 21 | print("Index loaded") 22 | query_engine = load_query_engine(index) 23 | print("Query engine loaded") 24 | RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./rag_evaluator_pack") 25 | print("RagEvaluatorPack downloaded") 26 | rag_evaluator_pack = RagEvaluatorPack( 27 | rag_dataset=rag_dataset, query_engine=query_engine 28 | ) 29 | print("RagEvaluatorPack created") 30 | ############################################################################ 31 | # NOTE: If you have a lower tier OpenAI API subscription like Usage Tier 1 # 32 | # then you'll need to use different batch_size and sleep_time_in_seconds.  # 33 | # For Usage Tier 1, settings that seemed to work well were batch_size=5,   # 34 | # and sleep_time_in_seconds=15 (as of December 2023).
# 35 | ############################################################################ 36 | benchmark_df = await rag_evaluator_pack.arun( 37 | batch_size=20, # batches the number of openai api calls to make 38 | sleep_time_in_seconds=1, # seconds to sleep before making an api call 39 | ) 40 | print("Benchmarking complete") 41 | benchmark_df.to_csv("benchmark.csv", index=True) 42 | 43 | 44 | if __name__ == "__main__": 45 | asyncio.run(evaluate()) 46 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/generation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | 4 | from llama_index.core import SimpleDirectoryReader 5 | from llama_index.llms.openai import OpenAI 6 | from llama_index.core.llama_dataset.generator import RagDatasetGenerator 7 | 8 | 9 | openai.api_key = os.environ.get("OPENAI_API_KEY") 10 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 11 | print(f"model = {MODEL}") 12 | 13 | 14 | def get_documents(input_dir): 15 | documents = SimpleDirectoryReader(input_dir).load_data(show_progress=True) 16 | return documents 17 | 18 | 19 | def generate_dataset(documents): 20 | llm = OpenAI(model=MODEL, temperature=0.1) 21 | 22 | dataset_generator = RagDatasetGenerator.from_documents( 23 | documents, 24 | llm=llm, 25 | num_questions_per_chunk=1, 26 | show_progress=True, 27 | ) 28 | 29 | rag_dataset = dataset_generator.generate_dataset_from_nodes() 30 | return rag_dataset 31 | 32 | 33 | def main(): 34 | input_dir = "./data/source_files/" 35 | documents = get_documents(input_dir) 36 | rag_dataset = generate_dataset(documents) 37 | rag_dataset.save_json("./output/rag_dataset.json") 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/ingest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import asyncio 4 | import argparse 5 | 6 | from dotenv import load_dotenv 7 | from pinecone import Pinecone, PodSpec 8 | 9 | from llama_index.core import SimpleDirectoryReader, download_loader 10 | from llama_index.llms.openai import OpenAI 11 | from llama_index.embeddings.openai import OpenAIEmbedding 12 | from llama_index.core.ingestion import IngestionPipeline 13 | from llama_index.vector_stores.pinecone import PineconeVectorStore 14 | from llama_index.core.extractors import ( 15 | TitleExtractor, 16 | # QuestionsAnsweredExtractor, 17 | # SummaryExtractor, 18 | # KeywordExtractor, 19 | ) 20 | from llama_index.core.node_parser import SentenceSplitter 21 | from llama_parse import LlamaParse 22 | 23 | load_dotenv() 24 | openai.api_key = os.environ.get("OPENAI_API_KEY") 25 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 26 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY") 27 | 28 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 29 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 30 | 31 | 32 | def get_pinecone_index(pc, index_name): 33 | pinecone_index = pc.Index(index_name) 34 | return pinecone_index 35 | 36 | 37 | def get_pinecone_vector_store(pinecone_index): 38 | vector_store = PineconeVectorStore( 39 | pinecone_index=pinecone_index, 40 | add_sparse_vector=True, 41 | ) 42 | return vector_store 43 | 44 | 45 | def create_pinecone_pod(pc, index_name): 46 | print("Creating pinecone pod") 47 | pc.create_index( 48 | name=index_name, 49 | 
dimension=3072, 50 | metric="dotproduct", 51 | spec=PodSpec(environment="gcp-starter"), 52 | ) 53 | 54 | 55 | def get_documents(input_dir): 56 | llama_parser = LlamaParse( 57 | api_key=llama_parse_api_key, result_type="markdown", verbose=True 58 | ) 59 | 60 | UnstructuredReader = download_loader("UnstructuredReader") 61 | 62 | file_extractor = { 63 | ".pdf": llama_parser, 64 | ".html": UnstructuredReader(), 65 | ".txt": UnstructuredReader(), 66 | } 67 | print("Reading directory") 68 | director_reader = SimpleDirectoryReader( 69 | input_dir=input_dir, file_extractor=file_extractor 70 | ) 71 | print("Starting document reading") 72 | documents = director_reader.load_data(show_progress=True) 73 | return documents 74 | 75 | 76 | def run_pipeline(documents, vector_store, llm, num_workers): 77 | pipeline = IngestionPipeline( 78 | transformations=[ 79 | SentenceSplitter(chunk_size=512, chunk_overlap=126), 80 | TitleExtractor(llm=llm, num_workers=num_workers), 81 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers), 82 | # SummaryExtractor( 83 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers 84 | # ), 85 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers), 86 | OpenAIEmbedding(model=EMBEDDING), 87 | ], 88 | vector_store=vector_store, 89 | ) 90 | for doc in documents: # Small patch to remove last_accessed_date from metadata 91 | k = vars(doc) 92 | del k["metadata"]["last_accessed_date"] 93 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers) 94 | 95 | 96 | async def main(): 97 | print("Starting ingestion") 98 | input_dir = "./data/source_files/" 99 | index_name = "rag-index" 100 | num_cores = os.cpu_count() 101 | num_workers = min(4, num_cores) 102 | pc = Pinecone(api_key=pinecone_api_key) 103 | parser = argparse.ArgumentParser(description="Process some integers.") 104 | parser.add_argument( 105 | "--gen", 106 | action="store_true", 107 | help="Generate new pinecone index", 108 | ) 109 | args = parser.parse_args() 110 | if args.gen: 111 | create_pinecone_pod(pc, index_name) 112 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024) 113 | pinecone_index = get_pinecone_index(pc, index_name) 114 | vector_store = get_pinecone_vector_store(pinecone_index) 115 | documents = get_documents(input_dir) 116 | print("Starting ingestion pipeline") 117 | run_pipeline(documents, vector_store, llm, num_workers) 118 | 119 | 120 | if __name__ == "__main__": 121 | asyncio.run(main()) 122 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import chainlit as cl 4 | 5 | from pinecone import Pinecone 6 | from llama_index.core import Settings, VectorStoreIndex 7 | from llama_index.llms.openai import OpenAI 8 | from llama_index.embeddings.openai import OpenAIEmbedding 9 | from llama_index.vector_stores.pinecone import PineconeVectorStore 10 | from llama_index.core.response_synthesizers import ResponseMode 11 | 12 | # from llama_index.postprocessor.cohere_rerank import CohereRerank 13 | from llama_index.core.indices.query.query_transform.base import ( 14 | StepDecomposeQueryTransform, 15 | ) 16 | 17 | openai.api_key = os.environ.get("OPENAI_API_KEY") 18 | cohere_api_key = os.environ.get("COHERE_API_KEY") 19 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 20 | 21 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 22 | EMBEDDING = 
os.getenv("EMBEDDING", "text-embedding-3-large") 23 | 24 | 25 | @cl.cache 26 | def load_index(): 27 | Settings.llm = OpenAI( 28 | temperature=0.1, 29 | model=MODEL, # streaming=True 30 | ) 31 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1) 32 | Settings.num_output = 1024 33 | Settings.context_window = 128000 34 | pc = Pinecone(api_key=pinecone_api_key) 35 | pinecone_index = pc.Index("pinecone-index") 36 | vector_store = PineconeVectorStore( 37 | pinecone_index=pinecone_index, 38 | ) 39 | 40 | index = VectorStoreIndex.from_vector_store( 41 | vector_store=vector_store, 42 | ) 43 | return index 44 | 45 | 46 | @cl.cache 47 | def load_query_engine(index): 48 | # reranker = CohereRerank(api_key=cohere_api_key, top_n=3) 49 | step_decompose_transform = StepDecomposeQueryTransform(llm=MODEL, verbose=True) 50 | 51 | query_engine = index.as_query_engine( 52 | # streaming=True, 53 | similarity_top_k=6, 54 | # node_postprocessors=[reranker], # Reranker would require a non Trial key for evaluation. 55 | vector_store_query_mode="hybrid", 56 | query_transform=step_decompose_transform, 57 | response_synthesizer_mode=ResponseMode.REFINE, 58 | ) 59 | return query_engine 60 | 61 | 62 | @cl.on_chat_start 63 | async def start(): 64 | index = load_index() 65 | query_engine = load_query_engine(index) 66 | 67 | cl.user_session.set("query_engine", query_engine) 68 | 69 | message_history = [] 70 | cl.user_session.set("message_history", message_history) 71 | 72 | await cl.Message( 73 | author="Assistant", content="Hello! Im an AI assistant. How may I help you?" 74 | ).send() 75 | 76 | 77 | @cl.on_message 78 | async def main(message: cl.Message): 79 | query_engine = cl.user_session.get("query_engine") 80 | message_history = cl.user_session.get("message_history") 81 | prompt_template = "Previous messages:\n" 82 | 83 | response_message = cl.Message(content="", author="Assistant") 84 | 85 | user_message = message.content 86 | 87 | for message in message_history: 88 | prompt_template += f"{message['author']}: {message['content']}\n" 89 | prompt_template += f"Human: {user_message}" 90 | 91 | response = await cl.make_async(query_engine.query)(prompt_template) 92 | 93 | for token in response.response_gen: 94 | await response_message.stream_token(token) 95 | if response.response_txt: 96 | response_message.content = response.response_txt 97 | await response_message.send() 98 | 99 | message_history.append({"author": "Human", "content": user_message}) 100 | message_history.append({"author": "AI", "content": response_message.content}) 101 | message_history = message_history[-6:] 102 | cl.user_session.set("message_history", message_history) 103 | 104 | label_list = [] 105 | count = 1 106 | 107 | for sr in response.source_nodes: 108 | elements = [ 109 | cl.Text( 110 | name="S" + str(count), 111 | content=f"{sr.node.text}", 112 | display="side", 113 | size="small", 114 | ) 115 | ] 116 | response_message.elements = elements 117 | label_list.append("S" + str(count)) 118 | await response_message.update() 119 | count += 1 120 | response_message.content += "\n\nSources: " + ", ".join(label_list) 121 | await response_message.update() 122 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/optimization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from pathlib import Path 4 | 5 | 6 | from llama_index.core import ( 7 | Document, 8 | StorageContext, 9 | 
VectorStoreIndex, 10 | load_index_from_storage, 11 | ) 12 | from llama_index.core.evaluation import ( 13 | SemanticSimilarityEvaluator, 14 | BatchEvalRunner, 15 | ) 16 | 17 | # from llama_index.llms.openai import OpenAI 18 | from llama_index.readers.file import PDFReader 19 | from llama_index.core.evaluation import QueryResponseDataset 20 | from llama_index.core.node_parser import SimpleNodeParser 21 | from llama_index.embeddings.openai import OpenAIEmbedding 22 | from llama_index.core.param_tuner.base import RunResult 23 | from llama_index.experimental.param_tuner import RayTuneParamTuner 24 | from llama_index.core.evaluation.eval_utils import get_responses 25 | 26 | loader = PDFReader() 27 | docs0 = loader.load_data(file=Path("./data/llama2.pdf")) 28 | 29 | doc_text = "\n\n".join([d.get_content() for d in docs0]) 30 | docs = [Document(text=doc_text)] 31 | 32 | ###################### 33 | ###### Chnage this to work with a datageneration form Llabelleddata 34 | ###################### 35 | eval_dataset = QueryResponseDataset.from_json("data/llama2_eval_qr_dataset.json") 36 | eval_qs = eval_dataset.questions 37 | ref_response_strs = [r for (_, r) in eval_dataset.qr_pairs] 38 | 39 | 40 | def _build_index(chunk_size, docs): 41 | index_out_path = f"./storage_{chunk_size}" 42 | if not os.path.exists(index_out_path): 43 | Path(index_out_path).mkdir(parents=True, exist_ok=True) 44 | # parse docs 45 | node_parser = SimpleNodeParser.from_defaults(chunk_size=chunk_size) 46 | base_nodes = node_parser.get_nodes_from_documents(docs) 47 | 48 | # build index 49 | index = VectorStoreIndex(base_nodes) 50 | # save index to disk 51 | index.storage_context.persist(index_out_path) 52 | else: 53 | # rebuild storage context 54 | storage_context = StorageContext.from_defaults(persist_dir=index_out_path) 55 | # load index 56 | index = load_index_from_storage( 57 | storage_context, 58 | ) 59 | return index 60 | 61 | 62 | def _get_eval_batch_runner(): 63 | evaluator_s = SemanticSimilarityEvaluator(embed_model=OpenAIEmbedding()) 64 | eval_batch_runner = BatchEvalRunner( 65 | {"semantic_similarity": evaluator_s}, workers=2, show_progress=True 66 | ) 67 | 68 | return eval_batch_runner 69 | 70 | 71 | def objective_function(params_dict): 72 | chunk_size = params_dict["chunk_size"] 73 | docs = params_dict["docs"] 74 | top_k = params_dict["top_k"] 75 | eval_qs = params_dict["eval_qs"] 76 | ref_response_strs = params_dict["ref_response_strs"] 77 | 78 | # build index 79 | index = _build_index(chunk_size, docs) 80 | 81 | # query engine 82 | query_engine = index.as_query_engine(similarity_top_k=top_k) 83 | 84 | # get predicted responses 85 | pred_response_objs = get_responses(eval_qs, query_engine, show_progress=True) 86 | 87 | # run evaluator 88 | # NOTE: can uncomment other evaluators 89 | eval_batch_runner = _get_eval_batch_runner() 90 | eval_results = eval_batch_runner.evaluate_responses( 91 | eval_qs, responses=pred_response_objs, reference=ref_response_strs 92 | ) 93 | 94 | # get semantic similarity metric 95 | mean_score = np.array([r.score for r in eval_results["semantic_similarity"]]).mean() 96 | 97 | return RunResult(score=mean_score, params=params_dict) 98 | 99 | 100 | def param_tuner(): 101 | param_dict = {"chunk_size": [256, 512, 1024], "top_k": [1, 2, 5]} 102 | fixed_param_dict = { 103 | "docs": docs, 104 | "eval_qs": eval_qs[:10], 105 | "ref_response_strs": ref_response_strs[:10], 106 | } 107 | 108 | param_tuner = RayTuneParamTuner( 109 | param_fn=objective_function, 110 | param_dict=param_dict, 111 | 
fixed_param_dict=fixed_param_dict, 112 | run_config_dict={"storage_path": "/tmp/custom/ray_tune", "name": "my_exp"}, 113 | ) 114 | results = param_tuner.tune() 115 | return results 116 | 117 | 118 | def print_results(results): 119 | best_result = results.best_run_result 120 | 121 | best_top_k = results.best_run_result.params["top_k"] 122 | best_chunk_size = results.best_run_result.params["chunk_size"] 123 | print(f"Score: {best_result.score}") 124 | print(f"Top-k: {best_top_k}") 125 | print(f"Chunk size: {best_chunk_size}") 126 | 127 | 128 | def main(): 129 | results = param_tuner() 130 | print_results(results) 131 | 132 | 133 | if __name__ == "__main__": 134 | main() 135 | 136 | # NOT TESTED YET 137 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/rag_evaluator_pack/README.md: -------------------------------------------------------------------------------- 1 | DO NOT DELETE 2 | This readme file is needed to install from pyproject.toml. -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/rag_evaluator_pack/llama_index/packs/rag_evaluator/BUILD: -------------------------------------------------------------------------------- 1 | python_sources() 2 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/rag_evaluator_pack/llama_index/packs/rag_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from llama_index.packs.rag_evaluator.base import RagEvaluatorPack 2 | 3 | __all__ = ["RagEvaluatorPack"] 4 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/rag_evaluator_pack/llama_index/packs/rag_evaluator/base.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import time 4 | import warnings 5 | from collections import deque 6 | from typing import Any, List, Optional 7 | 8 | import pandas as pd 9 | import tqdm 10 | from llama_index.core.evaluation import ( 11 | CorrectnessEvaluator, 12 | EvaluationResult, 13 | FaithfulnessEvaluator, 14 | RelevancyEvaluator, 15 | SemanticSimilarityEvaluator, 16 | ) 17 | from llama_index.core.evaluation.notebook_utils import ( 18 | get_eval_results_df, 19 | ) 20 | from llama_index.core.llama_dataset import BaseLlamaDataset, BaseLlamaPredictionDataset 21 | from llama_index.core.llama_pack.base import BaseLlamaPack 22 | from llama_index.core.llms import LLM 23 | from llama_index.core.query_engine import BaseQueryEngine 24 | from llama_index.embeddings.openai import OpenAIEmbedding 25 | from llama_index.llms.openai import OpenAI 26 | from openai import RateLimitError 27 | from tqdm.asyncio import tqdm_asyncio 28 | 29 | 30 | class RagEvaluatorPack(BaseLlamaPack): 31 | """A pack for performing evaluation with your own RAG pipeline. 32 | 33 | Args: 34 | query_engine: The RAG pipeline to evaluate. 35 | rag_dataset: The BaseLlamaDataset to evaluate on. 36 | judge_llm: The LLM to use as the evaluator. 
37 | """ 38 | 39 | def __init__( 40 | self, 41 | query_engine: BaseQueryEngine, 42 | rag_dataset: BaseLlamaDataset, 43 | judge_llm: Optional[LLM] = None, 44 | show_progress: bool = True, 45 | ): 46 | self.query_engine = query_engine 47 | self.rag_dataset = rag_dataset 48 | self._num_examples = len(self.rag_dataset.examples) 49 | if judge_llm is None: 50 | self.judge_llm = OpenAI(temperature=0, model="gpt-4-1106-preview") 51 | else: 52 | assert isinstance(judge_llm, LLM) 53 | self.judge_llm = judge_llm 54 | self.show_progress = show_progress 55 | self.evals = { 56 | "correctness": [], 57 | "relevancy": [], 58 | "faithfulness": [], 59 | "context_similarity": [], 60 | } 61 | self.eval_queue = deque(range(len(rag_dataset.examples))) 62 | self.prediction_dataset = None 63 | 64 | async def _amake_predictions( 65 | self, 66 | batch_size: int = 20, 67 | sleep_time_in_seconds: int = 1, 68 | ): 69 | """Async make predictions with query engine.""" 70 | self.prediction_dataset: BaseLlamaPredictionDataset = ( 71 | await self.rag_dataset.amake_predictions_with( 72 | self.query_engine, 73 | show_progress=self.show_progress, 74 | batch_size=batch_size, 75 | sleep_time_in_seconds=sleep_time_in_seconds, 76 | ) 77 | ) 78 | 79 | def _make_predictions( 80 | self, 81 | batch_size: int = 20, 82 | sleep_time_in_seconds: int = 1, 83 | ): 84 | """Sync make predictions with query engine.""" 85 | self.prediction_dataset: BaseLlamaPredictionDataset = ( 86 | self.rag_dataset.make_predictions_with( 87 | self.query_engine, 88 | show_progress=self.show_progress, 89 | batch_size=batch_size, 90 | sleep_time_in_seconds=sleep_time_in_seconds, 91 | ) 92 | ) 93 | 94 | def _prepare_judges(self): 95 | """Construct the evaluators.""" 96 | judges = {} 97 | judges["correctness"] = CorrectnessEvaluator( 98 | llm=self.judge_llm, 99 | ) 100 | judges["relevancy"] = RelevancyEvaluator( 101 | llm=self.judge_llm, 102 | ) 103 | judges["faithfulness"] = FaithfulnessEvaluator( 104 | llm=self.judge_llm, 105 | ) 106 | judges["semantic_similarity"] = SemanticSimilarityEvaluator( 107 | embed_model=OpenAIEmbedding() 108 | ) 109 | return judges 110 | 111 | async def _areturn_null_eval_result(self, query) -> EvaluationResult: 112 | """A dummy async method that returns None. 113 | 114 | NOTE: this is used to handle case when creating async tasks for evaluating 115 | predictions where contexts do not exist. 116 | """ 117 | return EvaluationResult( 118 | query=query, 119 | ) 120 | 121 | def _return_null_eval_result(self, query) -> EvaluationResult: 122 | """A dummy async method that returns None. 123 | 124 | NOTE: this is used to handle case when creating async tasks for evaluating 125 | predictions where contexts do not exist. 
126 | """ 127 | return EvaluationResult( 128 | query=query, 129 | ) 130 | 131 | def _create_async_evaluate_example_prediction_tasks( 132 | self, judges, example, prediction, sleep_time_in_seconds 133 | ): 134 | """Collect the co-routines.""" 135 | correctness_task = judges["correctness"].aevaluate( 136 | query=example.query, 137 | response=prediction.response, 138 | reference=example.reference_answer, 139 | sleep_time_in_seconds=sleep_time_in_seconds, 140 | ) 141 | 142 | relevancy_task = judges["relevancy"].aevaluate( 143 | query=example.query, 144 | response=prediction.response, 145 | contexts=prediction.contexts, 146 | sleep_time_in_seconds=sleep_time_in_seconds, 147 | ) 148 | 149 | faithfulness_task = judges["faithfulness"].aevaluate( 150 | query=example.query, 151 | response=prediction.response, 152 | contexts=prediction.contexts, 153 | sleep_time_in_seconds=sleep_time_in_seconds, 154 | ) 155 | 156 | if example.reference_contexts and prediction.contexts: 157 | semantic_similarity_task = judges["semantic_similarity"].aevaluate( 158 | query=example.query, 159 | response="\n".join(prediction.contexts), 160 | reference="\n".join(example.reference_contexts), 161 | ) 162 | else: 163 | semantic_similarity_task = self._areturn_null_eval_result( 164 | query=example.query 165 | ) 166 | 167 | return ( 168 | correctness_task, 169 | relevancy_task, 170 | faithfulness_task, 171 | semantic_similarity_task, 172 | ) 173 | 174 | def _evaluate_example_prediction(self, judges, example, prediction): 175 | """Collect the co-routines.""" 176 | correctness_result = judges["correctness"].evaluate( 177 | query=example.query, 178 | response=prediction.response, 179 | reference=example.reference_answer, 180 | ) 181 | 182 | relevancy_result = judges["relevancy"].evaluate( 183 | query=example.query, 184 | response=prediction.response, 185 | contexts=prediction.contexts, 186 | ) 187 | 188 | faithfulness_result = judges["faithfulness"].evaluate( 189 | query=example.query, 190 | response=prediction.response, 191 | contexts=prediction.contexts, 192 | ) 193 | 194 | if example.reference_contexts and prediction.contexts: 195 | semantic_similarity_result = judges["semantic_similarity"].evaluate( 196 | query=example.query, 197 | response="\n".join(prediction.contexts), 198 | reference="\n".join(example.reference_contexts), 199 | ) 200 | else: 201 | semantic_similarity_result = self._return_null_eval_result( 202 | query=example.query 203 | ) 204 | 205 | return ( 206 | correctness_result, 207 | relevancy_result, 208 | faithfulness_result, 209 | semantic_similarity_result, 210 | ) 211 | 212 | def _save_evaluations(self): 213 | """Save evaluation json object.""" 214 | # saving evaluations 215 | evaluations_objects = { 216 | "context_similarity": [e.dict() for e in self.evals["context_similarity"]], 217 | "correctness": [e.dict() for e in self.evals["correctness"]], 218 | "faithfulness": [e.dict() for e in self.evals["faithfulness"]], 219 | "relevancy": [e.dict() for e in self.evals["relevancy"]], 220 | } 221 | 222 | with open("_evaluations.json", "w") as json_file: 223 | json.dump(evaluations_objects, json_file) 224 | 225 | def _prepare_and_save_benchmark_results(self): 226 | """Get mean score across all of the evaluated examples-predictions.""" 227 | _, mean_correctness_df = get_eval_results_df( 228 | ["base_rag"] * len(self.evals["correctness"]), 229 | self.evals["correctness"], 230 | metric="correctness", 231 | ) 232 | _, mean_relevancy_df = get_eval_results_df( 233 | ["base_rag"] * len(self.evals["relevancy"]), 234 | 
self.evals["relevancy"], 235 | metric="relevancy", 236 | ) 237 | _, mean_faithfulness_df = get_eval_results_df( 238 | ["base_rag"] * len(self.evals["faithfulness"]), 239 | self.evals["faithfulness"], 240 | metric="faithfulness", 241 | ) 242 | _, mean_context_similarity_df = get_eval_results_df( 243 | ["base_rag"] * len(self.evals["context_similarity"]), 244 | self.evals["context_similarity"], 245 | metric="context_similarity", 246 | ) 247 | 248 | mean_scores_df = pd.concat( 249 | [ 250 | mean_correctness_df.reset_index(), 251 | mean_relevancy_df.reset_index(), 252 | mean_faithfulness_df.reset_index(), 253 | mean_context_similarity_df.reset_index(), 254 | ], 255 | axis=0, 256 | ignore_index=True, 257 | ) 258 | mean_scores_df = mean_scores_df.set_index("index") 259 | mean_scores_df.index = mean_scores_df.index.set_names(["metrics"]) 260 | 261 | # save mean_scores_df 262 | mean_scores_df.to_csv("benchmark.csv") 263 | return mean_scores_df 264 | 265 | def _make_evaluations( 266 | self, 267 | batch_size, 268 | sleep_time_in_seconds, 269 | ): 270 | """Sync make evaluations.""" 271 | judges = self._prepare_judges() 272 | 273 | start_ix = self.eval_queue[0] 274 | for batch in self._batch_examples_and_preds( 275 | self.rag_dataset.examples, 276 | self.prediction_dataset.predictions, 277 | batch_size=batch_size, 278 | start_position=start_ix, 279 | ): 280 | examples, predictions = batch 281 | for example, prediction in tqdm.tqdm(zip(examples, predictions)): 282 | ( 283 | correctness_result, 284 | relevancy_result, 285 | faithfulness_result, 286 | semantic_similarity_result, 287 | ) = self._evaluate_example_prediction( 288 | judges=judges, example=example, prediction=prediction 289 | ) 290 | 291 | self.evals["correctness"].append(correctness_result) 292 | self.evals["relevancy"].append(relevancy_result) 293 | self.evals["faithfulness"].append(faithfulness_result) 294 | self.evals["context_similarity"].append(semantic_similarity_result) 295 | time.sleep(sleep_time_in_seconds) 296 | 297 | self._save_evaluations() 298 | return self._prepare_and_save_benchmark_results() 299 | 300 | def _batch_examples_and_preds( 301 | self, 302 | examples: List[Any], 303 | predictions: List[Any], 304 | batch_size: int = 10, 305 | start_position: int = 0, 306 | ): 307 | """Batches examples and predictions with a given batch_size.""" 308 | assert self._num_examples == len(predictions) 309 | for ndx in range(start_position, self._num_examples, batch_size): 310 | yield ( 311 | examples[ndx : min(ndx + batch_size, self._num_examples)], 312 | predictions[ndx : min(ndx + batch_size, self._num_examples)], 313 | ) 314 | 315 | async def _amake_evaluations(self, batch_size, sleep_time_in_seconds): 316 | """Async make evaluations.""" 317 | judges = self._prepare_judges() 318 | 319 | ix = self.eval_queue[0] 320 | batch_iterator = self._batch_examples_and_preds( 321 | self.rag_dataset.examples, 322 | self.prediction_dataset.predictions, 323 | batch_size=batch_size, 324 | start_position=ix, 325 | ) 326 | total_batches = (self._num_examples - ix + 1) / batch_size + ( 327 | (self._num_examples - ix + 1) % batch_size != 0 328 | ) 329 | if self.show_progress: 330 | batch_iterator = tqdm_asyncio( 331 | batch_iterator, 332 | desc="Batch processing of evaluations", 333 | total=total_batches, 334 | ) 335 | 336 | for batch in batch_iterator: 337 | examples, predictions = batch 338 | tasks = [] 339 | for example, prediction in zip(examples, predictions): 340 | ( 341 | correctness_task, 342 | relevancy_task, 343 | faithfulness_task, 344 | 
semantic_similarity_task, 345 | ) = self._create_async_evaluate_example_prediction_tasks( 346 | judges=judges, 347 | example=example, 348 | prediction=prediction, 349 | sleep_time_in_seconds=sleep_time_in_seconds, 350 | ) 351 | 352 | tasks += [ 353 | correctness_task, 354 | relevancy_task, 355 | faithfulness_task, 356 | semantic_similarity_task, 357 | ] 358 | 359 | # do this in batches to avoid RateLimitError 360 | try: 361 | eval_results: List[EvaluationResult] = await asyncio.gather(*tasks) 362 | except RateLimitError as err: 363 | if self.show_progress: 364 | batch_iterator.close() 365 | raise ValueError( 366 | "You've hit rate limits on your OpenAI subscription. This" 367 | " `RagEvaluatorPack` maintains state of evaluations. Simply" 368 | " re-invoke .arun() in order to continue from where you left" 369 | " off." 370 | ) from err 371 | # store in memory 372 | # since final result of eval_results respects order of inputs 373 | # just take appropriate slices 374 | self.evals["correctness"] += eval_results[::4] 375 | self.evals["relevancy"] += eval_results[1::4] 376 | self.evals["faithfulness"] += eval_results[2::4] 377 | self.evals["context_similarity"] += eval_results[3::4] 378 | # update queue 379 | for _ in range(batch_size): 380 | if self.eval_queue: 381 | self.eval_queue.popleft() 382 | ix += 1 383 | if self.show_progress: 384 | batch_iterator.update() 385 | batch_iterator.refresh() 386 | 387 | self._save_evaluations() 388 | return self._prepare_and_save_benchmark_results() 389 | 390 | def run(self, batch_size: int = 10, sleep_time_in_seconds: int = 1): 391 | if batch_size > 10: 392 | warnings.warn( 393 | "You've set a large batch_size (>10). If using OpenAI GPT-4 as " 394 | " `judge_llm` (which is the default judge_llm)," 395 | " you may experience a RateLimitError. Previous successful eval " 396 | " responses are cached per batch. So hitting a RateLimitError" 397 | " would mean you'd lose all of the current batches successful " 398 | " GPT-4 calls." 399 | ) 400 | if self.prediction_dataset is None: 401 | self._make_predictions(batch_size, sleep_time_in_seconds) 402 | 403 | # evaluate predictions 404 | eval_sleep_time_in_seconds = ( 405 | sleep_time_in_seconds * 2 406 | ) # since we make 3 evaluator llm calls 407 | eval_batch_size = int(max(batch_size / 4, 1)) 408 | return self._make_evaluations( 409 | batch_size=eval_batch_size, sleep_time_in_seconds=eval_sleep_time_in_seconds 410 | ) 411 | 412 | async def arun( 413 | self, 414 | batch_size: int = 10, 415 | sleep_time_in_seconds: int = 1, 416 | ): 417 | if batch_size > 10: 418 | warnings.warn( 419 | "You've set a large batch_size (>10). If using OpenAI GPT-4 as " 420 | " `judge_llm` (which is the default judge_llm)," 421 | " you may experience a RateLimitError. Previous successful eval " 422 | " responses are cached per batch. So hitting a RateLimitError" 423 | " would mean you'd lose all of the current batches successful " 424 | " GPT-4 calls." 
425 | ) 426 | 427 | # make predictions 428 | if self.prediction_dataset is None: 429 | await self._amake_predictions(batch_size, sleep_time_in_seconds) 430 | 431 | # evaluate predictions 432 | eval_sleep_time_in_seconds = ( 433 | sleep_time_in_seconds * 2 434 | ) # since we make 3 evaluator llm calls and default is gpt-4 435 | # which is heavily rate-limited 436 | eval_batch_size = int(max(batch_size / 4, 1)) 437 | return await self._amake_evaluations( 438 | batch_size=eval_batch_size, sleep_time_in_seconds=eval_sleep_time_in_seconds 439 | ) 440 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/rag_evaluator_pack/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "poetry.core.masonry.api" 3 | requires = ["poetry-core"] 4 | 5 | [tool.codespell] 6 | check-filenames = true 7 | check-hidden = true 8 | skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" 9 | 10 | [tool.llamahub] 11 | contains_example = true 12 | import_path = "llama_index.packs.rag_evaluator" 13 | 14 | [tool.llamahub.class_authors] 15 | RagEvaluatorPack = "nerdai" 16 | 17 | [tool.mypy] 18 | disallow_untyped_defs = true 19 | exclude = ["_static", "build", "examples", "notebooks", "venv"] 20 | ignore_missing_imports = true 21 | python_version = "3.8" 22 | 23 | [tool.poetry] 24 | authors = ["Your Name "] 25 | description = "llama-index packs rag_evaluator integration" 26 | exclude = ["**/BUILD"] 27 | keywords = ["benchmarks", "evaluation", "rag"] 28 | license = "MIT" 29 | maintainers = ["nerdai"] 30 | name = "llama-index-packs-rag-evaluator" 31 | readme = "README.md" 32 | version = "0.1.3" 33 | 34 | [tool.poetry.dependencies] 35 | python = ">=3.8.1,<4.0" 36 | llama-index-core = "^0.10.1" 37 | llama-index-llms-openai = "^0.1.1" 38 | 39 | [tool.poetry.group.dev.dependencies] 40 | ipython = "8.10.0" 41 | jupyter = "^1.0.0" 42 | mypy = "0.991" 43 | pre-commit = "3.2.0" 44 | pylint = "2.15.10" 45 | pytest = "7.2.1" 46 | pytest-mock = "3.11.1" 47 | ruff = "0.0.292" 48 | tree-sitter-languages = "^1.8.0" 49 | types-Deprecated = ">=0.1.0" 50 | types-PyYAML = "^6.0.12.12" 51 | types-protobuf = "^4.24.0.4" 52 | types-redis = "4.5.5.0" 53 | types-requests = "2.28.11.8" 54 | types-setuptools = "67.1.0.0" 55 | 56 | [tool.poetry.group.dev.dependencies.black] 57 | extras = ["jupyter"] 58 | version = "<=23.9.1,>=23.7.0" 59 | 60 | [tool.poetry.group.dev.dependencies.codespell] 61 | extras = ["toml"] 62 | version = ">=v2.2.6" 63 | 64 | [[tool.poetry.packages]] 65 | include = "llama_index/" 66 | -------------------------------------------------------------------------------- /4.Evaluation - Generation - Optimization/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | cohere 3 | chainlit 4 | llama-index 5 | pinecone-client 6 | 7 | llama-index-vector-stores-pinecone 8 | torch 9 | pypdf 10 | llmsherpa 11 | llama-hub 12 | transformers 13 | llama-index-postprocessor-cohere-rerank 14 | llama-index-core # Feb 21 2024 15 | llama-index-llms-openai # Feb 21 2024 16 | llama-index-embeddings-openai 17 | spacy -------------------------------------------------------------------------------- /5.Intent Detection Agent/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 
3 | enable_telemetry = true 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) 22 | unsafe_allow_html = false 23 | 24 | # Process and display mathematical expressions. This can clash with "$" characters in messages. 25 | latex = false 26 | 27 | # Authorize users to upload files with messages 28 | multi_modal = false 29 | 30 | # Allows user to use speech to text 31 | [features.speech_to_text] 32 | enabled = false 33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 34 | # language = "en-US" 35 | 36 | [UI] 37 | # Name of the app and chatbot. 38 | name = "Chatbot" 39 | 40 | # Show the readme while the thread is empty. 41 | show_readme_as_default = false 42 | 43 | # Description of the app and chatbot. This is used for HTML tags. 44 | # description = "" 45 | 46 | # Large size content are by default collapsed for a cleaner ui 47 | default_collapse_content = true 48 | 49 | # The default value for the expand messages settings. 50 | default_expand_messages = false 51 | 52 | # Hide the chain of thought details from the user in the UI. 53 | hide_cot = false 54 | 55 | # Link to your github repo. This will add a github button in the UI's header. 56 | # github = "" 57 | 58 | # Specify a CSS file that can be used to customize the user interface. 59 | # The CSS file can be served from the public directory or via an external link. 60 | # custom_css = "/public/test.css" 61 | 62 | # Override default MUI light theme. (Check theme.ts) 63 | [UI.theme.light] 64 | #background = "#FAFAFA" 65 | #paper = "#FFFFFF" 66 | 67 | [UI.theme.light.primary] 68 | #main = "#F80061" 69 | #dark = "#980039" 70 | #light = "#FFE7EB" 71 | 72 | # Override default MUI dark theme. 
(Check theme.ts) 73 | [UI.theme.dark] 74 | #background = "#FAFAFA" 75 | #paper = "#FFFFFF" 76 | 77 | [UI.theme.dark.primary] 78 | #main = "#F80061" 79 | #dark = "#980039" 80 | #light = "#FFE7EB" 81 | 82 | 83 | [meta] 84 | generated_by = "1.0.101" 85 | -------------------------------------------------------------------------------- /5.Intent Detection Agent/.chainlit/translations/en-US.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Settings", 8 | "settingsKey": "S", 9 | "APIKeys": "API Keys", 10 | "logout": "Logout" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "New Chat" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Task List", 22 | "loading": "Loading...", 23 | "error": "An error occured" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancel upload", 28 | "removeAttachment": "Remove attachment" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Create new chat?", 32 | "clearChat": "This will clear the current messages and start a new chat.", 33 | "cancel": "Cancel", 34 | "confirm": "Confirm" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expand Messages", 38 | "hideChainOfThought": "Hide Chain of Thought", 39 | "darkMode": "Dark Mode" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "Last Inputs", 47 | "noInputs": "Such empty...", 48 | "loading": "Loading..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Type your message here..." 54 | }, 55 | "speechButton": { 56 | "start": "Start recording", 57 | "stop": "Stop recording" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Send message", 61 | "stopTask": "Stop Task" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Attach files" 65 | }, 66 | "waterMark": { 67 | "text": "Built with" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Running", 73 | "executedSuccessfully": "executed successfully", 74 | "failed": "failed", 75 | "feedbackUpdated": "Feedback updated", 76 | "updating": "Updating" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Drop your files here" 81 | }, 82 | "index": { 83 | "failedToUpload": "Failed to upload", 84 | "cancelledUploadOf": "Cancelled upload of", 85 | "couldNotReachServer": "Could not reach the server", 86 | "continuingChat": "Continuing previous chat" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Settings panel", 90 | "reset": "Reset", 91 | "cancel": "Cancel", 92 | "confirm": "Confirm" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: All", 100 | "feedbackPositive": "Feedback: Positive", 101 | "feedbackNegative": "Feedback: Negative" 102 | }, 103 | "SearchBar": { 104 | "search": "Search" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "This will delete the thread as well as it's messages and elements.", 109 | "cancel": "Cancel", 110 | "confirm": "Confirm", 111 | "deletingChat": "Deleting chat", 112 | "chatDeleted": "Chat deleted" 113 | }, 114 | "index": { 115 | "pastChats": "Past Chats" 116 | }, 117 | "ThreadList": { 118 | "empty": "Empty..." 
119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Close sidebar", 122 | "openSidebar": "Open sidebar" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Go back to chat", 127 | "chatCreatedOn": "This chat was created on" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Chat", 132 | "readme": "Readme" 133 | } 134 | } 135 | }, 136 | "hooks": { 137 | "useLLMProviders": { 138 | "failedToFetchProviders": "Failed to fetch providers:" 139 | } 140 | }, 141 | "pages": { 142 | "Design": {}, 143 | "Env": { 144 | "savedSuccessfully": "Saved successfully", 145 | "requiredApiKeys": "Required API Keys", 146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage." 147 | }, 148 | "Page": { 149 | "notPartOfProject": "You are not part of this project." 150 | }, 151 | "ResumeButton": { 152 | "resumeChat": "Resume Chat" 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /5.Intent Detection Agent/.chainlit/translations/pt-BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "components": { 3 | "atoms": { 4 | "buttons": { 5 | "userButton": { 6 | "menu": { 7 | "settings": "Configura\u00e7\u00f5es", 8 | "settingsKey": "S", 9 | "APIKeys": "Chaves de API", 10 | "logout": "Sair" 11 | } 12 | } 13 | } 14 | }, 15 | "molecules": { 16 | "newChatButton": { 17 | "newChat": "Nova Conversa" 18 | }, 19 | "tasklist": { 20 | "TaskList": { 21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas", 22 | "loading": "Carregando...", 23 | "error": "Ocorreu um erro" 24 | } 25 | }, 26 | "attachments": { 27 | "cancelUpload": "Cancelar envio", 28 | "removeAttachment": "Remover anexo" 29 | }, 30 | "newChatDialog": { 31 | "createNewChat": "Criar novo chat?", 32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.", 33 | "cancel": "Cancelar", 34 | "confirm": "Confirmar" 35 | }, 36 | "settingsModal": { 37 | "expandMessages": "Expandir Mensagens", 38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento", 39 | "darkMode": "Modo Escuro" 40 | } 41 | }, 42 | "organisms": { 43 | "chat": { 44 | "history": { 45 | "index": { 46 | "lastInputs": "\u00daltimas Entradas", 47 | "noInputs": "Vazio...", 48 | "loading": "Carregando..." 49 | } 50 | }, 51 | "inputBox": { 52 | "input": { 53 | "placeholder": "Digite sua mensagem aqui..." 
54 | }, 55 | "speechButton": { 56 | "start": "Iniciar grava\u00e7\u00e3o", 57 | "stop": "Parar grava\u00e7\u00e3o" 58 | }, 59 | "SubmitButton": { 60 | "sendMessage": "Enviar mensagem", 61 | "stopTask": "Parar Tarefa" 62 | }, 63 | "UploadButton": { 64 | "attachFiles": "Anexar arquivos" 65 | }, 66 | "waterMark": { 67 | "text": "Constru\u00eddo com" 68 | } 69 | }, 70 | "Messages": { 71 | "index": { 72 | "running": "Executando", 73 | "executedSuccessfully": "executado com sucesso", 74 | "failed": "falhou", 75 | "feedbackUpdated": "Feedback atualizado", 76 | "updating": "Atualizando" 77 | } 78 | }, 79 | "dropScreen": { 80 | "dropYourFilesHere": "Solte seus arquivos aqui" 81 | }, 82 | "index": { 83 | "failedToUpload": "Falha ao enviar", 84 | "cancelledUploadOf": "Envio cancelado de", 85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor", 86 | "continuingChat": "Continuando o chat anterior" 87 | }, 88 | "settings": { 89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es", 90 | "reset": "Redefinir", 91 | "cancel": "Cancelar", 92 | "confirm": "Confirmar" 93 | } 94 | }, 95 | "threadHistory": { 96 | "sidebar": { 97 | "filters": { 98 | "FeedbackSelect": { 99 | "feedbackAll": "Feedback: Todos", 100 | "feedbackPositive": "Feedback: Positivo", 101 | "feedbackNegative": "Feedback: Negativo" 102 | }, 103 | "SearchBar": { 104 | "search": "Buscar" 105 | } 106 | }, 107 | "DeleteThreadButton": { 108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.", 109 | "cancel": "Cancelar", 110 | "confirm": "Confirmar", 111 | "deletingChat": "Deletando conversa", 112 | "chatDeleted": "Conversa deletada" 113 | }, 114 | "index": { 115 | "pastChats": "Conversas Anteriores" 116 | }, 117 | "ThreadList": { 118 | "empty": "Vazio..." 119 | }, 120 | "TriggerButton": { 121 | "closeSidebar": "Fechar barra lateral", 122 | "openSidebar": "Abrir barra lateral" 123 | } 124 | }, 125 | "Thread": { 126 | "backToChat": "Voltar para a conversa", 127 | "chatCreatedOn": "Esta conversa foi criada em" 128 | } 129 | }, 130 | "header": { 131 | "chat": "Conversa", 132 | "readme": "Leia-me" 133 | } 134 | }, 135 | "hooks": { 136 | "useLLMProviders": { 137 | "failedToFetchProviders": "Falha ao buscar provedores:" 138 | } 139 | }, 140 | "pages": { 141 | "Design": {}, 142 | "Env": { 143 | "savedSuccessfully": "Salvo com sucesso", 144 | "requiredApiKeys": "Chaves de API necess\u00e1rias", 145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo." 146 | }, 147 | "Page": { 148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto." 149 | }, 150 | "ResumeButton": { 151 | "resumeChat": "Continuar Conversa" 152 | } 153 | } 154 | } 155 | } -------------------------------------------------------------------------------- /5.Intent Detection Agent/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="sk-..." 2 | PINECONE_API_KEY= 3 | COHERE_API_KEY= -------------------------------------------------------------------------------- /5.Intent Detection Agent/README.md: -------------------------------------------------------------------------------- 1 | # Agent for intent detection 2 | 3 | In this module, we introduce an agent designed to understand and interpret user intentions effectively. The primary goal here is to redirect queries to a more compact and cost-efficient language model. 
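The sketch below is a minimal illustration of that pattern, not the exact code in this module's `main.py`: a `RouterQueryEngine` with an LLM-based selector picks between a cheap direct-LLM engine and the full retrieval engine, based on the tool descriptions. The class name, tool names, prompt text, and the `data` folder used here are placeholders.

```python
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.query_engine import CustomQueryEngine, RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool
from llama_index.llms.openai import OpenAI


class DirectLLMQueryEngine(CustomQueryEngine):
    """Answers with a small LLM directly, skipping retrieval entirely."""

    llm: OpenAI
    prompt: str

    def custom_query(self, query_str: str) -> str:
        return str(self.llm.complete(self.prompt.format(query=query_str)))


# Assumption: the documents to index live in ./data
index = VectorStoreIndex.from_documents(SimpleDirectoryReader("data").load_data())

direct_engine = DirectLLMQueryEngine(
    llm=OpenAI(model="gpt-3.5-turbo"),  # the small, cheap model
    prompt="Answer briefly, and refuse anything unrelated to the indexed documents: {query}",
)
rag_engine = index.as_query_engine(similarity_top_k=6)  # the full RAG pipeline

router = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),  # an LLM reads the descriptions and picks one tool
    query_engine_tools=[
        QueryEngineTool.from_defaults(
            query_engine=direct_engine,
            name="direct_llm",
            description="Greetings, questions about the assistant, or anything unrelated to the documents.",
        ),
        QueryEngineTool.from_defaults(
            query_engine=rag_engine,
            name="document_rag",
            description="Questions that must be answered from the indexed documents.",
        ),
    ],
)

print(router.query("What can you do?"))          # likely routed to the small model
print(router.query("Summarize the documents."))  # routed to the retrieval engine
```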
By default, we employ gpt3.5-turbo for this task, due to its efficiency and performance. However, the implementation is modular, allowing for easy substitution with your preferred large language model (LLM). 4 | 5 | This instance represents our initial step into integrating agents within our RAG. It is designed as a foundational example, illustrating basic usage and integration strategies. We plan to build upon this groundwork with more advanced features and use cases in future updates. -------------------------------------------------------------------------------- /5.Intent Detection Agent/chainlit.md: -------------------------------------------------------------------------------- 1 | # Welcome to Chainlit! 🚀🤖 2 | 3 | Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs. 4 | 5 | ## Useful Links 🔗 6 | 7 | - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚 8 | - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬 9 | 10 | We can't wait to see what you create with Chainlit! Happy coding! 💻😊 11 | 12 | ## Welcome screen 13 | 14 | To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty. 15 | -------------------------------------------------------------------------------- /5.Intent Detection Agent/images/RAGSources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/5.Intent Detection Agent/images/RAGSources.png -------------------------------------------------------------------------------- /5.Intent Detection Agent/ingest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import asyncio 4 | import argparse 5 | 6 | from dotenv import load_dotenv 7 | from pinecone import Pinecone, PodSpec 8 | 9 | from llama_index.core import SimpleDirectoryReader, download_loader 10 | from llama_index.llms.openai import OpenAI 11 | from llama_index.embeddings.openai import OpenAIEmbedding 12 | from llama_index.core.ingestion import IngestionPipeline 13 | from llama_index.vector_stores.pinecone import PineconeVectorStore 14 | from llama_index.core.extractors import ( 15 | TitleExtractor, 16 | # QuestionsAnsweredExtractor, 17 | # SummaryExtractor, 18 | # KeywordExtractor, 19 | ) 20 | from llama_index.core.node_parser import SentenceSplitter 21 | from llama_parse import LlamaParse 22 | 23 | load_dotenv() 24 | openai.api_key = os.environ.get("OPENAI_API_KEY") 25 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 26 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY") 27 | 28 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 29 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 30 | 31 | 32 | def get_pinecone_index(pc, index_name): 33 | pinecone_index = pc.Index(index_name) 34 | return pinecone_index 35 | 36 | 37 | def get_pinecone_vector_store(pinecone_index): 38 | vector_store = PineconeVectorStore( 39 | pinecone_index=pinecone_index, 40 | add_sparse_vector=True, 41 | ) 42 | return vector_store 43 | 44 | 45 | def create_pinecone_pod(pc, index_name): 46 | print("Creating pinecone pod") 47 | pc.create_index( 
48 | name=index_name, 49 | dimension=3072, 50 | metric="dotproduct", 51 | spec=PodSpec(environment="gcp-starter"), 52 | ) 53 | 54 | 55 | def get_documents(input_dir): 56 | llama_parser = LlamaParse( 57 | api_key=llama_parse_api_key, result_type="markdown", verbose=True 58 | ) 59 | 60 | UnstructuredReader = download_loader("UnstructuredReader") 61 | 62 | file_extractor = { 63 | ".pdf": llama_parser, 64 | ".html": UnstructuredReader(), 65 | ".txt": UnstructuredReader(), 66 | } 67 | print("Reading directory") 68 | director_reader = SimpleDirectoryReader( 69 | input_dir=input_dir, file_extractor=file_extractor 70 | ) 71 | print("Starting document reading") 72 | documents = director_reader.load_data(show_progress=True) 73 | return documents 74 | 75 | 76 | def run_pipeline(documents, vector_store, llm, num_workers): 77 | pipeline = IngestionPipeline( 78 | transformations=[ 79 | SentenceSplitter(chunk_size=512, chunk_overlap=126), 80 | TitleExtractor(llm=llm, num_workers=num_workers), 81 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers), 82 | # SummaryExtractor( 83 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers 84 | # ), 85 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers), 86 | OpenAIEmbedding(model=EMBEDDING), 87 | ], 88 | vector_store=vector_store, 89 | ) 90 | for doc in documents: # Small patch to remove last_accessed_date from metadata 91 | k = vars(doc) 92 | del k["metadata"]["last_accessed_date"] 93 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers) 94 | 95 | 96 | async def main(): 97 | print("Starting ingestion") 98 | input_dir = "./data/source_files/" 99 | index_name = "rag-index" 100 | num_cores = os.cpu_count() 101 | num_workers = min(4, num_cores) 102 | pc = Pinecone(api_key=pinecone_api_key) 103 | parser = argparse.ArgumentParser(description="Process some integers.") 104 | parser.add_argument( 105 | "--gen", 106 | action="store_true", 107 | help="Generate new pinecone index", 108 | ) 109 | args = parser.parse_args() 110 | if args.gen: 111 | create_pinecone_pod(pc, index_name) 112 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024) 113 | pinecone_index = get_pinecone_index(pc, index_name) 114 | vector_store = get_pinecone_vector_store(pinecone_index) 115 | documents = get_documents(input_dir) 116 | print("Starting ingestion pipeline") 117 | run_pipeline(documents, vector_store, llm, num_workers) 118 | 119 | 120 | if __name__ == "__main__": 121 | asyncio.run(main()) 122 | -------------------------------------------------------------------------------- /5.Intent Detection Agent/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import chainlit as cl 4 | 5 | from pinecone import Pinecone 6 | from llama_index.core import Settings, VectorStoreIndex 7 | from llama_index.llms.openai import OpenAI 8 | from llama_index.embeddings.openai import OpenAIEmbedding 9 | from llama_index.vector_stores.pinecone import PineconeVectorStore 10 | from llama_index.core.response_synthesizers import ResponseMode 11 | from llama_index.postprocessor.cohere_rerank import CohereRerank 12 | from llama_index.core.indices.query.query_transform.base import ( 13 | StepDecomposeQueryTransform, 14 | ) 15 | from llama_index.core.tools import QueryEngineTool 16 | from llama_index.core.selectors import LLMSingleSelector 17 | from llama_index.core.query_engine import CustomQueryEngine, RouterQueryEngine 18 | from 
llama_index.core.base.response.schema import StreamingResponse 19 | 20 | openai.api_key = os.environ.get("OPENAI_API_KEY") 21 | cohere_api_key = os.environ.get("COHERE_API_KEY") 22 | pinecone_api_key = os.environ.get("PINECONE_API_KEY") 23 | 24 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview") 25 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large") 26 | 27 | direct_llm_prompt = ( 28 | "Given the user query, respond as best as possible following these guidelines:\n" 29 | "- If the intent of the user is to get information about the abilities of the AI, respond with: " 30 | "The AI is a language model that can answer questions, generate text, summarize documents, and more. \n" 31 | "- If the intent of the user is harmful, respond with: I cannot help with that. \n" 32 | "- If the intent of the user is to get information outside of the context given, respond with: " 33 | "I cannot help with that. Please ask something that is relevant to the documents in the given context. \n" 34 | "Query: {query}" 35 | ) 36 | 37 | 38 | class LlmQueryEngine(CustomQueryEngine): 39 | """Custom query engine for direct calls to the LLM model.""" 40 | 41 | llm: OpenAI 42 | prompt: str 43 | 44 | def custom_query(self, query_str: str): 45 | """Query the LLM directly and wrap the completion as a stream.""" 46 | llm_prompt = self.prompt.format(query=query_str) 47 | llm_response = self.llm.complete(llm_prompt, formatted=False) 48 | 49 | def response_gen(): 50 | # Yield the completion text as a single chunk so the caller can 51 | # consume it through the same streaming interface as the RAG 52 | # query engine. 53 | yield llm_response.text.replace("AI: ", "").strip() 54 | 55 | return StreamingResponse(response_gen=response_gen()) 56 | 57 | 58 | @cl.cache 59 | def load_context(): 60 | Settings.llm = OpenAI(temperature=0.1, model=MODEL, streaming=True) 61 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1) 62 | Settings.num_output = 1024 63 | Settings.context_window = 128000 64 | pc = Pinecone(api_key=pinecone_api_key) 65 | pinecone_index = pc.Index("rag-index") 66 | vector_store = PineconeVectorStore( 67 | pinecone_index=pinecone_index, add_sparse_vector=True,  # match ingest.py so hybrid queries get sparse vectors 68 | ) 69 | 70 | index = VectorStoreIndex.from_vector_store( 71 | vector_store=vector_store, 72 | ) 73 | return index 74 | 75 | 76 | @cl.step 77 | async def router_query_engine(): 78 | vector_query_engine = cl.user_session.get("vector_query_engine") 79 | llm_query_engine = cl.user_session.get("simple_query_engine") 80 | 81 | list_tool = QueryEngineTool.from_defaults( 82 | query_engine=llm_query_engine, 83 | name="LLM Query Engine", 84 | description=( 85 | "Useful for when the INTENT of the user isn't clear, is broad, " 86 | "or when the user is asking general questions that have nothing " 87 | "to do with the indexed documents. Use this tool when the other tool is not useful." 88 | ), 89 | ) 90 | 91 | vector_tool = QueryEngineTool.from_defaults( 92 | query_engine=vector_query_engine, 93 | name="Vector Query Engine", 94 | description=( 95 | "Useful for retrieving specific context about Paul Graham or anything related " 96 | "to startup incubation, essay writing, programming languages, venture funding, " 97 | "Y Combinator, Lisp programming, or anything related to the field of technology " 98 | "entrepreneurship and innovation."
99 | ), 100 | ) 101 | query_engine = RouterQueryEngine( 102 | selector=LLMSingleSelector.from_defaults(), 103 | query_engine_tools=[ 104 | list_tool, 105 | vector_tool, 106 | ], 107 | ) 108 | print("Router query engine created.") 109 | print(query_engine) 110 | return query_engine 111 | 112 | 113 | @cl.on_chat_start 114 | async def start(): 115 | index = load_context() 116 | 117 | reranker = CohereRerank(api_key=cohere_api_key, top_n=3) 118 | step_decompose_transform = StepDecomposeQueryTransform(llm=OpenAI(model=MODEL), verbose=True)  # needs an LLM instance, not a model-name string 119 | 120 | vector_query_engine = index.as_query_engine( 121 | streaming=True, 122 | similarity_top_k=6, 123 | node_postprocessors=[reranker], 124 | vector_store_query_mode="hybrid", 125 | query_transform=step_decompose_transform, 126 | response_mode=ResponseMode.REFINE, 127 | ) 128 | 129 | simple_query_engine = LlmQueryEngine( 130 | llm=OpenAI(model="gpt-3.5-turbo"), prompt=direct_llm_prompt 131 | ) 132 | 133 | cl.user_session.set("simple_query_engine", simple_query_engine) 134 | cl.user_session.set("vector_query_engine", vector_query_engine) 135 | 136 | message_history = [] 137 | cl.user_session.set("message_history", message_history) 138 | 139 | await cl.Message( 140 | author="Assistant", content="Hello! I'm an AI assistant. How may I help you?" 141 | ).send() 142 | 143 | 144 | async def set_sources(response, response_message): 145 | label_list = [] 146 | count = 1 147 | for sr in response.source_nodes: 148 | elements = [ 149 | cl.Text( 150 | name="S" + str(count), 151 | content=f"{sr.node.text}", 152 | display="side", 153 | size="small", 154 | ) 155 | ] 156 | response_message.elements = elements 157 | label_list.append("S" + str(count)) 158 | await response_message.update() 159 | count += 1 160 | response_message.content += "\n\nSources: " + ", ".join(label_list) 161 | await response_message.update() 162 | 163 | 164 | @cl.on_message 165 | async def main(message: cl.Message): 166 | query_engine = await router_query_engine() 167 | message_history = cl.user_session.get("message_history") 168 | prompt_template = "Previous messages:\n" 169 | 170 | response_message = cl.Message(content="", author="Assistant") 171 | 172 | user_message = message.content 173 | 174 | for past_message in message_history: 175 | prompt_template += f"{past_message['author']}: {past_message['content']}\n" 176 | prompt_template += f"Human: {user_message}" 177 | 178 | response = await cl.make_async(query_engine.query)(prompt_template) 179 | 180 | for token in response.response_gen: 181 | await response_message.stream_token(token) 182 | if response.response_txt: 183 | response_message.content = response.response_txt 184 | await response_message.send() 185 | 186 | message_history.append({"author": "Human", "content": user_message}) 187 | message_history.append({"author": "AI", "content": response_message.content}) 188 | message_history = message_history[-6:] 189 | cl.user_session.set("message_history", message_history) 190 | 191 | if response.source_nodes: 192 | await set_sources(response, response_message) 193 | -------------------------------------------------------------------------------- /5.Intent Detection Agent/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | cohere 3 | chainlit 4 | llama-index 5 | pinecone-client 6 | 7 | llama-index-vector-stores-pinecone 8 | torch 9 | pypdf 10 | llmsherpa 11 | llama-hub 12 | transformers 13 | llama-index-postprocessor-cohere-rerank
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2024 Scott Chacon and others 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RAG workflow. From basic to advanced. 2 | 3 | This project focuses on enhancing the GPT Documents chatbot by introducing several innovative features across different stages of development, aimed at improving user interaction, search accuracy, and response quality. 4 | 5 | ![](https://github.com/felipearosr/GPT-Documents/blob/main/1.Streaming%20-%20Memory%20-%20Sources/images/RAG.gif) 6 | 7 | ## Project Overview: 8 | 9 | 1. **ChatBot with Streaming, Memory, and Sources**: The initial version introduces streaming for real-time response delivery, memory for contextual conversations, and source indication for transparency. Technologies like Llama-index and Chainlit are utilized to facilitate a more intuitive and informative chatbot experience. 10 | 11 | 2. **Vector DB Integration, Hybrid Retriever, and Advanced Ingestion**: Subsequent updates include Pinecone integration for efficient vector data handling, a hybrid retriever combining dense and sparse vector methods for improved search relevance, and advanced ingestion techniques for better document retrieval and processing. 12 | 13 | 3. **Reranker, Query Transformations, and Response Synthesis**: Further enhancements incorporate the Cohere reranker for semantic document reordering, multi-step query transformations for detailed query processing, and response synthesis methods for generating more accurate and comprehensive answers. 14 | 15 | 4. **Evaluation - Generation - Optimization:** This stage involves systematically generating an evaluation set and scoring the RAG pipeline on the following metrics: correctness, relevancy, faithfulness, and context similarity. 16 | 17 | 5. **Intent Detection Agent:** Integration of an agent for effective user intent detection, streamlining the query process and enabling more efficient and precise information retrieval by redirecting queries to a more compact and cost-efficient language model. A condensed, non-streaming sketch of this routing appears right after this list.
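To make the last item concrete, here is a minimal, non-streaming sketch of the intent-detection routing, condensed from `5.Intent Detection Agent/main.py`. It assumes the `rag-index` Pinecone index has already been populated by `ingest.py` and that `OPENAI_API_KEY` and `PINECONE_API_KEY` are set; the class name, tool descriptions, and sample question below are illustrative only.

```python
import os

from pinecone import Pinecone
from llama_index.core import Settings, VectorStoreIndex
from llama_index.core.query_engine import CustomQueryEngine, RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore

# Same defaults as the subprojects: a strong model for RAG answers and the
# embedding model the index was built with.
Settings.llm = OpenAI(temperature=0.1, model="gpt-4-0125-preview")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")


class DirectLLMQueryEngine(CustomQueryEngine):
    """Sends the query straight to a small model, skipping retrieval."""

    llm: OpenAI

    def custom_query(self, query_str: str) -> str:
        return str(self.llm.complete(query_str))


# Reconnect to the index that ingest.py populated.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
vector_store = PineconeVectorStore(pinecone_index=pc.Index("rag-index"))
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Tool 1: a cheap model for greetings, meta-questions, and off-topic queries.
direct_tool = QueryEngineTool.from_defaults(
    query_engine=DirectLLMQueryEngine(llm=OpenAI(model="gpt-3.5-turbo")),
    name="LLM Query Engine",
    description="Use for broad or unclear intents that do not need the documents.",
)

# Tool 2: the full RAG engine grounded in the ingested documents.
rag_tool = QueryEngineTool.from_defaults(
    query_engine=index.as_query_engine(similarity_top_k=6),
    name="Vector Query Engine",
    description="Use for questions answered by the ingested documents.",
)

# The router asks an LLM selector which tool matches the detected intent.
router = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[direct_tool, rag_tool],
)

print(router.query("What does the indexed material say about Y Combinator?"))
```

The full version in `main.py` additionally streams tokens, applies the Cohere reranker and hybrid retrieval to the vector engine, and keeps a rolling window of the last six chat messages.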
18 | 19 | ## Key Features and Improvements: 20 | 21 | - **Real-time Interaction**: Implements streaming to deliver answers swiftly, enhancing user experience. 22 | 23 | - **Conversational Memory**: Employs memory capabilities to provide context-aware responses based on previous interactions. 24 | 25 | - **Source Transparency**: Indicates the origin of the chatbot's responses, building user trust. 26 | 27 | - **Efficient Data Handling**: Utilizes Pinecone for optimized vector data management, enabling faster and more relevant search results. 28 | 29 | - **Enhanced Search Accuracy**: Introduces a hybrid retriever that merges dense and sparse search methodologies, offering more precise results. 30 | 31 | - **Improved Document Processing**: Incorporates advanced ingestion techniques for various document types, enhancing the chatbot's understanding and retrieval capabilities. 32 | 33 | - **Semantic Reranking**: Integrates a reranker to adjust search results based on semantic relevance, ensuring responses align more closely with user queries. 34 | 35 | - **Advanced Query Processing**: Applies multi-step query transformations to break down complex inquiries into manageable parts, ensuring thorough exploration of user intents. 36 | 37 | - **Dynamic Response Generation**: Adopts multiple response synthesis methods, tailoring the chatbot's replies to user needs and ensuring comprehensive and detailed answers. 38 | 39 | This project represents a comprehensive approach to developing a sophisticated chatbot capable of real-time interaction, contextual understanding, and accurate information retrieval, all while maintaining transparency and user trust. 40 | 41 | 42 | ## Roadmap 43 | 44 | The order might change, and points might be added. 45 | 46 | - [x] Chat Streaming 47 | - [x] Memory 48 | - [x] Sources 49 | - [x] Pinecone Pod 50 | - [ ] Pinecone Serverless 51 | - [x] Implementing HybridSearch Retriever 52 | - [x] Implementing better ingestion 53 | - [x] Add evaluation 54 | - [x] Create set of documents and questions for evaluation 55 | - [ ] Trying out agents 56 | - [ ] Prompting 57 | - [x] Trying out Query Transformations 58 | - [ ] Implementing an LLM router 59 | - [ ] Trying out GPT as a reranker and comparing it with others 60 | - [ ] Adding Mistral and Llama examples 61 | - [ ] Adding Jupyter notebooks to each subproject. 62 | - [x] Intent Detection, using GPT-3.5 Turbo for some easy tasks. 63 | -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | # Exclude a variety of commonly ignored directories. 2 | exclude = [ 3 | ".bzr", 4 | ".direnv", 5 | ".eggs", 6 | ".git", 7 | ".git-rewrite", 8 | ".hg", 9 | ".ipynb_checkpoints", 10 | ".mypy_cache", 11 | ".nox", 12 | ".pants.d", 13 | ".pyenv", 14 | ".pytest_cache", 15 | ".pytype", 16 | ".ruff_cache", 17 | ".svn", 18 | ".tox", 19 | ".venv", 20 | ".vscode", 21 | "__pypackages__", 22 | "_build", 23 | "buck-out", 24 | "build", 25 | "dist", 26 | "node_modules", 27 | "site-packages", 28 | "venv", 29 | ] 30 | 31 | # Same as Black. 32 | line-length = 88 33 | indent-width = 4 34 | 35 | # Assume Python 3.11 36 | target-version = "py311" 37 | 38 | [lint] 39 | # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 40 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or 41 | # McCabe complexity (`C901`) by default.
42 | select = ["E4", "E7", "E9", "F"] 43 | ignore = [] 44 | 45 | # Allow fix for all enabled rules (when `--fix`) is provided. 46 | fixable = ["ALL"] 47 | unfixable = [] 48 | 49 | # Allow unused variables when underscore-prefixed. 50 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 51 | 52 | [format] 53 | # Like Black, use double quotes for strings. 54 | quote-style = "double" 55 | 56 | # Like Black, indent with spaces, rather than tabs. 57 | indent-style = "space" 58 | 59 | # Like Black, respect magic trailing commas. 60 | skip-magic-trailing-comma = false 61 | 62 | # Like Black, automatically detect the appropriate line ending. 63 | line-ending = "auto" 64 | 65 | # Enable auto-formatting of code examples in docstrings. Markdown, 66 | # reStructuredText code/literal blocks and doctests are all supported. 67 | # 68 | # This is currently disabled by default, but it is planned for this 69 | # to be opt-out in the future. 70 | docstring-code-format = false 71 | 72 | # Set the line length limit used when formatting code snippets in 73 | # docstrings. 74 | # 75 | # This only has an effect when the `docstring-code-format` setting is 76 | # enabled. 77 | docstring-code-line-length = "dynamic" --------------------------------------------------------------------------------