├── .github
│   └── workflows
│       └── ruff-action.yml
├── .gitignore
├── 1.Streaming - Memory - Sources
│   ├── .chainlit
│   │   ├── config.toml
│   │   ├── configOld.toml
│   │   └── translations
│   │       ├── de.json
│   │       ├── en-US.json
│   │       └── pt-BR.json
│   ├── .env.example
│   ├── README.md
│   ├── chainlit.md
│   ├── images
│   │   └── RAG.gif
│   ├── ingest.py
│   ├── main.py
│   └── requirements.txt
├── 2.Pinecone - HybridRetriever - Adv.Ingestion
│   ├── .chainlit
│   │   ├── config.toml
│   │   └── translations
│   │       ├── en-US.json
│   │       └── pt-BR.json
│   ├── .env.example
│   ├── README.md
│   ├── chainlit.md
│   ├── ingest.py
│   ├── main.py
│   └── requirements.txt
├── 3.Reranker - Q.Transformation - Res.Synthesis
│   ├── .chainlit
│   │   ├── config.toml
│   │   └── translations
│   │       ├── en-US.json
│   │       └── pt-BR.json
│   ├── .env.example
│   ├── README.md
│   ├── chainlit.md
│   ├── ingest.py
│   ├── main.py
│   └── requirements.txt
├── 4.Evaluation - Generation - Optimization
│   ├── .chainlit
│   │   ├── config.toml
│   │   └── translations
│   │       ├── en-US.json
│   │       └── pt-BR.json
│   ├── .env.example
│   ├── README.md
│   ├── _evaluations.json
│   ├── benchmark.csv
│   ├── chainlit.md
│   ├── evaluation.py
│   ├── generation.py
│   ├── ingest.py
│   ├── main.py
│   ├── optimization.py
│   ├── rag_evaluator_pack
│   │   ├── README.md
│   │   ├── llama_index
│   │   │   └── packs
│   │   │       └── rag_evaluator
│   │   │           ├── BUILD
│   │   │           ├── __init__.py
│   │   │           └── base.py
│   │   └── pyproject.toml
│   └── requirements.txt
├── 5.Intent Detection Agent
│   ├── .chainlit
│   │   ├── config.toml
│   │   └── translations
│   │       ├── en-US.json
│   │       └── pt-BR.json
│   ├── .env.example
│   ├── README.md
│   ├── chainlit.md
│   ├── images
│   │   └── RAGSources.png
│   ├── ingest.py
│   ├── main.py
│   └── requirements.txt
├── LICENSE.md
├── README.md
└── ruff.toml
/.github/workflows/ruff-action.yml:
--------------------------------------------------------------------------------
1 | name: Ruff
2 | on: [ push, pull_request ]
3 | jobs:
4 |   ruff:
5 |     runs-on: ubuntu-latest
6 |     steps:
7 |       - uses: actions/checkout@v4
8 |       - uses: chartboost/ruff-action@v1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.env
2 | *requirements_plain.txt
3 | *__pycache__
4 | **/storage/
5 | *.gitattributes
6 | **/data/*
7 | !**/data/
8 | *.files
9 | 6.*
10 | **/output/*
11 | !**/output/
12 | *questions.json
13 | *_evaluations.json
14 | *benchmark.csv
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/.chainlit/config.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | # Whether to enable telemetry (default: true). No personal data is collected.
3 | enable_telemetry = true
4 |
5 |
6 | # List of environment variables to be provided by each user to use the app.
7 | user_env = []
8 |
9 | # Duration (in seconds) during which the session is saved when the connection is lost
10 | session_timeout = 3600
11 |
12 | # Enable third parties caching (e.g LangChain cache)
13 | cache = false
14 |
15 | # Authorized origins
16 | allow_origins = ["*"]
17 |
18 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
19 | # follow_symlink = false
20 |
21 | [features]
22 | # Show the prompt playground
23 | prompt_playground = true
24 |
25 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
26 | unsafe_allow_html = false
27 |
28 | # Process and display mathematical expressions. This can clash with "$" characters in messages.
29 | latex = false
30 |
31 | # Authorize users to upload files with messages
32 | [features.multi_modal]
33 | enabled = false
34 | accept = ["*/*"]
35 | max_files = 20
36 | max_size_mb = 500
37 |
38 | # Allows user to use speech to text
39 | [features.speech_to_text]
40 | enabled = false
41 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
42 | # language = "en-US"
43 |
44 | [UI]
45 | # Name of the app and chatbot.
46 | name = "Chatbot"
47 |
48 | # Show the readme while the thread is empty.
49 | show_readme_as_default = false
50 |
51 | # Description of the app and chatbot. This is used for HTML tags.
52 | # description = ""
53 |
54 | # Large size content are by default collapsed for a cleaner ui
55 | default_collapse_content = true
56 |
57 | # The default value for the expand messages settings.
58 | default_expand_messages = false
59 |
60 | # Hide the chain of thought details from the user in the UI.
61 | hide_cot = false
62 |
63 | # Link to your github repo. This will add a github button in the UI's header.
64 | github = "https://github.com/felipearosr/GPT-Documents"
65 |
66 | # Specify a CSS file that can be used to customize the user interface.
67 | # The CSS file can be served from the public directory or via an external link.
68 | # custom_css = "/public/test.css"
69 |
70 | # Specify a Javascript file that can be used to customize the user interface.
71 | # The Javascript file can be served from the public directory.
72 | # custom_js = "/public/test.js"
73 |
74 | # Specify a custom font url.
75 | # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
76 |
77 | # Specify a custom build directory for the frontend.
78 | # This can be used to customize the frontend code.
79 | # Be careful: If this is a relative path, it should not start with a slash.
80 | # custom_build = "./public/build"
81 |
82 | # Override default MUI light theme. (Check theme.ts)
83 | [UI.theme]
84 | #font_family = "Inter, sans-serif"
85 | [UI.theme.light]
86 | #background = "#FAFAFA"
87 | #paper = "#FFFFFF"
88 |
89 | [UI.theme.light.primary]
90 | #main = "#F80061"
91 | #dark = "#980039"
92 | #light = "#FFE7EB"
93 |
94 | # Override default MUI dark theme. (Check theme.ts)
95 | [UI.theme.dark]
96 | #background = "#000212"
97 | #paper = "#00031E"
98 |
99 | [UI.theme.dark.primary]
100 | #main = "#FFFFFF" #"#F80061"
101 | #dark = "#FFFFFF" #"#980039"
102 | #light = "#FFFFFF" #"#00031E"
103 |
104 |
105 | [meta]
106 | generated_by = "1.0.500"
107 |
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/.chainlit/configOld.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | # Whether to enable telemetry (default: true). No personal data is collected.
3 | enable_telemetry = true
4 |
5 | # List of environment variables to be provided by each user to use the app.
6 | user_env = []
7 |
8 | # Duration (in seconds) during which the session is saved when the connection is lost
9 | session_timeout = 3600
10 |
11 | # Enable third parties caching (e.g LangChain cache)
12 | cache = false
13 |
14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15 | # follow_symlink = false
16 |
17 | [features]
18 | # Show the prompt playground
19 | prompt_playground = true
20 |
21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22 | unsafe_allow_html = false
23 |
24 | # Process and display mathematical expressions. This can clash with "$" characters in messages.
25 | latex = false
26 |
27 | # Authorize users to upload files with messages
28 | multi_modal = false
29 |
30 | # Allows user to use speech to text
31 | [features.speech_to_text]
32 | enabled = false
33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34 | # language = "en-US"
35 |
36 | [UI]
37 | # Name of the app and chatbot.
38 | name = "Chatbot"
39 |
40 | # Show the readme while the thread is empty.
41 | show_readme_as_default = false
42 |
43 | # Description of the app and chatbot. This is used for HTML tags.
44 | # description = ""
45 |
46 | # Large size content are by default collapsed for a cleaner ui
47 | default_collapse_content = true
48 |
49 | # The default value for the expand messages settings.
50 | default_expand_messages = false
51 |
52 | # Hide the chain of thought details from the user in the UI.
53 | hide_cot = false
54 |
55 | # Link to your github repo. This will add a github button in the UI's header.
56 | github = "https://github.com/felipearosr/GPT-Documents"
57 |
58 | # Specify a CSS file that can be used to customize the user interface.
59 | # The CSS file can be served from the public directory or via an external link.
60 | # custom_css = "/public/test.css"
61 |
62 | # Override default MUI light theme. (Check theme.ts)
63 | [UI.theme.light]
64 | #background = "#FAFAFA"
65 | #paper = "#FFFFFF"
66 |
67 | [UI.theme.light.primary]
68 | #main = "#F80061"
69 | #dark = "#980039"
70 | #light = "#FFE7EB"
71 |
72 | # Override default MUI dark theme. (Check theme.ts)
73 | [UI.theme.dark]
74 | #background = "#FAFAFA"
75 | #paper = "#FFFFFF"
76 |
77 | [UI.theme.dark.primary]
78 | #main = "#F80061"
79 | #dark = "#980039"
80 | #light = "#FFE7EB"
81 |
82 |
83 | [meta]
84 | generated_by = "1.0.101"
85 |
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/.chainlit/translations/de.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Einstellungen",
8 | "settingsKey": "S",
9 | "APIKeys": "API-Schl\u00fcssel",
10 | "logout": "Abmelden"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "Neuer Chat"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Aufgabenliste",
22 | "loading": "L\u00e4dt...",
23 | "error": "Ein Fehler ist aufgetreten"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Upload abbrechen",
28 | "removeAttachment": "Anhang entfernen"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Neuen Chat erstellen?",
32 | "clearChat": "Dies wird die aktuellen Nachrichten l\u00f6schen und einen neuen Chat starten.",
33 | "cancel": "Abbrechen",
34 | "confirm": "Best\u00e4tigen"
35 | },
36 | "settingsModal": {
37 | "settings": "Einstellungen",
38 | "expandMessages": "Nachrichten ausklappen",
39 | "hideChainOfThought": "Zwischenschritte verbergen",
40 | "darkMode": "Dunkelmodus"
41 | }
42 | },
43 | "organisms": {
44 | "chat": {
45 | "history": {
46 | "index": {
47 | "showHistory": "Zeige Chatverlauf",
48 | "lastInputs": "Letzte Eingaben",
49 | "noInputs": "Leer...",
50 | "loading": "L\u00e4dt..."
51 | }
52 | },
53 | "inputBox": {
54 | "input": {
55 | "placeholder": "Nachricht eingeben..."
56 | },
57 | "speechButton": {
58 | "start": "Aufnahme starten",
59 | "stop": "Aufnahme stoppen"
60 | },
61 | "SubmitButton": {
62 | "sendMessage": "Nachricht senden",
63 | "stopTask": "Aufgabe stoppen"
64 | },
65 | "UploadButton": {
66 | "attachFiles": "Dateien anh\u00e4ngen"
67 | },
68 | "waterMark": {
69 | "text": "Erstellt mit"
70 | }
71 | },
72 | "Messages": {
73 | "index": {
74 | "running": "L\u00e4uft",
75 | "executedSuccessfully": "erfolgreich ausgef\u00fchrt",
76 | "failed": "fehlgeschlagen",
77 | "feedbackUpdated": "Feedback aktualisiert",
78 | "updating": "Aktualisiert"
79 | }
80 | },
81 | "dropScreen": {
82 | "dropYourFilesHere": "Ziehe deine Dateien hierher"
83 | },
84 | "index": {
85 | "failedToUpload": "Upload fehlgeschlagen",
86 | "cancelledUploadOf": "Upload abgebrochen von",
87 | "couldNotReachServer": "Konnte den Server nicht erreichen",
88 | "continuingChat": "Vorherigen Chat fortsetzen"
89 | },
90 | "settings": {
91 | "settingsPanel": "Einstellungsfenster",
92 | "reset": "Zur\u00fccksetzen",
93 | "cancel": "Abbrechen",
94 | "confirm": "Best\u00e4tigen"
95 | }
96 | },
97 | "threadHistory": {
98 | "sidebar": {
99 | "filters": {
100 | "FeedbackSelect": {
101 | "feedbackAll": "Feedback: Alle",
102 | "feedbackPositive": "Feedback: Positiv",
103 | "feedbackNegative": "Feedback: Negativ"
104 | },
105 | "SearchBar": {
106 | "search": "Suche"
107 | }
108 | },
109 | "DeleteThreadButton": {
110 | "confirmMessage": "Dies wird den Thread sowie seine Nachrichten und Elemente l\u00f6schen.",
111 | "cancel": "Abbrechen",
112 | "confirm": "Best\u00e4tigen",
113 | "deletingChat": "Chat wird gel\u00f6scht",
114 | "chatDeleted": "Chat gel\u00f6scht"
115 | },
116 | "index": {
117 | "pastChats": "Vergangene Chats"
118 | },
119 | "ThreadList": {
120 | "empty": "Leer...",
121 | "today": "Heute",
122 | "yesterday": "Gestern",
123 | "previous7days": "Vor 7 Tagen",
124 | "previous30days": "Vor 30 Tagen"
125 | },
126 | "TriggerButton": {
127 | "closeSidebar": "Seitenleiste schlie\u00dfen",
128 | "openSidebar": "Seitenleiste \u00f6ffnen"
129 | }
130 | },
131 | "Thread": {
132 | "backToChat": "Zur\u00fcck zum Chat",
133 | "chatCreatedOn": "Dieser Chat wurde erstellt am"
134 | }
135 | },
136 | "header": {
137 | "chat": "Chat",
138 | "readme": "Liesmich"
139 | }
140 | }
141 | },
142 | "hooks": {
143 | "useLLMProviders": {
144 | "failedToFetchProviders": "Anbieter konnten nicht geladen werden:"
145 | }
146 | },
147 | "pages": {
148 | "Design": {},
149 | "Env": {
150 | "savedSuccessfully": "Erfolgreich gespeichert",
151 | "requiredApiKeys": "Ben\u00f6tigte API-Schl\u00fcssel",
152 | "requiredApiKeysInfo": "Um diese App zu nutzen, werden die folgenden API-Schl\u00fcssel ben\u00f6tigt. Die Schl\u00fcssel werden im lokalen Speicher Ihres Ger\u00e4ts gespeichert."
153 | },
154 | "Page": {
155 | "notPartOfProject": "Sie sind nicht Teil dieses Projekts."
156 | },
157 | "ResumeButton": {
158 | "resumeChat": "Chat fortsetzen"
159 | }
160 | }
161 | }
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/.chainlit/translations/en-US.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Settings",
8 | "settingsKey": "S",
9 | "APIKeys": "API Keys",
10 | "logout": "Logout"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "New Chat"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Task List",
22 | "loading": "Loading...",
23 | "error": "An error occured"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancel upload",
28 | "removeAttachment": "Remove attachment"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Create new chat?",
32 | "clearChat": "This will clear the current messages and start a new chat.",
33 | "cancel": "Cancel",
34 | "confirm": "Confirm"
35 | },
36 | "settingsModal": {
37 | "settings": "Settings",
38 | "expandMessages": "Expand Messages",
39 | "hideChainOfThought": "Hide Chain of Thought",
40 | "darkMode": "Dark Mode"
41 | },
42 | "detailsButton": {
43 | "using": "Using",
44 | "running": "Running",
45 | "took_one": "Took {{count}} step",
46 | "took_other": "Took {{count}} steps"
47 | },
48 | "auth": {
49 | "authLogin": {
50 | "title": "Login to access the app.",
51 | "form": {
52 | "email": "Email address",
53 | "password": "Password",
54 | "noAccount": "Don't have an account?",
55 | "alreadyHaveAccount": "Already have an account?",
56 | "signup": "Sign Up",
57 | "signin": "Sign In",
58 | "or": "OR",
59 | "continue": "Continue",
60 | "forgotPassword": "Forgot password?",
61 | "passwordMustContain": "Your password must contain:",
62 | "emailRequired": "email is a required field",
63 | "passwordRequired": "password is a required field"
64 | },
65 | "error": {
66 | "default": "Unable to sign in.",
67 | "signin": "Try signing in with a different account.",
68 | "oauthsignin": "Try signing in with a different account.",
69 | "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
70 | "oauthcallbackerror": "Try signing in with a different account.",
71 | "oauthcreateaccount": "Try signing in with a different account.",
72 | "emailcreateaccount": "Try signing in with a different account.",
73 | "callback": "Try signing in with a different account.",
74 | "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
75 | "emailsignin": "The e-mail could not be sent.",
76 | "emailverify": "Please verify your email, a new email has been sent.",
77 | "credentialssignin": "Sign in failed. Check the details you provided are correct.",
78 | "sessionrequired": "Please sign in to access this page."
79 | }
80 | },
81 | "authVerifyEmail": {
82 | "almostThere": "You're almost there! We've sent an email to ",
83 | "verifyEmailLink": "Please click on the link in that email to complete your signup.",
84 | "didNotReceive": "Can't find the email?",
85 | "resendEmail": "Resend email",
86 | "goBack": "Go Back",
87 | "emailSent": "Email sent successfully.",
88 | "verifyEmail": "Verify your email address"
89 | },
90 | "providerButton": {
91 | "continue": "Continue with {{provider}}",
92 | "signup": "Sign up with {{provider}}"
93 | },
94 | "authResetPassword": {
95 | "newPasswordRequired": "New password is a required field",
96 | "passwordsMustMatch": "Passwords must match",
97 | "confirmPasswordRequired": "Confirm password is a required field",
98 | "newPassword": "New password",
99 | "confirmPassword": "Confirm password",
100 | "resetPassword": "Reset Password"
101 | },
102 | "authForgotPassword": {
103 | "email": "Email address",
104 | "emailRequired": "email is a required field",
105 | "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
106 | "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
107 | "resendEmail": "Resend email",
108 | "continue": "Continue",
109 | "goBack": "Go Back"
110 | }
111 | }
112 | },
113 | "organisms": {
114 | "chat": {
115 | "history": {
116 | "index": {
117 | "showHistory": "Show history",
118 | "lastInputs": "Last Inputs",
119 | "noInputs": "Such empty...",
120 | "loading": "Loading..."
121 | }
122 | },
123 | "inputBox": {
124 | "input": {
125 | "placeholder": "Type your message here..."
126 | },
127 | "speechButton": {
128 | "start": "Start recording",
129 | "stop": "Stop recording"
130 | },
131 | "SubmitButton": {
132 | "sendMessage": "Send message",
133 | "stopTask": "Stop Task"
134 | },
135 | "UploadButton": {
136 | "attachFiles": "Attach files"
137 | },
138 | "waterMark": {
139 | "text": "Built with"
140 | }
141 | },
142 | "Messages": {
143 | "index": {
144 | "running": "Running",
145 | "executedSuccessfully": "executed successfully",
146 | "failed": "failed",
147 | "feedbackUpdated": "Feedback updated",
148 | "updating": "Updating"
149 | }
150 | },
151 | "dropScreen": {
152 | "dropYourFilesHere": "Drop your files here"
153 | },
154 | "index": {
155 | "failedToUpload": "Failed to upload",
156 | "cancelledUploadOf": "Cancelled upload of",
157 | "couldNotReachServer": "Could not reach the server",
158 | "continuingChat": "Continuing previous chat"
159 | },
160 | "settings": {
161 | "settingsPanel": "Settings panel",
162 | "reset": "Reset",
163 | "cancel": "Cancel",
164 | "confirm": "Confirm"
165 | }
166 | },
167 | "threadHistory": {
168 | "sidebar": {
169 | "filters": {
170 | "FeedbackSelect": {
171 | "feedbackAll": "Feedback: All",
172 | "feedbackPositive": "Feedback: Positive",
173 | "feedbackNegative": "Feedback: Negative"
174 | },
175 | "SearchBar": {
176 | "search": "Search"
177 | }
178 | },
179 | "DeleteThreadButton": {
180 | "confirmMessage": "This will delete the thread as well as it's messages and elements.",
181 | "cancel": "Cancel",
182 | "confirm": "Confirm",
183 | "deletingChat": "Deleting chat",
184 | "chatDeleted": "Chat deleted"
185 | },
186 | "index": {
187 | "pastChats": "Past Chats"
188 | },
189 | "ThreadList": {
190 | "empty": "Empty...",
191 | "today": "Today",
192 | "yesterday": "Yesterday",
193 | "previous7days": "Previous 7 days",
194 | "previous30days": "Previous 30 days"
195 | },
196 | "TriggerButton": {
197 | "closeSidebar": "Close sidebar",
198 | "openSidebar": "Open sidebar"
199 | }
200 | },
201 | "Thread": {
202 | "backToChat": "Go back to chat",
203 | "chatCreatedOn": "This chat was created on"
204 | }
205 | },
206 | "header": {
207 | "chat": "Chat",
208 | "readme": "Readme"
209 | }
210 | }
211 | },
212 | "hooks": {
213 | "useLLMProviders": {
214 | "failedToFetchProviders": "Failed to fetch providers:"
215 | }
216 | },
217 | "pages": {
218 | "Design": {},
219 | "Env": {
220 | "savedSuccessfully": "Saved successfully",
221 | "requiredApiKeys": "Required API Keys",
222 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
223 | },
224 | "Page": {
225 | "notPartOfProject": "You are not part of this project."
226 | },
227 | "ResumeButton": {
228 | "resumeChat": "Resume Chat"
229 | }
230 | }
231 | }
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/.chainlit/translations/pt-BR.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Configura\u00e7\u00f5es",
8 | "settingsKey": "S",
9 | "APIKeys": "Chaves de API",
10 | "logout": "Sair"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "Nova Conversa"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
22 | "loading": "Carregando...",
23 | "error": "Ocorreu um erro"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancelar envio",
28 | "removeAttachment": "Remover anexo"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Criar novo chat?",
32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
33 | "cancel": "Cancelar",
34 | "confirm": "Confirmar"
35 | },
36 | "settingsModal": {
37 | "settings": "Configura\u00e7\u00f5es",
38 | "expandMessages": "Expandir Mensagens",
39 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
40 | "darkMode": "Modo Escuro"
41 | }
42 | },
43 | "organisms": {
44 | "chat": {
45 | "history": {
46 | "index": {
47 | "showHistory": "Mostrar hist\u00f3rico",
48 | "lastInputs": "\u00daltimas Entradas",
49 | "noInputs": "Vazio...",
50 | "loading": "Carregando..."
51 | }
52 | },
53 | "inputBox": {
54 | "input": {
55 | "placeholder": "Digite sua mensagem aqui..."
56 | },
57 | "speechButton": {
58 | "start": "Iniciar grava\u00e7\u00e3o",
59 | "stop": "Parar grava\u00e7\u00e3o"
60 | },
61 | "SubmitButton": {
62 | "sendMessage": "Enviar mensagem",
63 | "stopTask": "Parar Tarefa"
64 | },
65 | "UploadButton": {
66 | "attachFiles": "Anexar arquivos"
67 | },
68 | "waterMark": {
69 | "text": "Constru\u00eddo com"
70 | }
71 | },
72 | "Messages": {
73 | "index": {
74 | "running": "Executando",
75 | "executedSuccessfully": "executado com sucesso",
76 | "failed": "falhou",
77 | "feedbackUpdated": "Feedback atualizado",
78 | "updating": "Atualizando"
79 | }
80 | },
81 | "dropScreen": {
82 | "dropYourFilesHere": "Solte seus arquivos aqui"
83 | },
84 | "index": {
85 | "failedToUpload": "Falha ao enviar",
86 | "cancelledUploadOf": "Envio cancelado de",
87 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
88 | "continuingChat": "Continuando o chat anterior"
89 | },
90 | "settings": {
91 | "settingsPanel": "Painel de Configura\u00e7\u00f5es",
92 | "reset": "Redefinir",
93 | "cancel": "Cancelar",
94 | "confirm": "Confirmar"
95 | }
96 | },
97 | "threadHistory": {
98 | "sidebar": {
99 | "filters": {
100 | "FeedbackSelect": {
101 | "feedbackAll": "Feedback: Todos",
102 | "feedbackPositive": "Feedback: Positivo",
103 | "feedbackNegative": "Feedback: Negativo"
104 | },
105 | "SearchBar": {
106 | "search": "Buscar"
107 | }
108 | },
109 | "DeleteThreadButton": {
110 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
111 | "cancel": "Cancelar",
112 | "confirm": "Confirmar",
113 | "deletingChat": "Deletando conversa",
114 | "chatDeleted": "Conversa deletada"
115 | },
116 | "index": {
117 | "pastChats": "Conversas Anteriores"
118 | },
119 | "ThreadList": {
120 | "empty": "Vazio..."
121 | },
122 | "TriggerButton": {
123 | "closeSidebar": "Fechar barra lateral",
124 | "openSidebar": "Abrir barra lateral"
125 | }
126 | },
127 | "Thread": {
128 | "backToChat": "Voltar para a conversa",
129 | "chatCreatedOn": "Esta conversa foi criada em"
130 | }
131 | },
132 | "header": {
133 | "chat": "Conversa",
134 | "readme": "Leia-me"
135 | }
136 | },
137 | "hooks": {
138 | "useLLMProviders": {
139 | "failedToFetchProviders": "Falha ao buscar provedores:"
140 | }
141 | },
142 | "pages": {
143 | "Design": {},
144 | "Env": {
145 | "savedSuccessfully": "Salvo com sucesso",
146 | "requiredApiKeys": "Chaves de API necess\u00e1rias",
147 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
148 | },
149 | "Page": {
150 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
151 | },
152 | "ResumeButton": {
153 | "resumeChat": "Continuar Conversa"
154 | }
155 | }
156 | }
157 | }
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY="sk-..."
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/README.md:
--------------------------------------------------------------------------------
1 | # ChatBot with streaming, memory and sources
2 |
3 | Embarking on the creation of an advanced Retrieval-Augmented Generation (RAG) system marks a significant first step towards innovative chatbot development. This foundational version incorporates three critical features:
4 |
5 | - **Streaming:** Enhance user experience with fast, real-time answers as the chatbot generates responses on-the-fly, reducing wait times.
6 | - **Memory:** Facilitate natural, conversational interactions by enabling the chatbot to recall previous parts of the conversation, adding context and relevance to the dialogue.
7 | - **Sources:** Increase transparency and trust by clearly indicating the origin of the chatbot's answers, allowing users to understand where the information is coming from.
8 |
9 | These functionalities are powered by technologies like LlamaIndex and Chainlit, setting the stage for a more intuitive, responsive, and informed chatbot experience.
10 |
11 | 
12 |
13 |
14 | ## Table of Contents
15 |
16 | 1. [Installation](#installation)
17 | 2. [Usage](#usage)
18 | 3. [Streaming](#streaming)
19 | 4. [Memory](#memory)
20 | 5. [Sources](#sources)
21 | 6. [Improvements](#improvements)
22 |
23 |
24 | ## Installation
25 |
26 | Follow these steps to set up the GPT Documents chatbot on your local machine:
27 |
28 | 1. Create a conda environment:
29 |
30 | ```shell
31 | conda create -n rag python==3.11 -y && source activate rag
32 | ```
33 |
34 | 2. Install the required dependencies:
35 |
36 | ```shell
37 | pip install -r requirements.txt
38 | ```
39 |
40 | 3. Load your documents into the vector store:
41 |    - Create a folder named `data`.
42 |    - Place your documents inside the `data` folder.
43 |    - Execute the `ingest.py` script to initiate the loading process.
44 |
45 | ## Usage
46 |
47 | Once the setup is complete, launch the chainlit app using the following command:
48 |
49 | ```shell
50 | chainlit run main.py
51 | ```
52 |
53 | ## Streaming
54 |
55 | ### Understanding Streaming in LLMs
56 |
57 | Streaming is a feature that enables real-time delivery of responses from the large language model (LLM) as they are being generated. This process significantly reduces response latency by allowing immediate display of each part of the answer, token by token, as it is streamed from the LLM. This means users do not have to wait for the entire response to be composed and sent before beginning to read the answer, facilitating a smoother and faster interaction.
58 |
59 |
60 | ### How do we implement it?
61 |
62 | ```python
63 | @cl.on_chat_start
64 | async def start():
65 |     # we simply add `streaming=True` to the OpenAI settings
66 |     Settings.llm = OpenAI(
67 |         model="gpt-3.5-turbo", temperature=0.1, max_tokens=1024, streaming=True
68 |     )
69 |     # ...
70 | ```
71 |
72 | ```python
73 | @cl.on_message
74 | async def main(message: cl.Message):
75 |     # ...
76 |     # wrap the synchronous query call with cl.make_async so it does not block the event loop
77 |     response = await cl.make_async(query_engine.query)(prompt_template)
78 | 
79 |     # now we stream the tokens into chainlit
80 |     for token in response.response_gen:
81 |         await response_message.stream_token(token)
82 |     if response.response_txt:
83 |         response_message.content = response.response_txt
84 |     await response_message.send()
85 |     # ...
86 | ```
87 |
88 |
89 |
90 | ## Memory
91 |
92 | ### Exploring Memory in LLMs
93 |
94 | Memory in LLMs is a feature we integrate to enhance their ability to maintain and recall the history of interactions with users. This functionality enriches the conversational experience by allowing the model to reference previous exchanges and build on them, creating a more coherent and contextually relevant dialogue.
95 |
96 | ### How do we implement it?
97 | ```python
98 | @cl.on_chat_start
99 | async def start():
100 |     # ...
101 |     # create an empty list to store the message history
102 |     message_history = []
103 |     # store message_history in the user_session
104 |     cl.user_session.set("message_history", message_history)
105 |     # ...
106 | ```
107 |
108 | ```python
109 | @cl.on_message
110 | async def main(message: cl.Message):
111 |     # get message_history from the user_session
112 |     message_history = cl.user_session.get("message_history")
113 |     prompt_template = "Previous messages:\n"
114 |     # ...
115 |     user_message = message.content
116 | 
117 |     # fill the prompt with the previous messages
118 |     for message in message_history:
119 |         prompt_template += f"{message['author']}: {message['content']}\n"
120 |     prompt_template += f"Human: {user_message}"
121 |     # ...
122 |     # add both the user_message and the response_message to the message_history
123 |     message_history.append({"author": "Human", "content": user_message})
124 |     message_history.append({"author": "AI", "content": response_message.content})
125 |     # limit the memory to the last 2 queries and responses
126 |     message_history = message_history[-4:]
127 |     # finally, store the updated message_history back in the user_session
128 |     cl.user_session.set("message_history", message_history)
129 |     # ...
130 | ```
131 |
132 | ## Sources
133 |
134 | ### What are Sources?
135 |
136 | Sources refer to the documents or materials returned by the retrieval system, which provide the foundation for the answers to your queries. They offer a transparent way to verify the origin of the information used by the language model to generate its responses.
137 |
138 | ### How do we implement it?
139 |
140 | This is a basic implementation of sources. You can also separate them by file type using the metadata of the `source_nodes`; a short sketch of that follows the code block below.
141 |
142 | ```python
143 | async def set_sources(response, response_message):
144 |     label_list = []
145 |     count = 1
146 | 
147 |     # run through all the source_nodes of the response
148 |     for sr in response.source_nodes:
149 |         elements = [
150 |             # wrap each source in a chainlit element, in this case Text;
151 |             # it could also be a PDF or another element available in chainlit.
152 |             cl.Text(
153 |                 name="S" + str(count),
154 |                 content=f"{sr.node.text}",
155 |                 display="side",
156 |                 size="small",
157 |             )
158 |         ]
159 |         response_message.elements = elements
160 |         label_list.append("S" + str(count))
161 |         await response_message.update()
162 |         count += 1
163 |     response_message.content += "\n\nSources: " + ", ".join(label_list)
164 |     # update the response_message so that these sources are displayed in chainlit
165 |     await response_message.update()
166 | ```
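
Building on the note above, here is a minimal sketch of separating sources by file type. It assumes each source node carries a `file_name` key in its metadata (which `SimpleDirectoryReader` adds by default); the grouping logic and label format are illustrative, not the repo's actual code.

```python
from collections import defaultdict

import chainlit as cl


async def set_sources_by_file_type(response, response_message):
    # group the source nodes by the file extension found in their metadata
    groups = defaultdict(list)
    for sr in response.source_nodes:
        # `file_name` is assumed to be present in the node metadata
        file_name = sr.node.metadata.get("file_name", "unknown")
        extension = file_name.rsplit(".", 1)[-1].lower()
        groups[extension].append(sr)

    elements, labels = [], []
    count = 1
    for extension, nodes in groups.items():
        for sr in nodes:
            label = f"{extension.upper()}-S{count}"
            elements.append(
                cl.Text(name=label, content=sr.node.text, display="side", size="small")
            )
            labels.append(label)
            count += 1

    # attach all elements at once and list the labels at the end of the answer
    response_message.elements = elements
    response_message.content += "\n\nSources: " + ", ".join(labels)
    await response_message.update()
```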
167 |
168 | ## Improvements
169 |
170 | Adding the callback manager from Chainlit. Right now it is broken due to the llama-index v0.10 update; the current wiring in `main.py` is shown below for reference.
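
A minimal sketch of how `main.py` currently wires the handler in; once the compatibility issue with llama-index v0.10 is resolved, this is what should surface the intermediate steps in the Chainlit UI again.

```python
import chainlit as cl
from llama_index.core import Settings
from llama_index.core.callbacks import CallbackManager


@cl.on_chat_start
async def start():
    # route LlamaIndex callbacks into Chainlit so intermediate steps show up in the UI
    Settings.callback_manager = CallbackManager([cl.LlamaIndexCallbackHandler()])
    # ...
```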
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/chainlit.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/1.Streaming - Memory - Sources/chainlit.md
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/images/RAG.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/1.Streaming - Memory - Sources/images/RAG.gif
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/ingest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
4 | from dotenv import load_dotenv
5 |
6 | load_dotenv()
7 | openai.api_key = os.environ.get("OPENAI_API_KEY")
8 |
9 | documents = SimpleDirectoryReader("./data").load_data(show_progress=True)
10 | index = VectorStoreIndex.from_documents(documents)
11 | index.storage_context.persist()
12 |
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import chainlit as cl
4 |
5 | from llama_index.core import Settings, load_index_from_storage, StorageContext
6 | from llama_index.llms.openai import OpenAI
7 | from llama_index.core.callbacks import CallbackManager
8 | from llama_index.embeddings.openai import OpenAIEmbedding
9 | from llama_index.core.service_context import ServiceContext
10 |
11 | openai.api_key = os.environ.get("OPENAI_API_KEY")
12 |
13 |
14 | @cl.cache
15 | def load_context():
16 |     storage_context = StorageContext.from_defaults(
17 |         persist_dir="./storage",
18 |     )
19 |     index = load_index_from_storage(storage_context)
20 |     return index
21 | 
22 | 
23 | @cl.on_chat_start
24 | async def start():
25 |     index = load_context()
26 | 
27 |     Settings.llm = OpenAI(
28 |         model="gpt-3.5-turbo", temperature=0.1, max_tokens=1024, streaming=True
29 |     )
30 |     Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
31 |     Settings.context_window = 4096
32 |     Settings.callback_manager = CallbackManager([cl.LlamaIndexCallbackHandler()])
33 | 
34 |     service_context = ServiceContext.from_defaults()
35 |     query_engine = index.as_query_engine(
36 |         streaming=True, similarity_top_k=2, service_context=service_context
37 |     )
38 |     cl.user_session.set("query_engine", query_engine)
39 | 
40 |     message_history = []
41 |     cl.user_session.set("message_history", message_history)
42 | 
43 |     await cl.Message(
44 |         author="Assistant", content="Hello! Im an AI assistant. How may I help you?"
45 |     ).send()
46 | 
47 | 
48 | async def set_sources(response, response_message):
49 |     label_list = []
50 |     count = 1
51 |     for sr in response.source_nodes:
52 |         elements = [
53 |             cl.Text(
54 |                 name="S" + str(count),
55 |                 content=f"{sr.node.text}",
56 |                 display="side",
57 |                 size="small",
58 |             )
59 |         ]
60 |         response_message.elements = elements
61 |         label_list.append("S" + str(count))
62 |         await response_message.update()
63 |         count += 1
64 |     response_message.content += "\n\nSources: " + ", ".join(label_list)
65 |     await response_message.update()
66 | 
67 | 
68 | @cl.on_message
69 | async def main(message: cl.Message):
70 |     query_engine = cl.user_session.get("query_engine")
71 |     message_history = cl.user_session.get("message_history")
72 |     prompt_template = "Previous messages:\n"
73 | 
74 |     response_message = cl.Message(content="", author="Assistant")
75 | 
76 |     user_message = message.content
77 | 
78 |     for message in message_history:
79 |         prompt_template += f"{message['author']}: {message['content']}\n"
80 |     prompt_template += f"Human: {user_message}"
81 | 
82 |     response = await cl.make_async(query_engine.query)(prompt_template)
83 | 
84 |     for token in response.response_gen:
85 |         await response_message.stream_token(token)
86 |     if response.response_txt:
87 |         response_message.content = response.response_txt
88 |     await response_message.send()
89 | 
90 |     message_history.append({"author": "Human", "content": user_message})
91 |     message_history.append({"author": "AI", "content": response_message.content})
92 |     message_history = message_history[-4:]
93 |     cl.user_session.set("message_history", message_history)
94 | 
95 |     if response.source_nodes:
96 |         await set_sources(response, response_message)
97 | 
--------------------------------------------------------------------------------
/1.Streaming - Memory - Sources/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==23.2.1
2 | aiohttp==3.9.3
3 | aiosignal==1.3.1
4 | annotated-types==0.6.0
5 | anyio==3.7.1
6 | asyncer==0.0.2
7 | attrs==23.2.0
8 | beautifulsoup4==4.12.3
9 | bidict==0.23.1
10 | certifi==2024.2.2
11 | chainlit==1.0.500
12 | charset-normalizer==3.3.2
13 | chevron==0.14.0
14 | click==8.1.7
15 | dataclasses-json==0.5.14
16 | Deprecated==1.2.14
17 | dirtyjson==1.0.8
18 | distro==1.9.0
19 | fastapi==0.108.0
20 | fastapi-socketio==0.0.10
21 | filetype==1.2.0
22 | frozenlist==1.4.1
23 | fsspec==2024.3.1
24 | googleapis-common-protos==1.63.0
25 | greenlet==3.0.3
26 | grpcio==1.62.1
27 | h11==0.14.0
28 | httpcore==1.0.5
29 | httpx==0.27.0
30 | idna==3.6
31 | importlib-metadata==7.0.0
32 | joblib==1.3.2
33 | Lazify==0.4.0
34 | literalai==0.0.401
35 | llama-index==0.10.26
36 | llama-index-agent-openai==0.2.1
37 | llama-index-cli==0.1.11
38 | llama-index-core==0.10.26
39 | llama-index-embeddings-openai==0.1.7
40 | llama-index-indices-managed-llama-cloud==0.1.5
41 | llama-index-legacy==0.9.48
42 | llama-index-llms-openai==0.1.14
43 | llama-index-multi-modal-llms-openai==0.1.4
44 | llama-index-program-openai==0.1.5
45 | llama-index-question-gen-openai==0.1.3
46 | llama-index-readers-file==0.1.13
47 | llama-index-readers-llama-parse==0.1.4
48 | llama-parse==0.4.0
49 | llamaindex-py-client==0.1.15
50 | marshmallow==3.21.1
51 | multidict==6.0.5
52 | mypy-extensions==1.0.0
53 | nest-asyncio==1.6.0
54 | networkx==3.2.1
55 | nltk==3.8.1
56 | numpy==1.26.4
57 | openai==1.16.1
58 | opentelemetry-api==1.24.0
59 | opentelemetry-exporter-otlp==1.24.0
60 | opentelemetry-exporter-otlp-proto-common==1.24.0
61 | opentelemetry-exporter-otlp-proto-grpc==1.24.0
62 | opentelemetry-exporter-otlp-proto-http==1.24.0
63 | opentelemetry-instrumentation==0.45b0
64 | opentelemetry-proto==1.24.0
65 | opentelemetry-sdk==1.24.0
66 | opentelemetry-semantic-conventions==0.45b0
67 | packaging==23.2
68 | pandas==2.2.1
69 | pillow==10.3.0
70 | protobuf==4.25.3
71 | pydantic==2.6.4
72 | pydantic_core==2.16.3
73 | PyJWT==2.8.0
74 | PyMuPDF==1.24.1
75 | PyMuPDFb==1.24.1
76 | pypdf==4.1.0
77 | python-dateutil==2.9.0.post0
78 | python-dotenv==1.0.1
79 | python-engineio==4.9.0
80 | python-graphql-client==0.4.3
81 | python-multipart==0.0.9
82 | python-socketio==5.11.2
83 | pytz==2024.1
84 | PyYAML==6.0.1
85 | regex==2023.12.25
86 | requests==2.31.0
87 | simple-websocket==1.0.0
88 | six==1.16.0
89 | sniffio==1.3.1
90 | soupsieve==2.5
91 | SQLAlchemy==2.0.29
92 | starlette==0.32.0.post1
93 | striprtf==0.0.26
94 | syncer==2.0.3
95 | tenacity==8.2.3
96 | tiktoken==0.6.0
97 | tomli==2.0.1
98 | tqdm==4.66.2
99 | typing-inspect==0.9.0
100 | typing_extensions==4.10.0
101 | tzdata==2024.1
102 | uptrace==1.22.0
103 | urllib3==2.2.1
104 | uvicorn==0.25.0
105 | watchfiles==0.20.0
106 | websockets==12.0
107 | wrapt==1.16.0
108 | wsproto==1.2.0
109 | yarl==1.9.4
110 | zipp==3.18.1
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/.chainlit/config.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | # Whether to enable telemetry (default: true). No personal data is collected.
3 | enable_telemetry = true
4 |
5 | # List of environment variables to be provided by each user to use the app.
6 | user_env = []
7 |
8 | # Duration (in seconds) during which the session is saved when the connection is lost
9 | session_timeout = 3600
10 |
11 | # Enable third parties caching (e.g LangChain cache)
12 | cache = false
13 |
14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15 | # follow_symlink = false
16 |
17 | [features]
18 | # Show the prompt playground
19 | prompt_playground = true
20 |
21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22 | unsafe_allow_html = false
23 |
24 | # Process and display mathematical expressions. This can clash with "$" characters in messages.
25 | latex = false
26 |
27 | # Authorize users to upload files with messages
28 | multi_modal = false
29 |
30 | # Allows user to use speech to text
31 | [features.speech_to_text]
32 | enabled = false
33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34 | # language = "en-US"
35 |
36 | [UI]
37 | # Name of the app and chatbot.
38 | name = "Chatbot"
39 |
40 | # Show the readme while the thread is empty.
41 | show_readme_as_default = false
42 |
43 | # Description of the app and chatbot. This is used for HTML tags.
44 | # description = ""
45 |
46 | # Large size content are by default collapsed for a cleaner ui
47 | default_collapse_content = true
48 |
49 | # The default value for the expand messages settings.
50 | default_expand_messages = false
51 |
52 | # Hide the chain of thought details from the user in the UI.
53 | hide_cot = false
54 |
55 | # Link to your github repo. This will add a github button in the UI's header.
56 | github = "https://github.com/felipearosr/GPT-Documents"
57 |
58 | # Specify a CSS file that can be used to customize the user interface.
59 | # The CSS file can be served from the public directory or via an external link.
60 | # custom_css = "/public/test.css"
61 |
62 | # Override default MUI light theme. (Check theme.ts)
63 | [UI.theme.light]
64 | #background = "#FAFAFA"
65 | #paper = "#FFFFFF"
66 |
67 | [UI.theme.light.primary]
68 | #main = "#F80061"
69 | #dark = "#980039"
70 | #light = "#FFE7EB"
71 |
72 | # Override default MUI dark theme. (Check theme.ts)
73 | [UI.theme.dark]
74 | #background = "#FAFAFA"
75 | #paper = "#FFFFFF"
76 |
77 | [UI.theme.dark.primary]
78 | #main = "#F80061"
79 | #dark = "#980039"
80 | #light = "#FFE7EB"
81 |
82 |
83 | [meta]
84 | generated_by = "1.0.101"
85 |
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/.chainlit/translations/en-US.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Settings",
8 | "settingsKey": "S",
9 | "APIKeys": "API Keys",
10 | "logout": "Logout"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "New Chat"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Task List",
22 | "loading": "Loading...",
23 | "error": "An error occured"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancel upload",
28 | "removeAttachment": "Remove attachment"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Create new chat?",
32 | "clearChat": "This will clear the current messages and start a new chat.",
33 | "cancel": "Cancel",
34 | "confirm": "Confirm"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expand Messages",
38 | "hideChainOfThought": "Hide Chain of Thought",
39 | "darkMode": "Dark Mode"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "Last Inputs",
47 | "noInputs": "Such empty...",
48 | "loading": "Loading..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Type your message here..."
54 | },
55 | "speechButton": {
56 | "start": "Start recording",
57 | "stop": "Stop recording"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Send message",
61 | "stopTask": "Stop Task"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Attach files"
65 | },
66 | "waterMark": {
67 | "text": "Built with"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Running",
73 | "executedSuccessfully": "executed successfully",
74 | "failed": "failed",
75 | "feedbackUpdated": "Feedback updated",
76 | "updating": "Updating"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Drop your files here"
81 | },
82 | "index": {
83 | "failedToUpload": "Failed to upload",
84 | "cancelledUploadOf": "Cancelled upload of",
85 | "couldNotReachServer": "Could not reach the server",
86 | "continuingChat": "Continuing previous chat"
87 | },
88 | "settings": {
89 | "settingsPanel": "Settings panel",
90 | "reset": "Reset",
91 | "cancel": "Cancel",
92 | "confirm": "Confirm"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: All",
100 | "feedbackPositive": "Feedback: Positive",
101 | "feedbackNegative": "Feedback: Negative"
102 | },
103 | "SearchBar": {
104 | "search": "Search"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 | "confirmMessage": "This will delete the thread as well as it's messages and elements.",
109 | "cancel": "Cancel",
110 | "confirm": "Confirm",
111 | "deletingChat": "Deleting chat",
112 | "chatDeleted": "Chat deleted"
113 | },
114 | "index": {
115 | "pastChats": "Past Chats"
116 | },
117 | "ThreadList": {
118 | "empty": "Empty..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Close sidebar",
122 | "openSidebar": "Open sidebar"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Go back to chat",
127 | "chatCreatedOn": "This chat was created on"
128 | }
129 | },
130 | "header": {
131 | "chat": "Chat",
132 | "readme": "Readme"
133 | }
134 | }
135 | },
136 | "hooks": {
137 | "useLLMProviders": {
138 | "failedToFetchProviders": "Failed to fetch providers:"
139 | }
140 | },
141 | "pages": {
142 | "Design": {},
143 | "Env": {
144 | "savedSuccessfully": "Saved successfully",
145 | "requiredApiKeys": "Required API Keys",
146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
147 | },
148 | "Page": {
149 | "notPartOfProject": "You are not part of this project."
150 | },
151 | "ResumeButton": {
152 | "resumeChat": "Resume Chat"
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/.chainlit/translations/pt-BR.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Configura\u00e7\u00f5es",
8 | "settingsKey": "S",
9 | "APIKeys": "Chaves de API",
10 | "logout": "Sair"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "Nova Conversa"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
22 | "loading": "Carregando...",
23 | "error": "Ocorreu um erro"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancelar envio",
28 | "removeAttachment": "Remover anexo"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Criar novo chat?",
32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
33 | "cancel": "Cancelar",
34 | "confirm": "Confirmar"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expandir Mensagens",
38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
39 | "darkMode": "Modo Escuro"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "\u00daltimas Entradas",
47 | "noInputs": "Vazio...",
48 | "loading": "Carregando..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Digite sua mensagem aqui..."
54 | },
55 | "speechButton": {
56 | "start": "Iniciar grava\u00e7\u00e3o",
57 | "stop": "Parar grava\u00e7\u00e3o"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Enviar mensagem",
61 | "stopTask": "Parar Tarefa"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Anexar arquivos"
65 | },
66 | "waterMark": {
67 | "text": "Constru\u00eddo com"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Executando",
73 | "executedSuccessfully": "executado com sucesso",
74 | "failed": "falhou",
75 | "feedbackUpdated": "Feedback atualizado",
76 | "updating": "Atualizando"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Solte seus arquivos aqui"
81 | },
82 | "index": {
83 | "failedToUpload": "Falha ao enviar",
84 | "cancelledUploadOf": "Envio cancelado de",
85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
86 | "continuingChat": "Continuando o chat anterior"
87 | },
88 | "settings": {
89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es",
90 | "reset": "Redefinir",
91 | "cancel": "Cancelar",
92 | "confirm": "Confirmar"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: Todos",
100 | "feedbackPositive": "Feedback: Positivo",
101 | "feedbackNegative": "Feedback: Negativo"
102 | },
103 | "SearchBar": {
104 | "search": "Buscar"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
109 | "cancel": "Cancelar",
110 | "confirm": "Confirmar",
111 | "deletingChat": "Deletando conversa",
112 | "chatDeleted": "Conversa deletada"
113 | },
114 | "index": {
115 | "pastChats": "Conversas Anteriores"
116 | },
117 | "ThreadList": {
118 | "empty": "Vazio..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Fechar barra lateral",
122 | "openSidebar": "Abrir barra lateral"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Voltar para a conversa",
127 | "chatCreatedOn": "Esta conversa foi criada em"
128 | }
129 | },
130 | "header": {
131 | "chat": "Conversa",
132 | "readme": "Leia-me"
133 | }
134 | },
135 | "hooks": {
136 | "useLLMProviders": {
137 | "failedToFetchProviders": "Falha ao buscar provedores:"
138 | }
139 | },
140 | "pages": {
141 | "Design": {},
142 | "Env": {
143 | "savedSuccessfully": "Salvo com sucesso",
144 | "requiredApiKeys": "Chaves de API necess\u00e1rias",
145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
146 | },
147 | "Page": {
148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
149 | },
150 | "ResumeButton": {
151 | "resumeChat": "Continuar Conversa"
152 | }
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/.env.example:
--------------------------------------------------------------------------------
1 | # Mandatory
2 | OPENAI_API_KEY="sk-..."
3 | PINECONE_API_KEY="..." # for vector database
4 | LLAMA_PARSE_API_KEY="llx-..." # for pdf ingestion
5 |
6 | # Optional
7 | MODEL="gpt-4-0125-preview"
8 | EMBEDDING="text-embedding-3-large"
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/README.md:
--------------------------------------------------------------------------------
1 | # Adding a vector DB, hybrid retriever, and improved ingestion
2 |
3 | In this updated version, we've added three major features to enhance the repository:
4 |
5 | 1. **Vector DB Integration**: We've integrated Pinecone for efficient vector data handling, improving real-time analysis and insight extraction.
6 |
7 | 2. **Hybrid Retriever**: Implements a blend of dense and sparse vector methods, enhancing search accuracy and relevance.
8 |
9 | 3. **Advanced Ingestion**: Employs specialized techniques like Unstructured for general documents and LlamaParse for PDFs, plus metadata enhancement to improve document retrievability and context for LLMs.
10 |
11 | 
12 |
13 | ## Table of Contents
14 |
15 | 1. [Installation Instructions](#installation-instructions)
16 | 2. [Usage](#usage)
17 | 3. [Pinecone](#pinecone)
18 | 4. [Hybrid Retriever](#hybrid-retriever)
19 | 5. [Advanced Ingestion](#advanced-ingestion)
20 |
21 | ## Installation Instructions
22 |
23 | Follow these steps to set up the GPT Documents chatbot on your local machine:
24 |
25 | 1. Create a conda environment:
26 |
27 | ```shell
28 | conda create -n rag python==3.11 -y && source activate rag
29 | ```
30 |
31 | 2. Install the required dependencies:
32 |
33 | ```shell
34 | pip install -r requirements.txt
35 | ```
36 |
37 | 3. Load your documents into the vector store:
38 |    - Create a folder named `data`.
39 |    - Place your documents inside the `data` folder.
40 |    - Execute the `ingest.py` script to initiate the loading process.
41 |
42 | ## Usage
43 |
44 | Once the setup is complete, launch the chainlit app using the following command:
45 |
46 | ```shell
47 | chainlit run -w main.py
48 | ```
49 |
50 | Feel free to explore the functionalities and contribute to the development of this project. Your feedback and contributions are highly appreciated!
51 |
52 | ## Pinecone
53 |
54 | ### What is Pinecone?
55 |
56 | Pinecone is a specialized vector database designed to optimize the storage and querying of vector embeddings. This capability enables efficient real-time analysis and extraction of insights from complex, large-scale data. Its architecture is specifically tuned for handling the intricacies of vector data, making it an ideal choice for applications requiring rapid retrieval and analysis of such information.
57 |
58 | In the provided example, Pinecone is utilized to create a hybrid index, which is a critical component for a hybrid retriever system. This system leverages both textual and vector-based data to enhance search and retrieval capabilities. While Pinecone is highlighted for its effective handling of vector embeddings and support for hybrid indexing, it's worth noting that other vector databases offering similar types of indexing could also be considered based on project requirements and specific use cases.
59 |
60 | By adopting Pinecone or a similar vector database, developers can implement advanced retrieval systems that combine the strengths of traditional and vector-based search methods, leading to more nuanced and efficient data handling and retrieval solutions.
61 |
62 | ### How do we implement it?
63 | `main.py`
64 | ```python
65 | from pinecone import Pinecone
66 | from llama_index.vector_stores.pinecone import PineconeVectorStore
67 |
68 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
69 |
70 | @cl.cache
71 | def load_context():
72 |     pc = Pinecone(api_key=pinecone_api_key)
73 |     pinecone_index = pc.Index("pinecone-index")
74 |     vector_store = PineconeVectorStore(
75 |         pinecone_index=pinecone_index,
76 |     )
77 | 
78 |     index = VectorStoreIndex.from_vector_store(
79 |         vector_store=vector_store,
80 |     )
81 |     return index
82 | ```
83 | `ingest.py`
84 | ```python
85 | from pinecone import Pinecone, PodSpec
86 | from llama_index.vector_stores.pinecone import PineconeVectorStore
87 |
88 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
89 | pc = Pinecone(api_key=pinecone_api_key)
90 |
91 | def get_pinecone_index(pc, index_name):
92 |     pinecone_index = pc.Index(index_name)
93 |     return pinecone_index
94 | 
95 | 
96 | def get_pinecone_vector_store(pinecone_index):
97 |     vector_store = PineconeVectorStore(
98 |         pinecone_index=pinecone_index,
99 |         add_sparse_vector=True,
100 |     )
101 |     return vector_store
102 | ```
103 | Use the `--gen` flag when running the ingestion script (`python ingest.py --gen`) to create a Pinecone pod if you haven't created one already:
104 | ```python
105 | def create_pinecone_pod(pc, index_name):
106 |     print("Creating pinecone pod")
107 |     pc.create_index(
108 |         name=index_name,
109 |         dimension=3072,
110 |         metric="dotproduct",
111 |         spec=PodSpec(environment="gcp-starter"),
112 |     )
113 | ```
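
On the ingestion side, the vector store returned by `get_pinecone_vector_store` backs the index build. Here is a minimal sketch using standard LlamaIndex calls (not a verbatim copy of this project's `ingest.py`); it assumes your documents live in a `data` folder and that the index is named `pinecone-index`, as in the `main.py` snippet above.

```python
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex

documents = SimpleDirectoryReader("./data").load_data(show_progress=True)

pinecone_index = get_pinecone_index(pc, "pinecone-index")
vector_store = get_pinecone_vector_store(pinecone_index)

# attach the Pinecone-backed vector store to a storage context and build the index;
# with add_sparse_vector=True the sparse vectors needed for hybrid search are stored too
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
```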
114 | ## Hybrid Retriever
115 |
116 | ### What is a Hybrid Retriever?
117 |
118 | A hybrid retriever is a sophisticated tool used in information retrieval systems, combining the best features of both dense and sparse vector methods to enhance search results' accuracy and relevance. In the context of AI and data search, this means leveraging the strengths of both context-understanding capabilities (dense vectors) and keyword-matching skills (sparse vectors).
119 |
120 | Typically, dense vectors are excellent at grasping the overall context of a query but may miss out on important keyword-specific details. On the other hand, sparse vectors excel at identifying exact keyword matches but might lack in understanding the broader context. A hybrid retriever merges these approaches, providing a more balanced and effective retrieval mechanism.
121 |
122 | For instance, in the field of document retrieval, such as with academic papers or medical abstracts, a hybrid approach can be particularly beneficial. By combining the contextual understanding of dense vector models with the precision of sparse retrieval methods like BM25, a hybrid retrieval pipeline can significantly improve the relevance and accuracy of search results.
123 |
124 | In practical applications, hybrid retrievers involve creating and processing both sparse and dense vectors for documents and queries. This includes tokenization processes for sparse vectors and embedding generation for dense vectors, as well as the management of these vectors within a suitable database or search engine like Pinecone or Weaviate. The retrieval process then utilizes these vectors to deliver highly relevant search results, balancing the depth of context and specificity of keywords.
125 |
126 |
127 | ### How do we implement it?
128 | ```python
129 | @cl.on_chat_start
130 | async def start():
131 | # ...
132 | # What is important here is adding `vector_store_query_mode="hybrid"`
133 |     # It is also important to use the right type of index, so make sure
134 |     # you read the ingestion part of this README.
135 | query_engine = index.as_query_engine(
136 | streaming=True,
137 | similarity_top_k=4,
138 | vector_store_query_mode="hybrid", # Added line of code
139 | )
140 | # ...
141 | ```
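
The balance between dense and sparse matching can be tuned as well. A sketch (assuming the default LlamaIndex vector retriever, which accepts an `alpha` weight where 1.0 is pure dense and 0.0 is pure sparse):

```python
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=4,
    vector_store_query_mode="hybrid",
    alpha=0.5,  # illustrative value: equal weight to dense and sparse scores
)
```
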
142 | ## Advanced Ingestion
143 |
144 | ### What is advanced ingestion?
145 |
146 | Advanced ingestion involves specialized methods to optimize documents for better retrieval by large language models (LLMs). We use three main approaches:
147 |
148 | 1. **Unstructured**: Applied for all document types except PDFs, enhancing data extraction and structuring to improve LLM readability. Explore various connectors from Llama Index for optimal results. More details [here](https://github.com/Unstructured-IO/unstructured).
149 |
150 | 2. **Llama Parse**: Specifically for processing PDFs, transforming them into a more LLM-friendly format. Check it out [here](https://github.com/run-llama/llama_parse).
151 |
152 | 3. **Metadata Enhancement**: We're incorporating metadata into the documents for enriched context and searchability. You have the option to exclude them as needed. However, be mindful that each piece of metadata incurs a processing cost by the LLM due to the additional analysis required.
153 |
154 | ### How do we implement it?
155 |
156 | `unstructured`
157 | ```python
158 | UnstructuredReader = download_loader("UnstructuredReader")
159 |
160 | file_extractor = {
161 | # ...
162 | ".html": UnstructuredReader(),
163 | ".txt": UnstructuredReader(),
164 | }
165 | director_reader = SimpleDirectoryReader(
166 | input_dir=input_dir, file_extractor=file_extractor
167 | )
168 | documents = director_reader.load_data(show_progress=True)
169 | ```
170 |
171 | `llama parse`
172 | ```python
173 | llama_parser = LlamaParse(api_key=llama_parse_api_key, result_type="markdown", verbose=True)
174 |
175 | file_extractor = {
176 | ".pdf": llama_parser,
177 | # ...
178 | }
179 | director_reader = SimpleDirectoryReader(
180 | input_dir=input_dir, file_extractor=file_extractor
181 | )
182 | documents = director_reader.load_data(show_progress=True)
183 | ```
184 |
185 | `metadata enhancement`
186 | ```python
187 | pipeline = IngestionPipeline(
188 | transformations=[
189 | SentenceSplitter(chunk_size=512, chunk_overlap=126),
190 | TitleExtractor(llm=llm, num_workers=num_workers),
191 | QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
192 | SummaryExtractor(summaries=["prev", "self"], llm=llm, num_workers=num_workers),
193 | KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
194 | OpenAIEmbedding(model=EMBEDDING)
195 | ],
196 | vector_store=vector_store,
197 | )
198 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers)
199 | ```
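
If you keep the extractors but want to hide some of the generated metadata from the LLM or the embedding model, LlamaIndex documents expose exclusion lists. A small sketch (the key names are illustrative and depend on which extractors you enable):

```python
for doc in documents:
    # keep the key in the vector store, but leave it out of the LLM prompt
    doc.excluded_llm_metadata_keys = ["questions_this_excerpt_can_answer"]
    # and leave it out of the text that gets embedded
    doc.excluded_embed_metadata_keys = ["document_title"]
```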
200 |
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/chainlit.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/2.Pinecone - HybridRetriever - Adv.Ingestion/chainlit.md
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/ingest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import argparse
4 |
5 | from dotenv import load_dotenv
6 | from pinecone import Pinecone, ServerlessSpec
7 |
8 | from llama_parse import LlamaParse
9 | from llama_index.core import SimpleDirectoryReader
10 | from llama_index.llms.openai import OpenAI
11 | from llama_index.core.ingestion import IngestionPipeline
12 | from llama_index.core.node_parser import SentenceSplitter
13 | from llama_index.embeddings.openai import OpenAIEmbedding
14 | from llama_index.vector_stores.pinecone import PineconeVectorStore
15 | from llama_index.core.extractors import (
16 | TitleExtractor,
17 | # QuestionsAnsweredExtractor,
18 | # SummaryExtractor,
19 | # KeywordExtractor,
20 | )
21 |
22 | load_dotenv()
23 | openai.api_key = os.environ.get("OPENAI_API_KEY")
24 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
25 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY")
26 |
27 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
28 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
29 |
30 |
31 | def get_pinecone_index(pc, index_name):
32 | pinecone_index = pc.Index(index_name)
33 | return pinecone_index
34 |
35 |
36 | def get_pinecone_vector_store(pinecone_index):
37 | vector_store = PineconeVectorStore(
38 | pinecone_index=pinecone_index,
39 | add_sparse_vector=True,
40 | )
41 | return vector_store
42 |
43 |
44 | def create_pinecone_serverless_index(pc, index_name):
45 | print("Creating pinecone serverless index")
46 | pc.create_index(
47 | name=index_name,
48 |         dimension=3072,  # matches the text-embedding-3-large embedding size
49 |         metric="dotproduct",  # required by Pinecone for hybrid (dense + sparse) queries
50 | spec=ServerlessSpec(cloud="aws", region="us-east-1"),
51 | )
52 |
53 |
54 | def get_documents(input_dir):
55 | llama_parser = LlamaParse(
56 | api_key=llama_parse_api_key, result_type="markdown", verbose=True
57 | )
58 |
59 | file_extractor = {
60 | ".pdf": llama_parser,
61 | }
62 | print("Reading directory")
63 | director_reader = SimpleDirectoryReader(
64 | input_dir=input_dir, file_extractor=file_extractor
65 | )
66 | print("Starting document reading")
67 | documents = director_reader.load_data(show_progress=True)
68 | return documents
69 |
70 |
71 | def run_pipeline(documents, vector_store, llm, num_workers):
72 | pipeline = IngestionPipeline(
73 | transformations=[
74 | SentenceSplitter(chunk_size=512, chunk_overlap=126),
75 | TitleExtractor(llm=llm, num_workers=num_workers),
76 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
77 | # SummaryExtractor(
78 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers
79 | # ),
80 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
81 | OpenAIEmbedding(model=EMBEDDING),
82 | ],
83 | vector_store=vector_store,
84 | )
85 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers)
86 |
87 |
88 | def main():
89 | input_dir = "./data/"
90 | index_name = "rag-index"
91 | num_cores = os.cpu_count()
92 | num_workers = min(4, num_cores)
93 | pc = Pinecone(api_key=pinecone_api_key)
94 |     parser = argparse.ArgumentParser(description="Ingest documents into the Pinecone index.")
95 | parser.add_argument(
96 | "--gen",
97 | action="store_true",
98 | help="Generate new pinecone index",
99 | )
100 | args = parser.parse_args()
101 | if args.gen:
102 | create_pinecone_serverless_index(pc, index_name)
103 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024)
104 | pinecone_index = get_pinecone_index(pc, index_name)
105 | vector_store = get_pinecone_vector_store(pinecone_index)
106 | documents = get_documents(input_dir)
107 | print("Starting ingestion pipeline")
108 | run_pipeline(documents, vector_store, llm, num_workers)
109 |
110 |
111 | if __name__ == "__main__":
112 | print("Starting ingestion")
113 | main()
114 |
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import chainlit as cl
4 |
5 | from pinecone import Pinecone
6 | from llama_index.core import Settings, VectorStoreIndex
7 | from llama_index.llms.openai import OpenAI
8 | from llama_index.embeddings.openai import OpenAIEmbedding
9 | from llama_index.vector_stores.pinecone import PineconeVectorStore
10 |
11 | openai.api_key = os.environ.get("OPENAI_API_KEY")
12 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
13 |
14 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
15 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
16 |
17 |
18 | @cl.cache
19 | def load_context():
20 | Settings.llm = OpenAI(temperature=0.1, model=MODEL, streaming=True)
21 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1)
22 | Settings.num_output = 1024
23 | Settings.context_window = 128000
24 | pc = Pinecone(api_key=pinecone_api_key)
25 | pinecone_index = pc.Index("rag-index")
26 | vector_store = PineconeVectorStore(
27 | pinecone_index=pinecone_index,
28 | )
29 |
30 | index = VectorStoreIndex.from_vector_store(
31 | vector_store=vector_store,
32 | )
33 | return index
34 |
35 |
36 | @cl.on_chat_start
37 | async def start():
38 | index = load_context()
39 |
40 | query_engine = index.as_query_engine(
41 | streaming=True,
42 | similarity_top_k=4,
43 | vector_store_query_mode="hybrid",
44 | )
45 | cl.user_session.set("query_engine", query_engine)
46 |
47 | message_history = []
48 | cl.user_session.set("message_history", message_history)
49 |
50 | await cl.Message(
51 |         author="Assistant", content="Hello! I'm an AI assistant. How may I help you?"
52 | ).send()
53 |
54 |
55 | async def set_sources(response, response_message):
56 |     label_list = []
57 |     elements = []  # one side element per source node
58 |     count = 1
59 |     for sr in response.source_nodes:
60 |         elements.append(
61 |             cl.Text(
62 |                 name="S" + str(count),
63 |                 content=f"{sr.node.text}",
64 |                 display="side",
65 |                 size="small",
66 |             )
67 |         )
68 |         label_list.append("S" + str(count))
69 |         count += 1
70 |     response_message.elements = elements
71 |     response_message.content += "\n\nSources: " + ", ".join(label_list)
72 |     await response_message.update()
73 |
74 |
75 | @cl.on_message
76 | async def main(message: cl.Message):
77 | query_engine = cl.user_session.get("query_engine")
78 | message_history = cl.user_session.get("message_history")
79 | prompt_template = "Previous messages:\n"
80 |
81 | response_message = cl.Message(content="", author="Assistant")
82 |
83 | user_message = message.content
84 |
85 | for message in message_history:
86 | prompt_template += f"{message['author']}: {message['content']}\n"
87 | prompt_template += f"Human: {user_message}"
88 |
89 | response = await cl.make_async(query_engine.query)(prompt_template)
90 |
91 | for token in response.response_gen:
92 | await response_message.stream_token(token)
93 | if response.response_txt:
94 | response_message.content = response.response_txt
95 | await response_message.send()
96 |
97 | message_history.append({"author": "Human", "content": user_message})
98 | message_history.append({"author": "AI", "content": response_message.content})
99 | message_history = message_history[-4:]
100 | cl.user_session.set("message_history", message_history)
101 |
102 | if response.source_nodes:
103 | await set_sources(response, response_message)
104 |
--------------------------------------------------------------------------------
/2.Pinecone - HybridRetriever - Adv.Ingestion/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==23.2.1
2 | aiohttp==3.9.5
3 | aiosignal==1.3.1
4 | annotated-types==0.6.0
5 | anyio==3.7.1
6 | asyncer==0.0.2
7 | attrs==23.2.0
8 | backoff==2.2.1
9 | beautifulsoup4==4.12.3
10 | bidict==0.23.1
11 | certifi==2024.2.2
12 | chainlit==1.0.505
13 | chardet==5.2.0
14 | charset-normalizer==3.3.2
15 | chevron==0.14.0
16 | click==8.1.7
17 | dataclasses-json==0.5.14
18 | dataclasses-json-speakeasy==0.5.11
19 | Deprecated==1.2.14
20 | dirtyjson==1.0.8
21 | distro==1.9.0
22 | emoji==2.11.1
23 | fastapi==0.110.2
24 | fastapi-socketio==0.0.10
25 | filelock==3.13.4
26 | filetype==1.2.0
27 | frozenlist==1.4.1
28 | fsspec==2024.3.1
29 | googleapis-common-protos==1.63.0
30 | greenlet==3.0.3
31 | grpcio==1.62.2
32 | h11==0.14.0
33 | html2text==2024.2.26
34 | httpcore==1.0.5
35 | httpx==0.27.0
36 | huggingface-hub==0.22.2
37 | idna==3.7
38 | importlib-metadata==7.0.0
39 | joblib==1.4.0
40 | jsonpath-python==1.0.6
41 | langdetect==1.0.9
42 | Lazify==0.4.0
43 | literalai==0.0.507
44 | llama-hub==0.0.79.post1
45 | llama-index==0.10.32
46 | llama-index-agent-openai==0.2.3
47 | llama-index-cli==0.1.12
48 | llama-index-core==0.10.32
49 | llama-index-embeddings-openai==0.1.9
50 | llama-index-indices-managed-llama-cloud==0.1.6
51 | llama-index-legacy==0.9.48
52 | llama-index-llms-openai==0.1.16
53 | llama-index-multi-modal-llms-openai==0.1.5
54 | llama-index-program-openai==0.1.6
55 | llama-index-question-gen-openai==0.1.3
56 | llama-index-readers-file==0.1.19
57 | llama-index-readers-llama-parse==0.1.4
58 | llama-index-vector-stores-pinecone==0.1.6
59 | llama-parse==0.4.2
60 | llamaindex-py-client==0.1.19
61 | lxml==5.2.1
62 | marshmallow==3.21.1
63 | multidict==6.0.5
64 | mypy-extensions==1.0.0
65 | nest-asyncio==1.6.0
66 | networkx==3.3
67 | nltk==3.8.1
68 | numpy==1.26.4
69 | openai==1.23.6
70 | opentelemetry-api==1.24.0
71 | opentelemetry-exporter-otlp==1.24.0
72 | opentelemetry-exporter-otlp-proto-common==1.24.0
73 | opentelemetry-exporter-otlp-proto-grpc==1.24.0
74 | opentelemetry-exporter-otlp-proto-http==1.24.0
75 | opentelemetry-instrumentation==0.45b0
76 | opentelemetry-proto==1.24.0
77 | opentelemetry-sdk==1.24.0
78 | opentelemetry-semantic-conventions==0.45b0
79 | packaging==23.2
80 | pandas==2.2.2
81 | pillow==10.3.0
82 | pinecone-client==3.2.2
83 | protobuf==4.25.3
84 | psutil==5.9.8
85 | pyaml==23.12.0
86 | pydantic==2.7.1
87 | pydantic_core==2.18.2
88 | PyJWT==2.8.0
89 | pypdf==4.2.0
90 | python-dateutil==2.9.0.post0
91 | python-dotenv==1.0.1
92 | python-engineio==4.9.0
93 | python-graphql-client==0.4.3
94 | python-iso639==2024.2.7
95 | python-magic==0.4.27
96 | python-multipart==0.0.9
97 | python-socketio==5.11.2
98 | pytz==2024.1
99 | PyYAML==6.0.1
100 | rapidfuzz==3.8.1
101 | regex==2024.4.16
102 | requests==2.31.0
103 | retrying==1.3.4
104 | safetensors==0.4.3
105 | simple-websocket==1.0.0
106 | six==1.16.0
107 | sniffio==1.3.1
108 | soupsieve==2.5
109 | SQLAlchemy==2.0.29
110 | starlette==0.37.2
111 | striprtf==0.0.26
112 | syncer==2.0.3
113 | tabulate==0.9.0
114 | tenacity==8.2.3
115 | tiktoken==0.6.0
116 | tokenizer==3.4.3
117 | tokenizers==0.19.1
118 | tomli==2.0.1
119 | tqdm==4.66.2
120 | transformers==4.40.1
121 | typing-inspect==0.9.0
122 | typing_extensions==4.11.0
123 | tzdata==2024.1
124 | unstructured==0.13.4
125 | unstructured-client==0.18.0
126 | uptrace==1.24.0
127 | urllib3==2.2.1
128 | uvicorn==0.25.0
129 | watchfiles==0.20.0
130 | websockets==12.0
131 | wrapt==1.16.0
132 | wsproto==1.2.0
133 | yarl==1.9.4
134 | zipp==3.18.1
135 |
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/.chainlit/config.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | # Whether to enable telemetry (default: true). No personal data is collected.
3 | enable_telemetry = true
4 |
5 | # List of environment variables to be provided by each user to use the app.
6 | user_env = []
7 |
8 | # Duration (in seconds) during which the session is saved when the connection is lost
9 | session_timeout = 3600
10 |
11 | # Enable third parties caching (e.g LangChain cache)
12 | cache = false
13 |
14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15 | # follow_symlink = false
16 |
17 | [features]
18 | # Show the prompt playground
19 | prompt_playground = true
20 |
21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22 | unsafe_allow_html = false
23 |
24 | # Process and display mathematical expressions. This can clash with "$" characters in messages.
25 | latex = false
26 |
27 | # Authorize users to upload files with messages
28 | multi_modal = false
29 |
30 | # Allows user to use speech to text
31 | [features.speech_to_text]
32 | enabled = false
33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34 | # language = "en-US"
35 |
36 | [UI]
37 | # Name of the app and chatbot.
38 | name = "Chatbot"
39 |
40 | # Show the readme while the thread is empty.
41 | show_readme_as_default = false
42 |
43 | # Description of the app and chatbot. This is used for HTML tags.
44 | # description = ""
45 |
46 | # Large size content are by default collapsed for a cleaner ui
47 | default_collapse_content = true
48 |
49 | # The default value for the expand messages settings.
50 | default_expand_messages = false
51 |
52 | # Hide the chain of thought details from the user in the UI.
53 | hide_cot = false
54 |
55 | # Link to your github repo. This will add a github button in the UI's header.
56 | github = "https://github.com/felipearosr/GPT-Documents"
57 |
58 | # Specify a CSS file that can be used to customize the user interface.
59 | # The CSS file can be served from the public directory or via an external link.
60 | # custom_css = "/public/test.css"
61 |
62 | # Override default MUI light theme. (Check theme.ts)
63 | [UI.theme.light]
64 | #background = "#FAFAFA"
65 | #paper = "#FFFFFF"
66 |
67 | [UI.theme.light.primary]
68 | #main = "#F80061"
69 | #dark = "#980039"
70 | #light = "#FFE7EB"
71 |
72 | # Override default MUI dark theme. (Check theme.ts)
73 | [UI.theme.dark]
74 | #background = "#FAFAFA"
75 | #paper = "#FFFFFF"
76 |
77 | [UI.theme.dark.primary]
78 | #main = "#F80061"
79 | #dark = "#980039"
80 | #light = "#FFE7EB"
81 |
82 |
83 | [meta]
84 | generated_by = "1.0.101"
85 |
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/.chainlit/translations/en-US.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Settings",
8 | "settingsKey": "S",
9 | "APIKeys": "API Keys",
10 | "logout": "Logout"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "New Chat"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Task List",
22 | "loading": "Loading...",
23 |                     "error": "An error occurred"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancel upload",
28 | "removeAttachment": "Remove attachment"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Create new chat?",
32 | "clearChat": "This will clear the current messages and start a new chat.",
33 | "cancel": "Cancel",
34 | "confirm": "Confirm"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expand Messages",
38 | "hideChainOfThought": "Hide Chain of Thought",
39 | "darkMode": "Dark Mode"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "Last Inputs",
47 | "noInputs": "Such empty...",
48 | "loading": "Loading..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Type your message here..."
54 | },
55 | "speechButton": {
56 | "start": "Start recording",
57 | "stop": "Stop recording"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Send message",
61 | "stopTask": "Stop Task"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Attach files"
65 | },
66 | "waterMark": {
67 | "text": "Built with"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Running",
73 | "executedSuccessfully": "executed successfully",
74 | "failed": "failed",
75 | "feedbackUpdated": "Feedback updated",
76 | "updating": "Updating"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Drop your files here"
81 | },
82 | "index": {
83 | "failedToUpload": "Failed to upload",
84 | "cancelledUploadOf": "Cancelled upload of",
85 | "couldNotReachServer": "Could not reach the server",
86 | "continuingChat": "Continuing previous chat"
87 | },
88 | "settings": {
89 | "settingsPanel": "Settings panel",
90 | "reset": "Reset",
91 | "cancel": "Cancel",
92 | "confirm": "Confirm"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: All",
100 | "feedbackPositive": "Feedback: Positive",
101 | "feedbackNegative": "Feedback: Negative"
102 | },
103 | "SearchBar": {
104 | "search": "Search"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 |                     "confirmMessage": "This will delete the thread as well as its messages and elements.",
109 | "cancel": "Cancel",
110 | "confirm": "Confirm",
111 | "deletingChat": "Deleting chat",
112 | "chatDeleted": "Chat deleted"
113 | },
114 | "index": {
115 | "pastChats": "Past Chats"
116 | },
117 | "ThreadList": {
118 | "empty": "Empty..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Close sidebar",
122 | "openSidebar": "Open sidebar"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Go back to chat",
127 | "chatCreatedOn": "This chat was created on"
128 | }
129 | },
130 | "header": {
131 | "chat": "Chat",
132 | "readme": "Readme"
133 | }
134 | }
135 | },
136 | "hooks": {
137 | "useLLMProviders": {
138 | "failedToFetchProviders": "Failed to fetch providers:"
139 | }
140 | },
141 | "pages": {
142 | "Design": {},
143 | "Env": {
144 | "savedSuccessfully": "Saved successfully",
145 | "requiredApiKeys": "Required API Keys",
146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
147 | },
148 | "Page": {
149 | "notPartOfProject": "You are not part of this project."
150 | },
151 | "ResumeButton": {
152 | "resumeChat": "Resume Chat"
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/.chainlit/translations/pt-BR.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Configura\u00e7\u00f5es",
8 | "settingsKey": "S",
9 | "APIKeys": "Chaves de API",
10 | "logout": "Sair"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "Nova Conversa"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
22 | "loading": "Carregando...",
23 | "error": "Ocorreu um erro"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancelar envio",
28 | "removeAttachment": "Remover anexo"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Criar novo chat?",
32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
33 | "cancel": "Cancelar",
34 | "confirm": "Confirmar"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expandir Mensagens",
38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
39 | "darkMode": "Modo Escuro"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "\u00daltimas Entradas",
47 | "noInputs": "Vazio...",
48 | "loading": "Carregando..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Digite sua mensagem aqui..."
54 | },
55 | "speechButton": {
56 | "start": "Iniciar grava\u00e7\u00e3o",
57 | "stop": "Parar grava\u00e7\u00e3o"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Enviar mensagem",
61 | "stopTask": "Parar Tarefa"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Anexar arquivos"
65 | },
66 | "waterMark": {
67 | "text": "Constru\u00eddo com"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Executando",
73 | "executedSuccessfully": "executado com sucesso",
74 | "failed": "falhou",
75 | "feedbackUpdated": "Feedback atualizado",
76 | "updating": "Atualizando"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Solte seus arquivos aqui"
81 | },
82 | "index": {
83 | "failedToUpload": "Falha ao enviar",
84 | "cancelledUploadOf": "Envio cancelado de",
85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
86 | "continuingChat": "Continuando o chat anterior"
87 | },
88 | "settings": {
89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es",
90 | "reset": "Redefinir",
91 | "cancel": "Cancelar",
92 | "confirm": "Confirmar"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: Todos",
100 | "feedbackPositive": "Feedback: Positivo",
101 | "feedbackNegative": "Feedback: Negativo"
102 | },
103 | "SearchBar": {
104 | "search": "Buscar"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
109 | "cancel": "Cancelar",
110 | "confirm": "Confirmar",
111 | "deletingChat": "Deletando conversa",
112 | "chatDeleted": "Conversa deletada"
113 | },
114 | "index": {
115 | "pastChats": "Conversas Anteriores"
116 | },
117 | "ThreadList": {
118 | "empty": "Vazio..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Fechar barra lateral",
122 | "openSidebar": "Abrir barra lateral"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Voltar para a conversa",
127 | "chatCreatedOn": "Esta conversa foi criada em"
128 | }
129 | },
130 | "header": {
131 | "chat": "Conversa",
132 | "readme": "Leia-me"
133 | }
134 | },
135 | "hooks": {
136 | "useLLMProviders": {
137 | "failedToFetchProviders": "Falha ao buscar provedores:"
138 | }
139 | },
140 | "pages": {
141 | "Design": {},
142 | "Env": {
143 | "savedSuccessfully": "Salvo com sucesso",
144 | "requiredApiKeys": "Chaves de API necess\u00e1rias",
145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
146 | },
147 | "Page": {
148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
149 | },
150 | "ResumeButton": {
151 | "resumeChat": "Continuar Conversa"
152 | }
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/.env.example:
--------------------------------------------------------------------------------
1 | # Mandatory
2 | OPENAI_API_KEY="sk-..."
3 | COHERE_API_KEY="..." # for the reranker
4 | PINECONE_API_KEY="..." # for vector database
5 | LLAMA_PARSE_API_KEY="llx-..." # for pdf ingestion
6 |
7 | # Optional
8 | MODEL="gpt-4-0125-preview"
9 | EMBEDDING="text-embedding-3-large"
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/README.md:
--------------------------------------------------------------------------------
1 | # Adding reranker, query transformations and response synthesis.
2 |
3 | This repository combines the Cohere reranker with a hybrid retriever, merging the strengths of keyword and vector-based search with semantic reranking. This approach retrieves a wide range of relevant documents and orders them so that the results align with the user's intent.
4 |
5 | To enhance query processing, we implement two additional methods:
6 |
7 | 1. **Multi-Step Transformations**: This method deconstructs intricate queries into simpler, more manageable subquestions, each of which is then executed against the database. The responses obtained from these subquestions guide the construction and execution of follow-up queries, ensuring a comprehensive and detailed exploration of the user's original inquiry.
8 |
9 | 2. **Refine**: This approach methodically processes each piece of retrieved text, making individual calls to the Large Language Model (LLM) for each text chunk. This sequential refinement enables a progressive enhancement of the answer, ensuring that each chunk contributes to a more complete and accurate response.
10 |
11 | By incorporating these methods, the repository not only improves the precision and relevance of search results but also ensures a deeper, more nuanced understanding and response to complex queries, enhancing overall search performance and user experience.
12 |
13 | 
14 |
15 | ## Table of Contents
16 |
17 | 1. [Installation Instructions](#installation-instructions)
18 | 2. [Usage](#usage)
19 | 3. [Reranker](#reranker)
20 | 4. [Query Transformations](#query-transformations)
21 | 5. [Response Synthesis](#response-synthesis)
22 |
23 |
24 | ## Installation Instructions
25 |
26 | Follow these steps to set up the GPT Documents chatbot on your local machine:
27 |
28 | 1. Create a conda environment:
29 |
30 | ```shell
31 | conda create -n rag python==3.11 -y && source activate rag
32 | ```
33 |
34 | 2. Install the required dependencies:
35 |
36 | ```shell
37 | pip install -r requirements.txt
38 | ```
39 |
40 | 3. Load your documents into the vector store:
41 |    - Create a folder named `data/source_files`.
42 |    - Place your documents inside the `data/source_files` folder (this is the path `ingest.py` reads from).
43 |    - Run the `ingest.py` script to start the loading process.
44 |
45 | ## Usage
46 |
47 | Once the setup is complete, launch the chainlit app using the following command:
48 |
49 | ```shell
50 | chainlit run -w main.py
51 | ```
52 |
53 | Feel free to explore the functionalities and contribute to the development of this project. Your feedback and contributions are highly appreciated!
54 |
55 | ## Reranker
56 |
57 | ### What is a reranker and why do we use it?
58 |
59 | The Cohere reranker is a tool that enhances search quality by semantically reranking documents to align more closely with user queries, without needing major changes to existing systems. It's easy to implement and can be customized for specific needs. When combined with a hybrid retriever, which merges keyword and vector search benefits, the Cohere reranker ensures documents are not just relevant but also correctly prioritized according to the query's semantic intent, thus boosting search accuracy and user satisfaction.
60 |
61 | ### How do we implement it?
62 |
63 | ```python
64 | from llama_index.postprocessor.cohere_rerank import CohereRerank
65 |
66 | # make sure you add your cohere key to your .env file
67 | cohere_api_key = os.environ.get("COHERE_API_KEY")
68 |
69 | @cl.on_chat_start
70 | async def start():
71 | # ...
72 | reranker = CohereRerank(api_key=cohere_api_key, top_n=3)
73 |
74 | query_engine = index.as_query_engine(
75 | streaming=True,
76 | similarity_top_k=6,
77 | node_postprocessors=[reranker], # add this line
78 | vector_store_query_mode="hybrid",
79 | query_transform=step_decompose_transform,
80 | response_synthesizer_mode=ResponseMode.REFINE,
81 | )
82 | # ...
83 | ```
84 |
85 | ## Query Transformations
86 |
87 | ### What are query transformations?
88 |
89 | LlamaIndex facilitates query transformations, allowing the conversion of a query into a different form for improved processing. These transformations can be single-step, where the transformation occurs once before execution, or multi-step, involving sequential transformation and execution phases with responses influencing subsequent queries.
90 |
91 | Use Cases:
92 |
93 | - **HyDE**: This technique generates a hypothetical answer document from a natural language query for more effective embedding lookup.
94 | - **Multi-Step Transformations**: Involves breaking down a complex query into smaller, manageable subquestions, executing each against the index, and using the responses to inform follow-up questions.
95 |
96 | In this case we implement the multi-step transformation; feel free to play around with other techniques such as HyDE (a sketch follows the code below).
97 |
98 | ### How do we implement it?
99 |
100 | ```python
101 | from llama_index.core.indices.query.query_transform.base import StepDecomposeQueryTransform
102 |
103 | @cl.on_chat_start
104 | async def start():
105 |     step_decompose_transform = StepDecomposeQueryTransform(llm=Settings.llm, verbose=True)
106 | 
107 |     query_engine = index.as_query_engine(
108 | streaming=True,
109 | similarity_top_k=6,
110 | node_postprocessors=[reranker],
111 | vector_store_query_mode="hybrid",
112 | query_transform=step_decompose_transform, # add this line
113 | response_synthesizer_mode=ResponseMode.REFINE,
114 | )
115 | ```
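
If you want to try HyDE instead, it can be wired in much the same way. A minimal sketch (the engine construction and the sample query are illustrative, not taken from this folder's `main.py`):

```python
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine import TransformQueryEngine

# HyDE: ask the LLM for a hypothetical answer first, then embed that answer for retrieval
hyde = HyDEQueryTransform(include_original=True)
base_engine = index.as_query_engine(similarity_top_k=6)
hyde_engine = TransformQueryEngine(base_engine, query_transform=hyde)
response = hyde_engine.query("Your question here")
```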
116 |
117 | ## Response Synthesis
118 |
119 | ### What are the different response modes?
120 |
121 | The system supports various response modes for processing and refining answers from retrieved text chunks:
122 |
123 | 1. **Refine**: Processes each retrieved text chunk sequentially, making separate LLM calls for each, refining the answer progressively with each chunk.
124 |
125 | 2. **Compact (default)**: Similar to refine, but compacts all chunks before processing, reducing the number of LLM calls needed by concatenating chunks to fit within context windows.
126 |
127 | 3. **Tree Summarize**: Uses a summary template for all chunks and recursively condenses responses into a single final answer, ideal for summarization with multiple rounds of LLM queries.
128 |
129 | 4. **Simple Summarize**: Truncates text chunks to fit a single LLM prompt for a quick summary, potentially losing details due to truncation.
130 |
131 | 5. **No Text**: Fetches nodes without sending them to the LLM, allowing for manual inspection of retrieved chunks.
132 |
133 | 6. **Accumulate**: Applies the query to each text chunk separately, accumulating responses into an array, useful for separate detailed queries.
134 |
135 | 7. **Compact Accumulate**: A combination of compact and accumulate, compacting prompts before applying the same query to each, for efficiency with detail.
136 |
137 | Each mode is designed for different levels of detail and summarization needs.
138 |
139 | For more information visit this [link](https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/response_modes.html).
140 |
141 | ### How do we implement it?
142 |
143 | ```python
144 | from llama_index.core.response_synthesizers import ResponseMode
145 |
146 | @cl.on_chat_start
147 | async def start():
148 | query_engine = index.as_query_engine(
149 | streaming=True,
150 | similarity_top_k=6,
151 | node_postprocessors=[reranker],
152 | vector_store_query_mode="hybrid",
153 | query_transform=step_decompose_transform,
154 | response_synthesizer_mode=ResponseMode.REFINE, # add this line
155 | )
156 | ```
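
If you prefer to pick the mode by building the synthesizer explicitly (Tree Summarize is used here purely as an example), a sketch:

```python
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import ResponseMode

# build the response synthesizer explicitly and hand it to the query engine
synthesizer = get_response_synthesizer(response_mode=ResponseMode.TREE_SUMMARIZE, streaming=True)
query_engine = index.as_query_engine(
    similarity_top_k=6,
    response_synthesizer=synthesizer,
)
```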
157 |
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/chainlit.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/3.Reranker - Q.Transformation - Res.Synthesis/chainlit.md
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/ingest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import asyncio
4 | import argparse
5 |
6 | from dotenv import load_dotenv
7 | from pinecone import Pinecone, PodSpec
8 |
9 | from llama_index.core import SimpleDirectoryReader
10 | from llama_index.llms.openai import OpenAI
11 | from llama_index.readers.file import UnstructuredReader
12 | from llama_index.core.ingestion import IngestionPipeline
13 | from llama_index.core.node_parser import SentenceSplitter
14 | from llama_index.embeddings.openai import OpenAIEmbedding
15 | from llama_index.vector_stores.pinecone import PineconeVectorStore
16 | from llama_index.core.extractors import (
17 | TitleExtractor,
18 | # QuestionsAnsweredExtractor,
19 | # SummaryExtractor,
20 | # KeywordExtractor,
21 | )
22 | from llama_parse import LlamaParse
23 |
24 | load_dotenv()
25 | openai.api_key = os.environ.get("OPENAI_API_KEY")
26 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
27 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY")
28 |
29 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
30 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
31 |
32 |
33 | def get_pinecone_index(pc, index_name):
34 | pinecone_index = pc.Index(index_name)
35 | return pinecone_index
36 |
37 |
38 | def get_pinecone_vector_store(pinecone_index):
39 | vector_store = PineconeVectorStore(
40 | pinecone_index=pinecone_index,
41 | add_sparse_vector=True,
42 | )
43 | return vector_store
44 |
45 |
46 | def create_pinecone_pod(pc, index_name):
47 | print("Creating pinecone pod")
48 | pc.create_index(
49 | name=index_name,
50 | dimension=3072,
51 | metric="dotproduct",
52 | spec=PodSpec(environment="gcp-starter"),
53 | )
54 |
55 |
56 | def get_documents(input_dir):
57 | llama_parser = LlamaParse(
58 | api_key=llama_parse_api_key, result_type="markdown", verbose=True
59 | )
60 |
61 | file_extractor = {
62 | ".pdf": llama_parser,
63 | ".html": UnstructuredReader(),
64 | ".txt": UnstructuredReader(),
65 | }
66 | print("Reading directory")
67 | director_reader = SimpleDirectoryReader(
68 | input_dir=input_dir, file_extractor=file_extractor
69 | )
70 | print("Starting document reading")
71 | documents = director_reader.load_data(show_progress=True)
72 | return documents
73 |
74 |
75 | def run_pipeline(documents, vector_store, llm, num_workers):
76 | pipeline = IngestionPipeline(
77 | transformations=[
78 | SentenceSplitter(chunk_size=512, chunk_overlap=126),
79 | TitleExtractor(llm=llm, num_workers=num_workers),
80 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
81 | # SummaryExtractor(
82 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers
83 | # ),
84 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
85 | OpenAIEmbedding(model=EMBEDDING),
86 | ],
87 | vector_store=vector_store,
88 | )
89 |     for doc in documents:
90 |         # Small patch to remove last_accessed_date from the metadata
91 |         doc.metadata.pop("last_accessed_date", None)
92 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers)
93 |
94 |
95 | async def main():
96 | print("Starting ingestion")
97 | input_dir = "./data/source_files/"
98 | index_name = "rag-index"
99 | num_cores = os.cpu_count()
100 | num_workers = min(4, num_cores)
101 | pc = Pinecone(api_key=pinecone_api_key)
102 |     parser = argparse.ArgumentParser(description="Ingest documents into the Pinecone index.")
103 | parser.add_argument(
104 | "--gen",
105 | action="store_true",
106 | help="Generate new pinecone index",
107 | )
108 | args = parser.parse_args()
109 | if args.gen:
110 | create_pinecone_pod(pc, index_name)
111 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024)
112 | pinecone_index = get_pinecone_index(pc, index_name)
113 | vector_store = get_pinecone_vector_store(pinecone_index)
114 | documents = get_documents(input_dir)
115 | print("Starting ingestion pipeline")
116 | run_pipeline(documents, vector_store, llm, num_workers)
117 |
118 |
119 | if __name__ == "__main__":
120 | asyncio.run(main())
121 |
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import chainlit as cl
4 |
5 | from pinecone import Pinecone
6 | from llama_index.core import Settings, VectorStoreIndex
7 | from llama_index.llms.openai import OpenAI
8 | from llama_index.embeddings.openai import OpenAIEmbedding
9 | from llama_index.vector_stores.pinecone import PineconeVectorStore
10 | from llama_index.core.response_synthesizers import ResponseMode
11 | from llama_index.postprocessor.cohere_rerank import CohereRerank
12 | from llama_index.core.indices.query.query_transform.base import (
13 | StepDecomposeQueryTransform,
14 | )
15 |
16 | openai.api_key = os.environ.get("OPENAI_API_KEY")
17 | cohere_api_key = os.environ.get("COHERE_API_KEY")
18 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
19 |
20 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
21 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
22 |
23 | print(MODEL)
24 |
25 |
26 | @cl.cache
27 | def load_context():
28 | Settings.llm = OpenAI(temperature=0.1, model=MODEL, streaming=True)
29 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1)
30 | Settings.num_output = 1024
31 | Settings.context_window = 128000
32 | pc = Pinecone(api_key=pinecone_api_key)
33 | pinecone_index = pc.Index("rag-index")
34 | vector_store = PineconeVectorStore(
35 | pinecone_index=pinecone_index,
36 | )
37 |
38 | index = VectorStoreIndex.from_vector_store(
39 | vector_store=vector_store,
40 | )
41 | return index
42 |
43 |
44 | @cl.on_chat_start
45 | async def start():
46 | index = load_context()
47 |
48 | reranker = CohereRerank(api_key=cohere_api_key, top_n=3)
49 |     step_decompose_transform = StepDecomposeQueryTransform(llm=Settings.llm, verbose=True)
50 |
51 | query_engine = index.as_query_engine(
52 | streaming=True,
53 | similarity_top_k=6,
54 | node_postprocessors=[reranker],
55 | vector_store_query_mode="hybrid",
56 | query_transform=step_decompose_transform,
57 | response_synthesizer_mode=ResponseMode.REFINE,
58 | )
59 | cl.user_session.set("query_engine", query_engine)
60 |
61 | message_history = []
62 | cl.user_session.set("message_history", message_history)
63 |
64 | await cl.Message(
65 |         author="Assistant", content="Hello! I'm an AI assistant. How may I help you?"
66 | ).send()
67 |
68 |
69 | async def set_sources(response, response_message):
70 |     label_list = []
71 |     elements = []  # one side element per source node
72 |     count = 1
73 |     for sr in response.source_nodes:
74 |         elements.append(
75 |             cl.Text(
76 |                 name="S" + str(count),
77 |                 content=f"{sr.node.text}",
78 |                 display="side",
79 |                 size="small",
80 |             )
81 |         )
82 |         label_list.append("S" + str(count))
83 |         count += 1
84 |     response_message.elements = elements
85 |     response_message.content += "\n\nSources: " + ", ".join(label_list)
86 |     await response_message.update()
87 |
88 |
89 | @cl.on_message
90 | async def main(message: cl.Message):
91 | query_engine = cl.user_session.get("query_engine")
92 | message_history = cl.user_session.get("message_history")
93 | prompt_template = "Previous messages:\n"
94 |
95 | response_message = cl.Message(content="", author="Assistant")
96 |
97 | user_message = message.content
98 |
99 | for message in message_history:
100 | prompt_template += f"{message['author']}: {message['content']}\n"
101 | prompt_template += f"Human: {user_message}"
102 |
103 | response = await cl.make_async(query_engine.query)(prompt_template)
104 |
105 | for token in response.response_gen:
106 | await response_message.stream_token(token)
107 | if response.response_txt:
108 | response_message.content = response.response_txt
109 | await response_message.send()
110 |
111 | message_history.append({"author": "Human", "content": user_message})
112 | message_history.append({"author": "AI", "content": response_message.content})
113 | message_history = message_history[-6:]
114 | cl.user_session.set("message_history", message_history)
115 |
116 | if response.source_nodes:
117 | await set_sources(response, response_message)
118 |
--------------------------------------------------------------------------------
/3.Reranker - Q.Transformation - Res.Synthesis/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | cohere
3 | chainlit
4 | llama-index
5 | pinecone-client
6 |
7 | llama-index-vector-stores-pinecone
8 | torch
9 | pypdf
10 | llmsherpa
11 | llama-hub
12 | transformers
13 | llama-index-postprocessor-cohere-rerank
14 | llama-parse
15 | python-dotenv
16 | unstructured
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/.chainlit/config.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | # Whether to enable telemetry (default: true). No personal data is collected.
3 | enable_telemetry = true
4 |
5 | # List of environment variables to be provided by each user to use the app.
6 | user_env = []
7 |
8 | # Duration (in seconds) during which the session is saved when the connection is lost
9 | session_timeout = 3600
10 |
11 | # Enable third parties caching (e.g LangChain cache)
12 | cache = false
13 |
14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15 | # follow_symlink = false
16 |
17 | [features]
18 | # Show the prompt playground
19 | prompt_playground = true
20 |
21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22 | unsafe_allow_html = false
23 |
24 | # Process and display mathematical expressions. This can clash with "$" characters in messages.
25 | latex = false
26 |
27 | # Authorize users to upload files with messages
28 | multi_modal = false
29 |
30 | # Allows user to use speech to text
31 | [features.speech_to_text]
32 | enabled = false
33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34 | # language = "en-US"
35 |
36 | [UI]
37 | # Name of the app and chatbot.
38 | name = "Chatbot"
39 |
40 | # Show the readme while the thread is empty.
41 | show_readme_as_default = false
42 |
43 | # Description of the app and chatbot. This is used for HTML tags.
44 | # description = ""
45 |
46 | # Large size content are by default collapsed for a cleaner ui
47 | default_collapse_content = true
48 |
49 | # The default value for the expand messages settings.
50 | default_expand_messages = false
51 |
52 | # Hide the chain of thought details from the user in the UI.
53 | hide_cot = false
54 |
55 | # Link to your github repo. This will add a github button in the UI's header.
56 | # github = ""
57 |
58 | # Specify a CSS file that can be used to customize the user interface.
59 | # The CSS file can be served from the public directory or via an external link.
60 | # custom_css = "/public/test.css"
61 |
62 | # Override default MUI light theme. (Check theme.ts)
63 | [UI.theme.light]
64 | #background = "#FAFAFA"
65 | #paper = "#FFFFFF"
66 |
67 | [UI.theme.light.primary]
68 | #main = "#F80061"
69 | #dark = "#980039"
70 | #light = "#FFE7EB"
71 |
72 | # Override default MUI dark theme. (Check theme.ts)
73 | [UI.theme.dark]
74 | #background = "#FAFAFA"
75 | #paper = "#FFFFFF"
76 |
77 | [UI.theme.dark.primary]
78 | #main = "#F80061"
79 | #dark = "#980039"
80 | #light = "#FFE7EB"
81 |
82 |
83 | [meta]
84 | generated_by = "1.0.101"
85 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/.chainlit/translations/en-US.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Settings",
8 | "settingsKey": "S",
9 | "APIKeys": "API Keys",
10 | "logout": "Logout"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "New Chat"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Task List",
22 | "loading": "Loading...",
23 |                     "error": "An error occurred"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancel upload",
28 | "removeAttachment": "Remove attachment"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Create new chat?",
32 | "clearChat": "This will clear the current messages and start a new chat.",
33 | "cancel": "Cancel",
34 | "confirm": "Confirm"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expand Messages",
38 | "hideChainOfThought": "Hide Chain of Thought",
39 | "darkMode": "Dark Mode"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "Last Inputs",
47 | "noInputs": "Such empty...",
48 | "loading": "Loading..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Type your message here..."
54 | },
55 | "speechButton": {
56 | "start": "Start recording",
57 | "stop": "Stop recording"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Send message",
61 | "stopTask": "Stop Task"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Attach files"
65 | },
66 | "waterMark": {
67 | "text": "Built with"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Running",
73 | "executedSuccessfully": "executed successfully",
74 | "failed": "failed",
75 | "feedbackUpdated": "Feedback updated",
76 | "updating": "Updating"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Drop your files here"
81 | },
82 | "index": {
83 | "failedToUpload": "Failed to upload",
84 | "cancelledUploadOf": "Cancelled upload of",
85 | "couldNotReachServer": "Could not reach the server",
86 | "continuingChat": "Continuing previous chat"
87 | },
88 | "settings": {
89 | "settingsPanel": "Settings panel",
90 | "reset": "Reset",
91 | "cancel": "Cancel",
92 | "confirm": "Confirm"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: All",
100 | "feedbackPositive": "Feedback: Positive",
101 | "feedbackNegative": "Feedback: Negative"
102 | },
103 | "SearchBar": {
104 | "search": "Search"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 |                     "confirmMessage": "This will delete the thread as well as its messages and elements.",
109 | "cancel": "Cancel",
110 | "confirm": "Confirm",
111 | "deletingChat": "Deleting chat",
112 | "chatDeleted": "Chat deleted"
113 | },
114 | "index": {
115 | "pastChats": "Past Chats"
116 | },
117 | "ThreadList": {
118 | "empty": "Empty..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Close sidebar",
122 | "openSidebar": "Open sidebar"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Go back to chat",
127 | "chatCreatedOn": "This chat was created on"
128 | }
129 | },
130 | "header": {
131 | "chat": "Chat",
132 | "readme": "Readme"
133 | }
134 | }
135 | },
136 | "hooks": {
137 | "useLLMProviders": {
138 | "failedToFetchProviders": "Failed to fetch providers:"
139 | }
140 | },
141 | "pages": {
142 | "Design": {},
143 | "Env": {
144 | "savedSuccessfully": "Saved successfully",
145 | "requiredApiKeys": "Required API Keys",
146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
147 | },
148 | "Page": {
149 | "notPartOfProject": "You are not part of this project."
150 | },
151 | "ResumeButton": {
152 | "resumeChat": "Resume Chat"
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/.chainlit/translations/pt-BR.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Configura\u00e7\u00f5es",
8 | "settingsKey": "S",
9 | "APIKeys": "Chaves de API",
10 | "logout": "Sair"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "Nova Conversa"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
22 | "loading": "Carregando...",
23 | "error": "Ocorreu um erro"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancelar envio",
28 | "removeAttachment": "Remover anexo"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Criar novo chat?",
32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
33 | "cancel": "Cancelar",
34 | "confirm": "Confirmar"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expandir Mensagens",
38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
39 | "darkMode": "Modo Escuro"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "\u00daltimas Entradas",
47 | "noInputs": "Vazio...",
48 | "loading": "Carregando..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Digite sua mensagem aqui..."
54 | },
55 | "speechButton": {
56 | "start": "Iniciar grava\u00e7\u00e3o",
57 | "stop": "Parar grava\u00e7\u00e3o"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Enviar mensagem",
61 | "stopTask": "Parar Tarefa"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Anexar arquivos"
65 | },
66 | "waterMark": {
67 | "text": "Constru\u00eddo com"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Executando",
73 | "executedSuccessfully": "executado com sucesso",
74 | "failed": "falhou",
75 | "feedbackUpdated": "Feedback atualizado",
76 | "updating": "Atualizando"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Solte seus arquivos aqui"
81 | },
82 | "index": {
83 | "failedToUpload": "Falha ao enviar",
84 | "cancelledUploadOf": "Envio cancelado de",
85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
86 | "continuingChat": "Continuando o chat anterior"
87 | },
88 | "settings": {
89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es",
90 | "reset": "Redefinir",
91 | "cancel": "Cancelar",
92 | "confirm": "Confirmar"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: Todos",
100 | "feedbackPositive": "Feedback: Positivo",
101 | "feedbackNegative": "Feedback: Negativo"
102 | },
103 | "SearchBar": {
104 | "search": "Buscar"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
109 | "cancel": "Cancelar",
110 | "confirm": "Confirmar",
111 | "deletingChat": "Deletando conversa",
112 | "chatDeleted": "Conversa deletada"
113 | },
114 | "index": {
115 | "pastChats": "Conversas Anteriores"
116 | },
117 | "ThreadList": {
118 | "empty": "Vazio..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Fechar barra lateral",
122 | "openSidebar": "Abrir barra lateral"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Voltar para a conversa",
127 | "chatCreatedOn": "Esta conversa foi criada em"
128 | }
129 | },
130 | "header": {
131 | "chat": "Conversa",
132 | "readme": "Leia-me"
133 | }
134 | },
135 | "hooks": {
136 | "useLLMProviders": {
137 | "failedToFetchProviders": "Falha ao buscar provedores:"
138 | }
139 | },
140 | "pages": {
141 | "Design": {},
142 | "Env": {
143 | "savedSuccessfully": "Salvo com sucesso",
144 | "requiredApiKeys": "Chaves de API necess\u00e1rias",
145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
146 | },
147 | "Page": {
148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
149 | },
150 | "ResumeButton": {
151 | "resumeChat": "Continuar Conversa"
152 | }
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY="sk-..."
2 | PINECONE_API_KEY=
3 | COHERE_API_KEY=
4 |
5 | MODEL=
6 | EMBEDDING=
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/README.md:
--------------------------------------------------------------------------------
1 | # Evaluation, generation and optimization
2 |
3 | > [!IMPORTANT]
4 | > Optimization is untested and likely unfinished.
5 |
6 | ## Table of Contents
7 |
8 | 1. [Installation Instructions](#installation-instructions)
9 | 2. [Usage](#usage)
10 | 3. [Generation](#generation)
11 | 4. [Evaluation](#evaluation)
12 | 5. [Testing](#testing)
13 |
14 | ## Installation Instructions
15 |
16 | Follow these steps to set up the GPT Documents chatbot on your local machine:
17 |
18 | 1. Create a conda environment:
19 |
20 | ```shell
21 | conda create -n rag python==3.11 -y && source activate rag
22 | ```
23 |
24 | 2. Install the required dependencies:
25 |
26 | ```shell
27 | pip install -r requirements.txt
28 | ```
29 |
30 | 3. Load your documents into the vector store (see the note after this list):
31 | - Create a folder named 'data'.
32 | - Place your documents inside the 'data' folder.
33 | - Execute the 'ingest.py' script to initiate the loading process.
34 |
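> [!NOTE]
> `ingest.py` in this module reads documents from `./data/source_files/`, so place your files in that subfolder. If the Pinecone index does not exist yet, run `python ingest.py --gen` to create it before ingesting; without the flag, the script ingests into an existing index.
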
35 | ## Usage
36 |
37 | Once the setup is complete, launch the chainlit app using the following command:
38 |
39 | ```shell
40 | chainlit run -w main.py
41 | ```
42 |
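The app and the accompanying scripts read the `MODEL` and `EMBEDDING` variables from the environment (see `.env.example`); if unset, they fall back to `gpt-4-0125-preview` and `text-embedding-3-large`.
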
43 | Feel free to explore the functionalities and contribute to the development of this project. Your feedback and contributions are highly appreciated!
44 |
45 | ## Generation
46 |
47 | ### Why is it important?
48 |
49 | The creation of a dataset is crucial for developing and refining a RAG. These systems combine the capabilities of information retrieval and generative language models to provide answers that are both relevant and contextually accurate. By generating and using a labeled dataset, we can train, test, and improve the RAG models more effectively, ensuring that they provide high-quality, contextually relevant answers based on the information retrieved.
50 |
51 | ### What are we generating?
52 |
53 | We are generating a `LabelledRagDataset`. This dataset is designed to train and test Retrieval-Augmented Generation (RAG) systems. You can generate this dataset by hand or with the help of a large language model (LLM), such as GPT-4. For the purposes of this example, we are generating it with `gpt-4`.
54 |
55 | The dataset consists of the following structured data:
56 |
57 | ```json
58 | {
59 | "query": "Query",
60 | "query_by": {
61 | "model_name": "gpt-4",
62 | "type": "ai"
63 | },
64 | "reference_contexts": [
65 | "context_1",
66 | "context_2"
67 | ],
68 | "reference_answer": "answer",
69 | "reference_answer_by": {
70 | "model_name": "gpt-4",
71 | "type": "ai"
72 | }
73 | },
74 | ```
75 |
76 | Each entry in the dataset includes:
77 |
78 | - `query`: The question or prompt that the RAG system needs to respond to.
79 | - `query_by`: Information about how or by whom the query was generated; this can be a model or a person.
80 | - `reference_contexts`: An array of texts that provide relevant information or context to the query. These are the pieces of information that the retriever component is expected to fetch, which will aid the generator in crafting a coherent and contextually appropriate response.
81 | - `reference_answer`: The correct or expected answer to the query, which will be used as the ground truth for evaluating the RAG system's performance.
82 | - `reference_answer_by`: Information about how or by whom the reference answer was generated. This could be a human annotator or an AI model.
83 |
84 | This structure allows for the comprehensive training and evaluation of RAG systems, ensuring they can effectively retrieve relevant information and generate appropriate responses.
85 |
86 | ### How do we implement it?
87 |
88 | The implementation is straightforward thanks to LlamaIndex's `RagDatasetGenerator`.
89 |
90 | ```python
91 | dataset_generator = RagDatasetGenerator.from_documents(
92 | documents,
93 | llm=llm,
94 | num_questions_per_chunk=1,
95 | show_progress=True,
96 | )
97 |
98 | rag_dataset = dataset_generator.generate_dataset_from_nodes()
99 | ```
100 |
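Once generated, the dataset is saved to disk and reloaded later for evaluation. Below is a condensed view of what `generation.py` and `evaluation.py` in this module do; note that `evaluation.py` reads the file from `./data/`, so it presumably has to be moved there after generation.

```python
# generation.py persists the freshly generated dataset to disk...
rag_dataset.save_json("./output/rag_dataset.json")

# ...and evaluation.py later reloads it as a LabelledRagDataset.
from llama_index.core.llama_dataset import LabelledRagDataset

rag_dataset = LabelledRagDataset.from_json("./data/rag_dataset.json")
```
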
101 | ## Evaluation
102 |
103 | ### Why is evaluation important?
104 |
105 | Evaluation is crucial for measuring the performance and guiding the improvement of Retrieval-Augmented Generation (RAG) systems. It ensures that these systems produce accurate, relevant, and contextually appropriate responses. By evaluating various metrics, we can identify areas of strength and weakness, benchmark against other systems, and refine our approach for better outcomes.
106 |
107 | We assess the following metrics:
108 |
109 | - **Mean Correctness Score**: Accuracy of the generated answers.
110 | - **Mean Relevancy Score**: Relevance of the retrieved documents to the query.
111 | - **Mean Faithfulness Score**: Adherence of the responses to the retrieved information.
112 | - **Mean Context Similarity Score**: Similarity of the responses to the query and document context.
113 |
114 | These metrics collectively help ensure the RAG system meets quality standards and user needs.
115 |
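These scores are produced by the `RagEvaluatorPack`. The snippet below is a condensed sketch of what `evaluation.py` in this module does; the batch settings mirror that script and may need to be lowered if you hit OpenAI rate limits.

```python
import asyncio

from llama_index.core.llama_dataset import LabelledRagDataset
from llama_index.core.llama_pack import download_llama_pack

from main import load_index, load_query_engine


async def evaluate():
    # Load the labelled dataset generated earlier and build the query engine under test.
    rag_dataset = LabelledRagDataset.from_json("./data/rag_dataset.json")
    query_engine = load_query_engine(load_index())

    # Download (or reuse) the evaluator pack vendored under ./rag_evaluator_pack.
    RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./rag_evaluator_pack")
    evaluator = RagEvaluatorPack(rag_dataset=rag_dataset, query_engine=query_engine)

    # Runs the correctness, relevancy, faithfulness and context-similarity judges in batches.
    benchmark_df = await evaluator.arun(batch_size=20, sleep_time_in_seconds=1)
    benchmark_df.to_csv("benchmark.csv", index=True)


asyncio.run(evaluate())
```
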
116 | ### Notes
117 |
118 | In the process of adjusting my pipeline within `main.py`, several modifications were necessary. Notably, due to the limitations of using a trial API key for Cohere, I was unable to perform evaluations as initially intended. This constraint has influenced the functionality of the RAG, rendering my current results less applicable. If you have access to a full API key and can conduct the evaluation, I encourage you to share your findings through a pull request.
119 |
120 | Additionally, I disabled streaming functionality to accommodate these changes. This decision may affect the overall operation and efficiency of the system.
121 |
122 | I acknowledge that my current implementation might not be optimal. I am planning to revisit and refine it to enhance performance and reliability. Your feedback and contributions are highly welcomed to help improve this project.
123 |
124 | ---
125 |
126 | ## Testing
127 | | Tested | Function | Last Time Tested | Notes |
128 | |:-------------|:----------------|:-----------------|:---------------------------|
129 | | ✅ | Generation | 2023-03-14 | |
130 | | ✅ | Evaluation | 2023-03-14 | Had to change some things in main.py; see the [notes](#notes) above. |
131 | | ❌ | Optimization | Untested | |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/benchmark.csv:
--------------------------------------------------------------------------------
1 | base_rag
2 | 2.6136363636363638
3 | 0.0
4 | 0.11363636363636363
5 | 0.7192798337295863
6 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/chainlit.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/4.Evaluation - Generation - Optimization/chainlit.md
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/evaluation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import asyncio
4 |
5 | from llama_index.core import Settings
6 | from llama_index.llms.openai import OpenAI
7 | from llama_index.core.llama_pack import download_llama_pack
8 | from llama_index.core.llama_dataset import LabelledRagDataset
9 | from main import load_query_engine, load_index
10 |
11 | openai.api_key = os.getenv("OPENAI_API_KEY")
12 | model = os.getenv("MODEL", "gpt-4-0125-preview")
13 | print(f"model = {model}")
14 | Settings.llm = OpenAI(model=model)
15 |
16 |
17 | async def evaluate():
18 | rag_dataset = LabelledRagDataset.from_json("./data/rag_dataset.json")
19 | print("Rag dataset loaded")
20 | index = load_index()
21 | print("Index loaded")
22 | query_engine = load_query_engine(index)
23 | print("Query engine loaded")
24 | RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./rag_evaluator_pack")
25 | print("RagEvaluatorPack downloaded")
26 | rag_evaluator_pack = RagEvaluatorPack(
27 | rag_dataset=rag_dataset, query_engine=query_engine
28 | )
29 | print("RagEvaluatorPack created")
30 | ############################################################################
31 | # NOTE: If you have a lower tier subscription for OpenAI API like Usage Tier 1, #
32 | # then you'll need to use different batch_size and sleep_time_in_seconds. #
33 | # For Usage Tier 1, settings that seemed to work well were batch_size=5, #
34 | # and sleep_time_in_seconds=15 (as of December 2023.) #
35 | ############################################################################
36 | benchmark_df = await rag_evaluator_pack.arun(
37 | batch_size=20, # batches the number of openai api calls to make
38 | sleep_time_in_seconds=1, # seconds to sleep before making an api call
39 | )
40 | print("Benchmarking complete")
41 | benchmark_df.to_csv("benchmark.csv", index=True)
42 |
43 |
44 | if __name__ == "__main__":
45 | asyncio.run(evaluate())
46 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/generation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 |
4 | from llama_index.core import SimpleDirectoryReader
5 | from llama_index.llms.openai import OpenAI
6 | from llama_index.core.llama_dataset.generator import RagDatasetGenerator
7 |
8 |
9 | openai.api_key = os.environ.get("OPENAI_API_KEY")
10 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
11 | print(f"model = {MODEL}")
12 |
13 |
14 | def get_documents(input_dir):
15 | documents = SimpleDirectoryReader(input_dir).load_data(show_progress=True)
16 | return documents
17 |
18 |
19 | def generate_dataset(documents):
20 | llm = OpenAI(model=MODEL, temperature=0.1)
21 |
22 | dataset_generator = RagDatasetGenerator.from_documents(
23 | documents,
24 | llm=llm,
25 | num_questions_per_chunk=1,
26 | show_progress=True,
27 | )
28 |
29 | rag_dataset = dataset_generator.generate_dataset_from_nodes()
30 | return rag_dataset
31 |
32 |
33 | def main():
34 | input_dir = "./data/source_files/"
35 | documents = get_documents(input_dir)
36 | rag_dataset = generate_dataset(documents)
37 | rag_dataset.save_json("./output/rag_dataset.json")
38 |
39 |
40 | if __name__ == "__main__":
41 | main()
42 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/ingest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import asyncio
4 | import argparse
5 |
6 | from dotenv import load_dotenv
7 | from pinecone import Pinecone, PodSpec
8 |
9 | from llama_index.core import SimpleDirectoryReader, download_loader
10 | from llama_index.llms.openai import OpenAI
11 | from llama_index.embeddings.openai import OpenAIEmbedding
12 | from llama_index.core.ingestion import IngestionPipeline
13 | from llama_index.vector_stores.pinecone import PineconeVectorStore
14 | from llama_index.core.extractors import (
15 | TitleExtractor,
16 | # QuestionsAnsweredExtractor,
17 | # SummaryExtractor,
18 | # KeywordExtractor,
19 | )
20 | from llama_index.core.node_parser import SentenceSplitter
21 | from llama_parse import LlamaParse
22 |
23 | load_dotenv()
24 | openai.api_key = os.environ.get("OPENAI_API_KEY")
25 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
26 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY")
27 |
28 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
29 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
30 |
31 |
32 | def get_pinecone_index(pc, index_name):
33 | pinecone_index = pc.Index(index_name)
34 | return pinecone_index
35 |
36 |
37 | def get_pinecone_vector_store(pinecone_index):
38 | vector_store = PineconeVectorStore(
39 | pinecone_index=pinecone_index,
40 | add_sparse_vector=True,
41 | )
42 | return vector_store
43 |
44 |
45 | def create_pinecone_pod(pc, index_name):
46 | print("Creating pinecone pod")
47 | pc.create_index(
48 | name=index_name,
49 |         dimension=3072,  # output dimension of text-embedding-3-large
50 | metric="dotproduct",
51 | spec=PodSpec(environment="gcp-starter"),
52 | )
53 |
54 |
55 | def get_documents(input_dir):
56 | llama_parser = LlamaParse(
57 | api_key=llama_parse_api_key, result_type="markdown", verbose=True
58 | )
59 |
60 | UnstructuredReader = download_loader("UnstructuredReader")
61 |
62 | file_extractor = {
63 | ".pdf": llama_parser,
64 | ".html": UnstructuredReader(),
65 | ".txt": UnstructuredReader(),
66 | }
67 | print("Reading directory")
68 | director_reader = SimpleDirectoryReader(
69 | input_dir=input_dir, file_extractor=file_extractor
70 | )
71 | print("Starting document reading")
72 | documents = director_reader.load_data(show_progress=True)
73 | return documents
74 |
75 |
76 | def run_pipeline(documents, vector_store, llm, num_workers):
77 | pipeline = IngestionPipeline(
78 | transformations=[
79 | SentenceSplitter(chunk_size=512, chunk_overlap=126),
80 | TitleExtractor(llm=llm, num_workers=num_workers),
81 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
82 | # SummaryExtractor(
83 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers
84 | # ),
85 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
86 | OpenAIEmbedding(model=EMBEDDING),
87 | ],
88 | vector_store=vector_store,
89 | )
90 |     for doc in documents:  # Small patch to remove last_accessed_date from metadata
91 |         # pop() avoids a KeyError when the key is missing from a document's metadata
92 |         doc.metadata.pop("last_accessed_date", None)
93 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers)
94 |
95 |
96 | async def main():
97 | print("Starting ingestion")
98 | input_dir = "./data/source_files/"
99 | index_name = "rag-index"
100 | num_cores = os.cpu_count()
101 | num_workers = min(4, num_cores)
102 | pc = Pinecone(api_key=pinecone_api_key)
103 |     parser = argparse.ArgumentParser(description="Ingest documents into the Pinecone index.")
104 | parser.add_argument(
105 | "--gen",
106 | action="store_true",
107 | help="Generate new pinecone index",
108 | )
109 | args = parser.parse_args()
110 | if args.gen:
111 | create_pinecone_pod(pc, index_name)
112 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024)
113 | pinecone_index = get_pinecone_index(pc, index_name)
114 | vector_store = get_pinecone_vector_store(pinecone_index)
115 | documents = get_documents(input_dir)
116 | print("Starting ingestion pipeline")
117 | run_pipeline(documents, vector_store, llm, num_workers)
118 |
119 |
120 | if __name__ == "__main__":
121 | asyncio.run(main())
122 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import chainlit as cl
4 |
5 | from pinecone import Pinecone
6 | from llama_index.core import Settings, VectorStoreIndex
7 | from llama_index.llms.openai import OpenAI
8 | from llama_index.embeddings.openai import OpenAIEmbedding
9 | from llama_index.vector_stores.pinecone import PineconeVectorStore
10 | from llama_index.core.response_synthesizers import ResponseMode
11 |
12 | # from llama_index.postprocessor.cohere_rerank import CohereRerank
13 | from llama_index.core.indices.query.query_transform.base import (
14 | StepDecomposeQueryTransform,
15 | )
16 |
17 | openai.api_key = os.environ.get("OPENAI_API_KEY")
18 | cohere_api_key = os.environ.get("COHERE_API_KEY")
19 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
20 |
21 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
22 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
23 |
24 |
25 | @cl.cache
26 | def load_index():
27 | Settings.llm = OpenAI(
28 | temperature=0.1,
29 | model=MODEL, # streaming=True
30 | )
31 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1)
32 | Settings.num_output = 1024
33 | Settings.context_window = 128000
34 | pc = Pinecone(api_key=pinecone_api_key)
35 |     pinecone_index = pc.Index("rag-index")  # must match the index name created in ingest.py
36 | vector_store = PineconeVectorStore(
37 | pinecone_index=pinecone_index,
38 | )
39 |
40 | index = VectorStoreIndex.from_vector_store(
41 | vector_store=vector_store,
42 | )
43 | return index
44 |
45 |
46 | @cl.cache
47 | def load_query_engine(index):
48 | # reranker = CohereRerank(api_key=cohere_api_key, top_n=3)
49 |     step_decompose_transform = StepDecomposeQueryTransform(llm=OpenAI(model=MODEL), verbose=True)
50 |
51 | query_engine = index.as_query_engine(
52 | # streaming=True,
53 | similarity_top_k=6,
54 | # node_postprocessors=[reranker], # Reranker would require a non Trial key for evaluation.
55 | vector_store_query_mode="hybrid",
56 | query_transform=step_decompose_transform,
57 |         response_mode=ResponseMode.REFINE,
58 | )
59 | return query_engine
60 |
61 |
62 | @cl.on_chat_start
63 | async def start():
64 | index = load_index()
65 | query_engine = load_query_engine(index)
66 |
67 | cl.user_session.set("query_engine", query_engine)
68 |
69 | message_history = []
70 | cl.user_session.set("message_history", message_history)
71 |
72 | await cl.Message(
73 |         author="Assistant", content="Hello! I'm an AI assistant. How may I help you?"
74 | ).send()
75 |
76 |
77 | @cl.on_message
78 | async def main(message: cl.Message):
79 | query_engine = cl.user_session.get("query_engine")
80 | message_history = cl.user_session.get("message_history")
81 | prompt_template = "Previous messages:\n"
82 |
83 | response_message = cl.Message(content="", author="Assistant")
84 |
85 | user_message = message.content
86 |
87 |     for past_message in message_history:
88 |         prompt_template += f"{past_message['author']}: {past_message['content']}\n"
89 | prompt_template += f"Human: {user_message}"
90 |
91 | response = await cl.make_async(query_engine.query)(prompt_template)
92 |
93 |     # Streaming is disabled (see load_index / load_query_engine), so the query
94 |     # returns a regular Response object rather than a streaming one.
95 |     if response.response:
96 |         response_message.content = response.response
97 |     await response_message.send()
98 |
99 | message_history.append({"author": "Human", "content": user_message})
100 | message_history.append({"author": "AI", "content": response_message.content})
101 | message_history = message_history[-6:]
102 | cl.user_session.set("message_history", message_history)
103 |
104 | label_list = []
105 | count = 1
106 |
107 | for sr in response.source_nodes:
108 | elements = [
109 | cl.Text(
110 | name="S" + str(count),
111 | content=f"{sr.node.text}",
112 | display="side",
113 | size="small",
114 | )
115 | ]
116 | response_message.elements = elements
117 | label_list.append("S" + str(count))
118 | await response_message.update()
119 | count += 1
120 | response_message.content += "\n\nSources: " + ", ".join(label_list)
121 | await response_message.update()
122 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/optimization.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from pathlib import Path
4 |
5 |
6 | from llama_index.core import (
7 | Document,
8 | StorageContext,
9 | VectorStoreIndex,
10 | load_index_from_storage,
11 | )
12 | from llama_index.core.evaluation import (
13 | SemanticSimilarityEvaluator,
14 | BatchEvalRunner,
15 | )
16 |
17 | # from llama_index.llms.openai import OpenAI
18 | from llama_index.readers.file import PDFReader
19 | from llama_index.core.evaluation import QueryResponseDataset
20 | from llama_index.core.node_parser import SimpleNodeParser
21 | from llama_index.embeddings.openai import OpenAIEmbedding
22 | from llama_index.core.param_tuner.base import RunResult
23 | from llama_index.experimental.param_tuner import RayTuneParamTuner
24 | from llama_index.core.evaluation.eval_utils import get_responses
25 |
26 | loader = PDFReader()
27 | docs0 = loader.load_data(file=Path("./data/llama2.pdf"))
28 |
29 | doc_text = "\n\n".join([d.get_content() for d in docs0])
30 | docs = [Document(text=doc_text)]
31 |
32 | ######################
33 | ###### Change this to work with a dataset generated from a LabelledRagDataset
34 | ######################
35 | eval_dataset = QueryResponseDataset.from_json("data/llama2_eval_qr_dataset.json")
36 | eval_qs = eval_dataset.questions
37 | ref_response_strs = [r for (_, r) in eval_dataset.qr_pairs]
38 |
39 |
40 | def _build_index(chunk_size, docs):
41 | index_out_path = f"./storage_{chunk_size}"
42 | if not os.path.exists(index_out_path):
43 | Path(index_out_path).mkdir(parents=True, exist_ok=True)
44 | # parse docs
45 | node_parser = SimpleNodeParser.from_defaults(chunk_size=chunk_size)
46 | base_nodes = node_parser.get_nodes_from_documents(docs)
47 |
48 | # build index
49 | index = VectorStoreIndex(base_nodes)
50 | # save index to disk
51 | index.storage_context.persist(index_out_path)
52 | else:
53 | # rebuild storage context
54 | storage_context = StorageContext.from_defaults(persist_dir=index_out_path)
55 | # load index
56 | index = load_index_from_storage(
57 | storage_context,
58 | )
59 | return index
60 |
61 |
62 | def _get_eval_batch_runner():
63 | evaluator_s = SemanticSimilarityEvaluator(embed_model=OpenAIEmbedding())
64 | eval_batch_runner = BatchEvalRunner(
65 | {"semantic_similarity": evaluator_s}, workers=2, show_progress=True
66 | )
67 |
68 | return eval_batch_runner
69 |
70 |
71 | def objective_function(params_dict):
72 | chunk_size = params_dict["chunk_size"]
73 | docs = params_dict["docs"]
74 | top_k = params_dict["top_k"]
75 | eval_qs = params_dict["eval_qs"]
76 | ref_response_strs = params_dict["ref_response_strs"]
77 |
78 | # build index
79 | index = _build_index(chunk_size, docs)
80 |
81 | # query engine
82 | query_engine = index.as_query_engine(similarity_top_k=top_k)
83 |
84 | # get predicted responses
85 | pred_response_objs = get_responses(eval_qs, query_engine, show_progress=True)
86 |
87 | # run evaluator
88 | # NOTE: can uncomment other evaluators
89 | eval_batch_runner = _get_eval_batch_runner()
90 | eval_results = eval_batch_runner.evaluate_responses(
91 | eval_qs, responses=pred_response_objs, reference=ref_response_strs
92 | )
93 |
94 | # get semantic similarity metric
95 | mean_score = np.array([r.score for r in eval_results["semantic_similarity"]]).mean()
96 |
97 | return RunResult(score=mean_score, params=params_dict)
98 |
99 |
100 | def param_tuner():
101 | param_dict = {"chunk_size": [256, 512, 1024], "top_k": [1, 2, 5]}
102 | fixed_param_dict = {
103 | "docs": docs,
104 | "eval_qs": eval_qs[:10],
105 | "ref_response_strs": ref_response_strs[:10],
106 | }
107 |
108 | param_tuner = RayTuneParamTuner(
109 | param_fn=objective_function,
110 | param_dict=param_dict,
111 | fixed_param_dict=fixed_param_dict,
112 | run_config_dict={"storage_path": "/tmp/custom/ray_tune", "name": "my_exp"},
113 | )
114 | results = param_tuner.tune()
115 | return results
116 |
117 |
118 | def print_results(results):
119 | best_result = results.best_run_result
120 |
121 | best_top_k = results.best_run_result.params["top_k"]
122 | best_chunk_size = results.best_run_result.params["chunk_size"]
123 | print(f"Score: {best_result.score}")
124 | print(f"Top-k: {best_top_k}")
125 | print(f"Chunk size: {best_chunk_size}")
126 |
127 |
128 | def main():
129 | results = param_tuner()
130 | print_results(results)
131 |
132 |
133 | if __name__ == "__main__":
134 | main()
135 |
136 | # NOT TESTED YET
137 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/rag_evaluator_pack/README.md:
--------------------------------------------------------------------------------
1 | DO NOT DELETE
2 | This readme file is needed to install from pyproject.toml.
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/rag_evaluator_pack/llama_index/packs/rag_evaluator/BUILD:
--------------------------------------------------------------------------------
1 | python_sources()
2 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/rag_evaluator_pack/llama_index/packs/rag_evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | from llama_index.packs.rag_evaluator.base import RagEvaluatorPack
2 |
3 | __all__ = ["RagEvaluatorPack"]
4 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/rag_evaluator_pack/llama_index/packs/rag_evaluator/base.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import time
4 | import warnings
5 | from collections import deque
6 | from typing import Any, List, Optional
7 |
8 | import pandas as pd
9 | import tqdm
10 | from llama_index.core.evaluation import (
11 | CorrectnessEvaluator,
12 | EvaluationResult,
13 | FaithfulnessEvaluator,
14 | RelevancyEvaluator,
15 | SemanticSimilarityEvaluator,
16 | )
17 | from llama_index.core.evaluation.notebook_utils import (
18 | get_eval_results_df,
19 | )
20 | from llama_index.core.llama_dataset import BaseLlamaDataset, BaseLlamaPredictionDataset
21 | from llama_index.core.llama_pack.base import BaseLlamaPack
22 | from llama_index.core.llms import LLM
23 | from llama_index.core.query_engine import BaseQueryEngine
24 | from llama_index.embeddings.openai import OpenAIEmbedding
25 | from llama_index.llms.openai import OpenAI
26 | from openai import RateLimitError
27 | from tqdm.asyncio import tqdm_asyncio
28 |
29 |
30 | class RagEvaluatorPack(BaseLlamaPack):
31 | """A pack for performing evaluation with your own RAG pipeline.
32 |
33 | Args:
34 | query_engine: The RAG pipeline to evaluate.
35 | rag_dataset: The BaseLlamaDataset to evaluate on.
36 | judge_llm: The LLM to use as the evaluator.
37 | """
38 |
39 | def __init__(
40 | self,
41 | query_engine: BaseQueryEngine,
42 | rag_dataset: BaseLlamaDataset,
43 | judge_llm: Optional[LLM] = None,
44 | show_progress: bool = True,
45 | ):
46 | self.query_engine = query_engine
47 | self.rag_dataset = rag_dataset
48 | self._num_examples = len(self.rag_dataset.examples)
49 | if judge_llm is None:
50 | self.judge_llm = OpenAI(temperature=0, model="gpt-4-1106-preview")
51 | else:
52 | assert isinstance(judge_llm, LLM)
53 | self.judge_llm = judge_llm
54 | self.show_progress = show_progress
55 | self.evals = {
56 | "correctness": [],
57 | "relevancy": [],
58 | "faithfulness": [],
59 | "context_similarity": [],
60 | }
61 | self.eval_queue = deque(range(len(rag_dataset.examples)))
62 | self.prediction_dataset = None
63 |
64 | async def _amake_predictions(
65 | self,
66 | batch_size: int = 20,
67 | sleep_time_in_seconds: int = 1,
68 | ):
69 | """Async make predictions with query engine."""
70 | self.prediction_dataset: BaseLlamaPredictionDataset = (
71 | await self.rag_dataset.amake_predictions_with(
72 | self.query_engine,
73 | show_progress=self.show_progress,
74 | batch_size=batch_size,
75 | sleep_time_in_seconds=sleep_time_in_seconds,
76 | )
77 | )
78 |
79 | def _make_predictions(
80 | self,
81 | batch_size: int = 20,
82 | sleep_time_in_seconds: int = 1,
83 | ):
84 | """Sync make predictions with query engine."""
85 | self.prediction_dataset: BaseLlamaPredictionDataset = (
86 | self.rag_dataset.make_predictions_with(
87 | self.query_engine,
88 | show_progress=self.show_progress,
89 | batch_size=batch_size,
90 | sleep_time_in_seconds=sleep_time_in_seconds,
91 | )
92 | )
93 |
94 | def _prepare_judges(self):
95 | """Construct the evaluators."""
96 | judges = {}
97 | judges["correctness"] = CorrectnessEvaluator(
98 | llm=self.judge_llm,
99 | )
100 | judges["relevancy"] = RelevancyEvaluator(
101 | llm=self.judge_llm,
102 | )
103 | judges["faithfulness"] = FaithfulnessEvaluator(
104 | llm=self.judge_llm,
105 | )
106 | judges["semantic_similarity"] = SemanticSimilarityEvaluator(
107 | embed_model=OpenAIEmbedding()
108 | )
109 | return judges
110 |
111 | async def _areturn_null_eval_result(self, query) -> EvaluationResult:
112 |         """A dummy async method that returns a null EvaluationResult.
113 |
114 | NOTE: this is used to handle case when creating async tasks for evaluating
115 | predictions where contexts do not exist.
116 | """
117 | return EvaluationResult(
118 | query=query,
119 | )
120 |
121 | def _return_null_eval_result(self, query) -> EvaluationResult:
122 |         """A dummy sync method that returns a null EvaluationResult.
123 |
124 | NOTE: this is used to handle case when creating async tasks for evaluating
125 | predictions where contexts do not exist.
126 | """
127 | return EvaluationResult(
128 | query=query,
129 | )
130 |
131 | def _create_async_evaluate_example_prediction_tasks(
132 | self, judges, example, prediction, sleep_time_in_seconds
133 | ):
134 | """Collect the co-routines."""
135 | correctness_task = judges["correctness"].aevaluate(
136 | query=example.query,
137 | response=prediction.response,
138 | reference=example.reference_answer,
139 | sleep_time_in_seconds=sleep_time_in_seconds,
140 | )
141 |
142 | relevancy_task = judges["relevancy"].aevaluate(
143 | query=example.query,
144 | response=prediction.response,
145 | contexts=prediction.contexts,
146 | sleep_time_in_seconds=sleep_time_in_seconds,
147 | )
148 |
149 | faithfulness_task = judges["faithfulness"].aevaluate(
150 | query=example.query,
151 | response=prediction.response,
152 | contexts=prediction.contexts,
153 | sleep_time_in_seconds=sleep_time_in_seconds,
154 | )
155 |
156 | if example.reference_contexts and prediction.contexts:
157 | semantic_similarity_task = judges["semantic_similarity"].aevaluate(
158 | query=example.query,
159 | response="\n".join(prediction.contexts),
160 | reference="\n".join(example.reference_contexts),
161 | )
162 | else:
163 | semantic_similarity_task = self._areturn_null_eval_result(
164 | query=example.query
165 | )
166 |
167 | return (
168 | correctness_task,
169 | relevancy_task,
170 | faithfulness_task,
171 | semantic_similarity_task,
172 | )
173 |
174 | def _evaluate_example_prediction(self, judges, example, prediction):
175 | """Collect the co-routines."""
176 | correctness_result = judges["correctness"].evaluate(
177 | query=example.query,
178 | response=prediction.response,
179 | reference=example.reference_answer,
180 | )
181 |
182 | relevancy_result = judges["relevancy"].evaluate(
183 | query=example.query,
184 | response=prediction.response,
185 | contexts=prediction.contexts,
186 | )
187 |
188 | faithfulness_result = judges["faithfulness"].evaluate(
189 | query=example.query,
190 | response=prediction.response,
191 | contexts=prediction.contexts,
192 | )
193 |
194 | if example.reference_contexts and prediction.contexts:
195 | semantic_similarity_result = judges["semantic_similarity"].evaluate(
196 | query=example.query,
197 | response="\n".join(prediction.contexts),
198 | reference="\n".join(example.reference_contexts),
199 | )
200 | else:
201 | semantic_similarity_result = self._return_null_eval_result(
202 | query=example.query
203 | )
204 |
205 | return (
206 | correctness_result,
207 | relevancy_result,
208 | faithfulness_result,
209 | semantic_similarity_result,
210 | )
211 |
212 | def _save_evaluations(self):
213 | """Save evaluation json object."""
214 | # saving evaluations
215 | evaluations_objects = {
216 | "context_similarity": [e.dict() for e in self.evals["context_similarity"]],
217 | "correctness": [e.dict() for e in self.evals["correctness"]],
218 | "faithfulness": [e.dict() for e in self.evals["faithfulness"]],
219 | "relevancy": [e.dict() for e in self.evals["relevancy"]],
220 | }
221 |
222 | with open("_evaluations.json", "w") as json_file:
223 | json.dump(evaluations_objects, json_file)
224 |
225 | def _prepare_and_save_benchmark_results(self):
226 | """Get mean score across all of the evaluated examples-predictions."""
227 | _, mean_correctness_df = get_eval_results_df(
228 | ["base_rag"] * len(self.evals["correctness"]),
229 | self.evals["correctness"],
230 | metric="correctness",
231 | )
232 | _, mean_relevancy_df = get_eval_results_df(
233 | ["base_rag"] * len(self.evals["relevancy"]),
234 | self.evals["relevancy"],
235 | metric="relevancy",
236 | )
237 | _, mean_faithfulness_df = get_eval_results_df(
238 | ["base_rag"] * len(self.evals["faithfulness"]),
239 | self.evals["faithfulness"],
240 | metric="faithfulness",
241 | )
242 | _, mean_context_similarity_df = get_eval_results_df(
243 | ["base_rag"] * len(self.evals["context_similarity"]),
244 | self.evals["context_similarity"],
245 | metric="context_similarity",
246 | )
247 |
248 | mean_scores_df = pd.concat(
249 | [
250 | mean_correctness_df.reset_index(),
251 | mean_relevancy_df.reset_index(),
252 | mean_faithfulness_df.reset_index(),
253 | mean_context_similarity_df.reset_index(),
254 | ],
255 | axis=0,
256 | ignore_index=True,
257 | )
258 | mean_scores_df = mean_scores_df.set_index("index")
259 | mean_scores_df.index = mean_scores_df.index.set_names(["metrics"])
260 |
261 | # save mean_scores_df
262 | mean_scores_df.to_csv("benchmark.csv")
263 | return mean_scores_df
264 |
265 | def _make_evaluations(
266 | self,
267 | batch_size,
268 | sleep_time_in_seconds,
269 | ):
270 | """Sync make evaluations."""
271 | judges = self._prepare_judges()
272 |
273 | start_ix = self.eval_queue[0]
274 | for batch in self._batch_examples_and_preds(
275 | self.rag_dataset.examples,
276 | self.prediction_dataset.predictions,
277 | batch_size=batch_size,
278 | start_position=start_ix,
279 | ):
280 | examples, predictions = batch
281 | for example, prediction in tqdm.tqdm(zip(examples, predictions)):
282 | (
283 | correctness_result,
284 | relevancy_result,
285 | faithfulness_result,
286 | semantic_similarity_result,
287 | ) = self._evaluate_example_prediction(
288 | judges=judges, example=example, prediction=prediction
289 | )
290 |
291 | self.evals["correctness"].append(correctness_result)
292 | self.evals["relevancy"].append(relevancy_result)
293 | self.evals["faithfulness"].append(faithfulness_result)
294 | self.evals["context_similarity"].append(semantic_similarity_result)
295 | time.sleep(sleep_time_in_seconds)
296 |
297 | self._save_evaluations()
298 | return self._prepare_and_save_benchmark_results()
299 |
300 | def _batch_examples_and_preds(
301 | self,
302 | examples: List[Any],
303 | predictions: List[Any],
304 | batch_size: int = 10,
305 | start_position: int = 0,
306 | ):
307 | """Batches examples and predictions with a given batch_size."""
308 | assert self._num_examples == len(predictions)
309 | for ndx in range(start_position, self._num_examples, batch_size):
310 | yield (
311 | examples[ndx : min(ndx + batch_size, self._num_examples)],
312 | predictions[ndx : min(ndx + batch_size, self._num_examples)],
313 | )
314 |
315 | async def _amake_evaluations(self, batch_size, sleep_time_in_seconds):
316 | """Async make evaluations."""
317 | judges = self._prepare_judges()
318 |
319 | ix = self.eval_queue[0]
320 | batch_iterator = self._batch_examples_and_preds(
321 | self.rag_dataset.examples,
322 | self.prediction_dataset.predictions,
323 | batch_size=batch_size,
324 | start_position=ix,
325 | )
326 | total_batches = (self._num_examples - ix + 1) / batch_size + (
327 | (self._num_examples - ix + 1) % batch_size != 0
328 | )
329 | if self.show_progress:
330 | batch_iterator = tqdm_asyncio(
331 | batch_iterator,
332 | desc="Batch processing of evaluations",
333 | total=total_batches,
334 | )
335 |
336 | for batch in batch_iterator:
337 | examples, predictions = batch
338 | tasks = []
339 | for example, prediction in zip(examples, predictions):
340 | (
341 | correctness_task,
342 | relevancy_task,
343 | faithfulness_task,
344 | semantic_similarity_task,
345 | ) = self._create_async_evaluate_example_prediction_tasks(
346 | judges=judges,
347 | example=example,
348 | prediction=prediction,
349 | sleep_time_in_seconds=sleep_time_in_seconds,
350 | )
351 |
352 | tasks += [
353 | correctness_task,
354 | relevancy_task,
355 | faithfulness_task,
356 | semantic_similarity_task,
357 | ]
358 |
359 | # do this in batches to avoid RateLimitError
360 | try:
361 | eval_results: List[EvaluationResult] = await asyncio.gather(*tasks)
362 | except RateLimitError as err:
363 | if self.show_progress:
364 | batch_iterator.close()
365 | raise ValueError(
366 | "You've hit rate limits on your OpenAI subscription. This"
367 | " `RagEvaluatorPack` maintains state of evaluations. Simply"
368 | " re-invoke .arun() in order to continue from where you left"
369 | " off."
370 | ) from err
371 | # store in memory
372 | # since final result of eval_results respects order of inputs
373 | # just take appropriate slices
374 | self.evals["correctness"] += eval_results[::4]
375 | self.evals["relevancy"] += eval_results[1::4]
376 | self.evals["faithfulness"] += eval_results[2::4]
377 | self.evals["context_similarity"] += eval_results[3::4]
378 | # update queue
379 | for _ in range(batch_size):
380 | if self.eval_queue:
381 | self.eval_queue.popleft()
382 | ix += 1
383 | if self.show_progress:
384 | batch_iterator.update()
385 | batch_iterator.refresh()
386 |
387 | self._save_evaluations()
388 | return self._prepare_and_save_benchmark_results()
389 |
390 | def run(self, batch_size: int = 10, sleep_time_in_seconds: int = 1):
391 | if batch_size > 10:
392 | warnings.warn(
393 | "You've set a large batch_size (>10). If using OpenAI GPT-4 as "
394 | " `judge_llm` (which is the default judge_llm),"
395 | " you may experience a RateLimitError. Previous successful eval "
396 | " responses are cached per batch. So hitting a RateLimitError"
397 | " would mean you'd lose all of the current batches successful "
398 | " GPT-4 calls."
399 | )
400 | if self.prediction_dataset is None:
401 | self._make_predictions(batch_size, sleep_time_in_seconds)
402 |
403 | # evaluate predictions
404 | eval_sleep_time_in_seconds = (
405 | sleep_time_in_seconds * 2
406 | ) # since we make 3 evaluator llm calls
407 | eval_batch_size = int(max(batch_size / 4, 1))
408 | return self._make_evaluations(
409 | batch_size=eval_batch_size, sleep_time_in_seconds=eval_sleep_time_in_seconds
410 | )
411 |
412 | async def arun(
413 | self,
414 | batch_size: int = 10,
415 | sleep_time_in_seconds: int = 1,
416 | ):
417 | if batch_size > 10:
418 | warnings.warn(
419 | "You've set a large batch_size (>10). If using OpenAI GPT-4 as "
420 | " `judge_llm` (which is the default judge_llm),"
421 | " you may experience a RateLimitError. Previous successful eval "
422 | " responses are cached per batch. So hitting a RateLimitError"
423 | " would mean you'd lose all of the current batches successful "
424 | " GPT-4 calls."
425 | )
426 |
427 | # make predictions
428 | if self.prediction_dataset is None:
429 | await self._amake_predictions(batch_size, sleep_time_in_seconds)
430 |
431 | # evaluate predictions
432 | eval_sleep_time_in_seconds = (
433 | sleep_time_in_seconds * 2
434 | ) # since we make 3 evaluator llm calls and default is gpt-4
435 | # which is heavily rate-limited
436 | eval_batch_size = int(max(batch_size / 4, 1))
437 | return await self._amake_evaluations(
438 | batch_size=eval_batch_size, sleep_time_in_seconds=eval_sleep_time_in_seconds
439 | )
440 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/rag_evaluator_pack/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | build-backend = "poetry.core.masonry.api"
3 | requires = ["poetry-core"]
4 |
5 | [tool.codespell]
6 | check-filenames = true
7 | check-hidden = true
8 | skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
9 |
10 | [tool.llamahub]
11 | contains_example = true
12 | import_path = "llama_index.packs.rag_evaluator"
13 |
14 | [tool.llamahub.class_authors]
15 | RagEvaluatorPack = "nerdai"
16 |
17 | [tool.mypy]
18 | disallow_untyped_defs = true
19 | exclude = ["_static", "build", "examples", "notebooks", "venv"]
20 | ignore_missing_imports = true
21 | python_version = "3.8"
22 |
23 | [tool.poetry]
24 | authors = ["Your Name "]
25 | description = "llama-index packs rag_evaluator integration"
26 | exclude = ["**/BUILD"]
27 | keywords = ["benchmarks", "evaluation", "rag"]
28 | license = "MIT"
29 | maintainers = ["nerdai"]
30 | name = "llama-index-packs-rag-evaluator"
31 | readme = "README.md"
32 | version = "0.1.3"
33 |
34 | [tool.poetry.dependencies]
35 | python = ">=3.8.1,<4.0"
36 | llama-index-core = "^0.10.1"
37 | llama-index-llms-openai = "^0.1.1"
38 |
39 | [tool.poetry.group.dev.dependencies]
40 | ipython = "8.10.0"
41 | jupyter = "^1.0.0"
42 | mypy = "0.991"
43 | pre-commit = "3.2.0"
44 | pylint = "2.15.10"
45 | pytest = "7.2.1"
46 | pytest-mock = "3.11.1"
47 | ruff = "0.0.292"
48 | tree-sitter-languages = "^1.8.0"
49 | types-Deprecated = ">=0.1.0"
50 | types-PyYAML = "^6.0.12.12"
51 | types-protobuf = "^4.24.0.4"
52 | types-redis = "4.5.5.0"
53 | types-requests = "2.28.11.8"
54 | types-setuptools = "67.1.0.0"
55 |
56 | [tool.poetry.group.dev.dependencies.black]
57 | extras = ["jupyter"]
58 | version = "<=23.9.1,>=23.7.0"
59 |
60 | [tool.poetry.group.dev.dependencies.codespell]
61 | extras = ["toml"]
62 | version = ">=v2.2.6"
63 |
64 | [[tool.poetry.packages]]
65 | include = "llama_index/"
66 |
--------------------------------------------------------------------------------
/4.Evaluation - Generation - Optimization/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | cohere
3 | chainlit
4 | llama-index
5 | pinecone-client
6 |
7 | llama-index-vector-stores-pinecone
8 | torch
9 | pypdf
10 | llmsherpa
11 | llama-hub
12 | transformers
13 | llama-index-postprocessor-cohere-rerank
14 | llama-index-core # Feb 21 2024
15 | llama-index-llms-openai # Feb 21 2024
16 | llama-index-embeddings-openai
17 | spacy
--------------------------------------------------------------------------------
/5.Intent Detection Agent/.chainlit/config.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | # Whether to enable telemetry (default: true). No personal data is collected.
3 | enable_telemetry = true
4 |
5 | # List of environment variables to be provided by each user to use the app.
6 | user_env = []
7 |
8 | # Duration (in seconds) during which the session is saved when the connection is lost
9 | session_timeout = 3600
10 |
11 | # Enable third parties caching (e.g LangChain cache)
12 | cache = false
13 |
14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15 | # follow_symlink = false
16 |
17 | [features]
18 | # Show the prompt playground
19 | prompt_playground = true
20 |
21 | # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
22 | unsafe_allow_html = false
23 |
24 | # Process and display mathematical expressions. This can clash with "$" characters in messages.
25 | latex = false
26 |
27 | # Authorize users to upload files with messages
28 | multi_modal = false
29 |
30 | # Allows user to use speech to text
31 | [features.speech_to_text]
32 | enabled = false
33 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
34 | # language = "en-US"
35 |
36 | [UI]
37 | # Name of the app and chatbot.
38 | name = "Chatbot"
39 |
40 | # Show the readme while the thread is empty.
41 | show_readme_as_default = false
42 |
43 | # Description of the app and chatbot. This is used for HTML tags.
44 | # description = ""
45 |
46 | # Large size content are by default collapsed for a cleaner ui
47 | default_collapse_content = true
48 |
49 | # The default value for the expand messages settings.
50 | default_expand_messages = false
51 |
52 | # Hide the chain of thought details from the user in the UI.
53 | hide_cot = false
54 |
55 | # Link to your github repo. This will add a github button in the UI's header.
56 | # github = ""
57 |
58 | # Specify a CSS file that can be used to customize the user interface.
59 | # The CSS file can be served from the public directory or via an external link.
60 | # custom_css = "/public/test.css"
61 |
62 | # Override default MUI light theme. (Check theme.ts)
63 | [UI.theme.light]
64 | #background = "#FAFAFA"
65 | #paper = "#FFFFFF"
66 |
67 | [UI.theme.light.primary]
68 | #main = "#F80061"
69 | #dark = "#980039"
70 | #light = "#FFE7EB"
71 |
72 | # Override default MUI dark theme. (Check theme.ts)
73 | [UI.theme.dark]
74 | #background = "#FAFAFA"
75 | #paper = "#FFFFFF"
76 |
77 | [UI.theme.dark.primary]
78 | #main = "#F80061"
79 | #dark = "#980039"
80 | #light = "#FFE7EB"
81 |
82 |
83 | [meta]
84 | generated_by = "1.0.101"
85 |
--------------------------------------------------------------------------------
/5.Intent Detection Agent/.chainlit/translations/en-US.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Settings",
8 | "settingsKey": "S",
9 | "APIKeys": "API Keys",
10 | "logout": "Logout"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "New Chat"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Task List",
22 | "loading": "Loading...",
23 |                     "error": "An error occurred"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancel upload",
28 | "removeAttachment": "Remove attachment"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Create new chat?",
32 | "clearChat": "This will clear the current messages and start a new chat.",
33 | "cancel": "Cancel",
34 | "confirm": "Confirm"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expand Messages",
38 | "hideChainOfThought": "Hide Chain of Thought",
39 | "darkMode": "Dark Mode"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "Last Inputs",
47 | "noInputs": "Such empty...",
48 | "loading": "Loading..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Type your message here..."
54 | },
55 | "speechButton": {
56 | "start": "Start recording",
57 | "stop": "Stop recording"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Send message",
61 | "stopTask": "Stop Task"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Attach files"
65 | },
66 | "waterMark": {
67 | "text": "Built with"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Running",
73 | "executedSuccessfully": "executed successfully",
74 | "failed": "failed",
75 | "feedbackUpdated": "Feedback updated",
76 | "updating": "Updating"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Drop your files here"
81 | },
82 | "index": {
83 | "failedToUpload": "Failed to upload",
84 | "cancelledUploadOf": "Cancelled upload of",
85 | "couldNotReachServer": "Could not reach the server",
86 | "continuingChat": "Continuing previous chat"
87 | },
88 | "settings": {
89 | "settingsPanel": "Settings panel",
90 | "reset": "Reset",
91 | "cancel": "Cancel",
92 | "confirm": "Confirm"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: All",
100 | "feedbackPositive": "Feedback: Positive",
101 | "feedbackNegative": "Feedback: Negative"
102 | },
103 | "SearchBar": {
104 | "search": "Search"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 |                     "confirmMessage": "This will delete the thread as well as its messages and elements.",
109 | "cancel": "Cancel",
110 | "confirm": "Confirm",
111 | "deletingChat": "Deleting chat",
112 | "chatDeleted": "Chat deleted"
113 | },
114 | "index": {
115 | "pastChats": "Past Chats"
116 | },
117 | "ThreadList": {
118 | "empty": "Empty..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Close sidebar",
122 | "openSidebar": "Open sidebar"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Go back to chat",
127 | "chatCreatedOn": "This chat was created on"
128 | }
129 | },
130 | "header": {
131 | "chat": "Chat",
132 | "readme": "Readme"
133 | }
134 | }
135 | },
136 | "hooks": {
137 | "useLLMProviders": {
138 | "failedToFetchProviders": "Failed to fetch providers:"
139 | }
140 | },
141 | "pages": {
142 | "Design": {},
143 | "Env": {
144 | "savedSuccessfully": "Saved successfully",
145 | "requiredApiKeys": "Required API Keys",
146 | "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
147 | },
148 | "Page": {
149 | "notPartOfProject": "You are not part of this project."
150 | },
151 | "ResumeButton": {
152 | "resumeChat": "Resume Chat"
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/5.Intent Detection Agent/.chainlit/translations/pt-BR.json:
--------------------------------------------------------------------------------
1 | {
2 | "components": {
3 | "atoms": {
4 | "buttons": {
5 | "userButton": {
6 | "menu": {
7 | "settings": "Configura\u00e7\u00f5es",
8 | "settingsKey": "S",
9 | "APIKeys": "Chaves de API",
10 | "logout": "Sair"
11 | }
12 | }
13 | }
14 | },
15 | "molecules": {
16 | "newChatButton": {
17 | "newChat": "Nova Conversa"
18 | },
19 | "tasklist": {
20 | "TaskList": {
21 | "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
22 | "loading": "Carregando...",
23 | "error": "Ocorreu um erro"
24 | }
25 | },
26 | "attachments": {
27 | "cancelUpload": "Cancelar envio",
28 | "removeAttachment": "Remover anexo"
29 | },
30 | "newChatDialog": {
31 | "createNewChat": "Criar novo chat?",
32 | "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
33 | "cancel": "Cancelar",
34 | "confirm": "Confirmar"
35 | },
36 | "settingsModal": {
37 | "expandMessages": "Expandir Mensagens",
38 | "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
39 | "darkMode": "Modo Escuro"
40 | }
41 | },
42 | "organisms": {
43 | "chat": {
44 | "history": {
45 | "index": {
46 | "lastInputs": "\u00daltimas Entradas",
47 | "noInputs": "Vazio...",
48 | "loading": "Carregando..."
49 | }
50 | },
51 | "inputBox": {
52 | "input": {
53 | "placeholder": "Digite sua mensagem aqui..."
54 | },
55 | "speechButton": {
56 | "start": "Iniciar grava\u00e7\u00e3o",
57 | "stop": "Parar grava\u00e7\u00e3o"
58 | },
59 | "SubmitButton": {
60 | "sendMessage": "Enviar mensagem",
61 | "stopTask": "Parar Tarefa"
62 | },
63 | "UploadButton": {
64 | "attachFiles": "Anexar arquivos"
65 | },
66 | "waterMark": {
67 | "text": "Constru\u00eddo com"
68 | }
69 | },
70 | "Messages": {
71 | "index": {
72 | "running": "Executando",
73 | "executedSuccessfully": "executado com sucesso",
74 | "failed": "falhou",
75 | "feedbackUpdated": "Feedback atualizado",
76 | "updating": "Atualizando"
77 | }
78 | },
79 | "dropScreen": {
80 | "dropYourFilesHere": "Solte seus arquivos aqui"
81 | },
82 | "index": {
83 | "failedToUpload": "Falha ao enviar",
84 | "cancelledUploadOf": "Envio cancelado de",
85 | "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
86 | "continuingChat": "Continuando o chat anterior"
87 | },
88 | "settings": {
89 | "settingsPanel": "Painel de Configura\u00e7\u00f5es",
90 | "reset": "Redefinir",
91 | "cancel": "Cancelar",
92 | "confirm": "Confirmar"
93 | }
94 | },
95 | "threadHistory": {
96 | "sidebar": {
97 | "filters": {
98 | "FeedbackSelect": {
99 | "feedbackAll": "Feedback: Todos",
100 | "feedbackPositive": "Feedback: Positivo",
101 | "feedbackNegative": "Feedback: Negativo"
102 | },
103 | "SearchBar": {
104 | "search": "Buscar"
105 | }
106 | },
107 | "DeleteThreadButton": {
108 | "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
109 | "cancel": "Cancelar",
110 | "confirm": "Confirmar",
111 | "deletingChat": "Deletando conversa",
112 | "chatDeleted": "Conversa deletada"
113 | },
114 | "index": {
115 | "pastChats": "Conversas Anteriores"
116 | },
117 | "ThreadList": {
118 | "empty": "Vazio..."
119 | },
120 | "TriggerButton": {
121 | "closeSidebar": "Fechar barra lateral",
122 | "openSidebar": "Abrir barra lateral"
123 | }
124 | },
125 | "Thread": {
126 | "backToChat": "Voltar para a conversa",
127 | "chatCreatedOn": "Esta conversa foi criada em"
128 | }
129 | },
130 | "header": {
131 | "chat": "Conversa",
132 | "readme": "Leia-me"
133 | }
134 | },
135 | "hooks": {
136 | "useLLMProviders": {
137 | "failedToFetchProviders": "Falha ao buscar provedores:"
138 | }
139 | },
140 | "pages": {
141 | "Design": {},
142 | "Env": {
143 | "savedSuccessfully": "Salvo com sucesso",
144 | "requiredApiKeys": "Chaves de API necess\u00e1rias",
145 | "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
146 | },
147 | "Page": {
148 | "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
149 | },
150 | "ResumeButton": {
151 | "resumeChat": "Continuar Conversa"
152 | }
153 | }
154 | }
155 | }
--------------------------------------------------------------------------------
/5.Intent Detection Agent/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY="sk-..."
2 | PINECONE_API_KEY=
3 | COHERE_API_KEY=
--------------------------------------------------------------------------------
/5.Intent Detection Agent/README.md:
--------------------------------------------------------------------------------
1 | # Agent for intent detection
2 |
3 | In this module, we introduce an agent designed to understand and interpret user intentions effectively. The primary goal is to redirect queries to a more compact, cost-efficient language model. By default, we employ gpt-3.5-turbo for this task due to its efficiency and performance. The implementation is modular, however, allowing for easy substitution with your preferred large language model (LLM).
4 |
5 | This module represents our initial step toward integrating agents within our RAG pipeline. It is designed as a foundational example, illustrating basic usage and integration strategies. We plan to build upon this groundwork with more advanced features and use cases in future updates.
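
As a rough, hypothetical illustration of the idea (not the exact code in `main.py`), intent detection can be as simple as asking the small model to classify the incoming message and routing it accordingly:

```python
# Hypothetical sketch only; see main.py for this module's actual agent implementation.
from llama_index.llms.openai import OpenAI

ROUTER_PROMPT = (
    "Classify the user's message as either 'document_question' or 'chitchat'. "
    "Answer with a single word.\n\nMessage: {message}"
)


def detect_intent(message: str) -> str:
    # A compact, inexpensive model handles the classification step.
    router_llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
    return router_llm.complete(ROUTER_PROMPT.format(message=message)).text.strip().lower()


def answer(message: str, query_engine) -> str:
    if detect_intent(message) == "document_question":
        return str(query_engine.query(message))  # full RAG pipeline with sources
    return OpenAI(model="gpt-3.5-turbo").complete(message).text  # cheap direct reply
```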
--------------------------------------------------------------------------------
/5.Intent Detection Agent/chainlit.md:
--------------------------------------------------------------------------------
1 | # Welcome to Chainlit! 🚀🤖
2 |
3 | Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4 |
5 | ## Useful Links 🔗
6 |
7 | - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8 | - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
9 |
10 | We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11 |
12 | ## Welcome screen
13 |
14 | To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
15 |
--------------------------------------------------------------------------------
/5.Intent Detection Agent/images/RAGSources.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipearosr/RAG-LlamaIndex/93ccc57400a8d276a95d7f965080d0517514c25f/5.Intent Detection Agent/images/RAGSources.png
--------------------------------------------------------------------------------
/5.Intent Detection Agent/ingest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import asyncio
4 | import argparse
5 |
6 | from dotenv import load_dotenv
7 | from pinecone import Pinecone, PodSpec
8 |
9 | from llama_index.core import SimpleDirectoryReader, download_loader
10 | from llama_index.llms.openai import OpenAI
11 | from llama_index.embeddings.openai import OpenAIEmbedding
12 | from llama_index.core.ingestion import IngestionPipeline
13 | from llama_index.vector_stores.pinecone import PineconeVectorStore
14 | from llama_index.core.extractors import (
15 | TitleExtractor,
16 | # QuestionsAnsweredExtractor,
17 | # SummaryExtractor,
18 | # KeywordExtractor,
19 | )
20 | from llama_index.core.node_parser import SentenceSplitter
21 | from llama_parse import LlamaParse
22 |
23 | load_dotenv()
24 | openai.api_key = os.environ.get("OPENAI_API_KEY")
25 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
26 | llama_parse_api_key = os.environ.get("LLAMA_PARSE_API_KEY")
27 |
28 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
29 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
30 |
31 |
32 | def get_pinecone_index(pc, index_name):
33 | pinecone_index = pc.Index(index_name)
34 | return pinecone_index
35 |
36 |
37 | def get_pinecone_vector_store(pinecone_index):
38 | vector_store = PineconeVectorStore(
39 | pinecone_index=pinecone_index,
40 | add_sparse_vector=True,
41 | )
42 | return vector_store
43 |
44 |
45 | def create_pinecone_pod(pc, index_name):
46 | print("Creating pinecone pod")
47 | pc.create_index(
48 | name=index_name,
49 | dimension=3072,
50 | metric="dotproduct",
51 | spec=PodSpec(environment="gcp-starter"),
52 | )
53 |
54 |
55 | def get_documents(input_dir):
56 | llama_parser = LlamaParse(
57 | api_key=llama_parse_api_key, result_type="markdown", verbose=True
58 | )
59 |
60 | UnstructuredReader = download_loader("UnstructuredReader")
61 |
62 | file_extractor = {
63 | ".pdf": llama_parser,
64 | ".html": UnstructuredReader(),
65 | ".txt": UnstructuredReader(),
66 | }
67 | print("Reading directory")
68 | director_reader = SimpleDirectoryReader(
69 | input_dir=input_dir, file_extractor=file_extractor
70 | )
71 | print("Starting document reading")
72 | documents = director_reader.load_data(show_progress=True)
73 | return documents
74 |
75 |
76 | def run_pipeline(documents, vector_store, llm, num_workers):
77 | pipeline = IngestionPipeline(
78 | transformations=[
79 | SentenceSplitter(chunk_size=512, chunk_overlap=126),
80 | TitleExtractor(llm=llm, num_workers=num_workers),
81 | # QuestionsAnsweredExtractor(questions=3, llm=llm, num_workers=num_workers),
82 | # SummaryExtractor(
83 | # summaries=["prev", "self"], llm=llm, num_workers=num_workers
84 | # ),
85 | # KeywordExtractor(keywords=5, llm=llm, num_workers=num_workers),
86 | OpenAIEmbedding(model=EMBEDDING),
87 | ],
88 | vector_store=vector_store,
89 | )
90 |     for doc in documents:  # Small patch to remove last_accessed_date from metadata
91 |         # pop() avoids a KeyError if the reader did not attach this field
92 |         doc.metadata.pop("last_accessed_date", None)
93 | pipeline.run(documents=documents, show_progress=True, num_workers=num_workers)
94 |
95 |
96 | async def main():
97 | print("Starting ingestion")
98 | input_dir = "./data/source_files/"
99 | index_name = "rag-index"
100 | num_cores = os.cpu_count()
101 | num_workers = min(4, num_cores)
102 | pc = Pinecone(api_key=pinecone_api_key)
103 |     parser = argparse.ArgumentParser(description="Ingest documents into Pinecone.")
104 | parser.add_argument(
105 | "--gen",
106 | action="store_true",
107 | help="Generate new pinecone index",
108 | )
109 | args = parser.parse_args()
110 | if args.gen:
111 | create_pinecone_pod(pc, index_name)
112 | llm = OpenAI(temperature=0.1, model=MODEL, max_tokens=1024)
113 | pinecone_index = get_pinecone_index(pc, index_name)
114 | vector_store = get_pinecone_vector_store(pinecone_index)
115 | documents = get_documents(input_dir)
116 | print("Starting ingestion pipeline")
117 | run_pipeline(documents, vector_store, llm, num_workers)
118 |
119 |
120 | if __name__ == "__main__":
121 | asyncio.run(main())
122 |
--------------------------------------------------------------------------------
/5.Intent Detection Agent/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import chainlit as cl
4 |
5 | from pinecone import Pinecone
6 | from llama_index.core import Settings, VectorStoreIndex
7 | from llama_index.llms.openai import OpenAI
8 | from llama_index.embeddings.openai import OpenAIEmbedding
9 | from llama_index.vector_stores.pinecone import PineconeVectorStore
10 | from llama_index.core.response_synthesizers import ResponseMode
11 | from llama_index.postprocessor.cohere_rerank import CohereRerank
12 | from llama_index.core.indices.query.query_transform.base import (
13 | StepDecomposeQueryTransform,
14 | )
15 | from llama_index.core.tools import QueryEngineTool
16 | from llama_index.core.selectors import LLMSingleSelector
17 | from llama_index.core.query_engine import CustomQueryEngine, RouterQueryEngine
18 | from llama_index.core.base.response.schema import StreamingResponse
19 |
20 | openai.api_key = os.environ.get("OPENAI_API_KEY")
21 | cohere_api_key = os.environ.get("COHERE_API_KEY")
22 | pinecone_api_key = os.environ.get("PINECONE_API_KEY")
23 |
24 | MODEL = os.getenv("MODEL", "gpt-4-0125-preview")
25 | EMBEDDING = os.getenv("EMBEDDING", "text-embedding-3-large")
26 |
27 | direct_llm_prompt = (
28 |     "Given the user query, respond as best as possible following these guidelines:\n"
29 |     "- If the intent of the user is to get information about the abilities of the AI, respond with: "
30 |     "The AI is a language model that can answer questions, generate text, summarize documents, and more. \n"
31 |     "- If the intent of the user is harmful, respond with: I cannot help with that. \n"
32 |     "- If the intent of the user is to get information outside of the given context, respond with: "
33 |     "I cannot help with that. Please ask something that is relevant to the documents in the given context. \n"
34 |     "Query: {query}"
35 | )
36 |
37 |
38 | class LlmQueryEngine(CustomQueryEngine):
39 | """Custom query engine for direct calls to the LLM model."""
40 |
41 | llm: OpenAI
42 | prompt: str
43 |
44 | def custom_query(self, query_str: str):
45 | llm_prompt = self.prompt.format(query=query_str)
46 | llm_response = self.llm.complete(llm_prompt, formatted=False)
47 |
48 |         def response_gen(llm_response):
49 |             # The CompletionResponse from llm.complete() exposes the generated text
50 |             # directly; strip the optional "AI: " prefix and yield it once so the
51 |             # caller can consume this engine like a streaming query engine.
52 |             text_response = llm_response.text.replace("AI: ", "").strip()
53 |             yield text_response
54 |
55 | return StreamingResponse(response_gen=response_gen(llm_response))
56 |
57 |
58 | @cl.cache
59 | def load_context():
60 | Settings.llm = OpenAI(temperature=0.1, model=MODEL, streaming=True)
61 | Settings.embed_model = OpenAIEmbedding(model=EMBEDDING, embed_batch_size=1)
62 | Settings.num_output = 1024
63 | Settings.context_window = 128000
64 | pc = Pinecone(api_key=pinecone_api_key)
65 | pinecone_index = pc.Index("rag-index")
66 | vector_store = PineconeVectorStore(
67 | pinecone_index=pinecone_index,
68 | )
69 |
70 | index = VectorStoreIndex.from_vector_store(
71 | vector_store=vector_store,
72 | )
73 | return index
74 |
75 |
76 | @cl.step
77 | async def router_query_engine():
78 | vector_query_engine = cl.user_session.get("vector_query_engine")
79 | llm_query_engine = cl.user_session.get("simple_query_engine")
80 |
81 | list_tool = QueryEngineTool.from_defaults(
82 | query_engine=llm_query_engine,
83 | name="LLM Query Engine",
84 | description=(
85 | "Useful for when the INTENT of the user isnt clear, is broad, "
86 | "or when the user is asking general questions that have nothing "
87 | "to do with SURA insurance. Use this tool when the other tool is not useful."
88 | ),
89 | )
90 |
91 | vector_tool = QueryEngineTool.from_defaults(
92 | query_engine=vector_query_engine,
93 | name="Vector Query Engine",
94 | description=(
95 | "Useful for retrieving specific context about Paul Graham or anything related "
96 | "to startup incubation, essay writing, programming languages, venture funding, "
97 | "Y Combinator, Lisp programming, or anything related to the field of technology "
98 | "entrepreneurship and innovation."
99 | ),
100 | )
101 | query_engine = RouterQueryEngine(
102 | selector=LLMSingleSelector.from_defaults(),
103 | query_engine_tools=[
104 | list_tool,
105 | vector_tool,
106 | ],
107 | )
108 | print("Router query engine created.")
109 | print(query_engine)
110 | return query_engine
111 |
112 |
113 | @cl.on_chat_start
114 | async def start():
115 | index = load_context()
116 |
117 | reranker = CohereRerank(api_key=cohere_api_key, top_n=3)
118 |     step_decompose_transform = StepDecomposeQueryTransform(llm=Settings.llm, verbose=True)
119 |
120 | vector_query_engine = index.as_query_engine(
121 | streaming=True,
122 | similarity_top_k=6,
123 | node_postprocessors=[reranker],
124 | vector_store_query_mode="hybrid",
125 | query_transform=step_decompose_transform,
126 |         response_mode=ResponseMode.REFINE,
127 | )
128 |
129 | simple_query_engine = LlmQueryEngine(
130 | llm=OpenAI(model="gpt-3.5-turbo"), prompt=direct_llm_prompt
131 | )
132 |
133 | cl.user_session.set("simple_query_engine", simple_query_engine)
134 | cl.user_session.set("vector_query_engine", vector_query_engine)
135 |
136 | message_history = []
137 | cl.user_session.set("message_history", message_history)
138 |
139 | await cl.Message(
140 | author="Assistant", content="Hello! Im an AI assistant. How may I help you?"
141 | ).send()
142 |
143 |
144 | async def set_sources(response, response_message):
145 |     # Collect one side-panel Text element per source node and reference the
146 |     # labels at the end of the answer so users can inspect the retrieved context.
147 |     label_list = []
148 |     elements = []
149 |     for count, sr in enumerate(response.source_nodes, start=1):
150 |         elements.append(
151 |             cl.Text(
152 |                 name="S" + str(count),
153 |                 content=f"{sr.node.text}",
154 |                 display="side",
155 |                 size="small",
156 |             )
157 |         )
158 |         label_list.append("S" + str(count))
159 |     response_message.elements = elements
160 |     response_message.content += "\n\nSources: " + ", ".join(label_list)
161 |     await response_message.update()
162 |
163 |
164 | @cl.on_message
165 | async def main(message: cl.Message):
166 | query_engine = await router_query_engine()
167 | message_history = cl.user_session.get("message_history")
168 | prompt_template = "Previous messages:\n"
169 |
170 | response_message = cl.Message(content="", author="Assistant")
171 |
172 | user_message = message.content
173 |
174 |     for past_message in message_history:
175 |         prompt_template += f"{past_message['author']}: {past_message['content']}\n"
176 | prompt_template += f"Human: {user_message}"
177 |
178 | response = await cl.make_async(query_engine.query)(prompt_template)
179 |
180 | for token in response.response_gen:
181 | await response_message.stream_token(token)
182 | if response.response_txt:
183 | response_message.content = response.response_txt
184 | await response_message.send()
185 |
186 | message_history.append({"author": "Human", "content": user_message})
187 | message_history.append({"author": "AI", "content": response_message.content})
188 | message_history = message_history[-6:]
189 | cl.user_session.set("message_history", message_history)
190 |
191 | if response.source_nodes:
192 | await set_sources(response, response_message)
193 |
--------------------------------------------------------------------------------
/5.Intent Detection Agent/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | cohere
3 | chainlit
4 | llama-index
5 | pinecone-client
6 |
7 | llama-index-vector-stores-pinecone
8 | torch
9 | pypdf
10 | llmsherpa
11 | llama-hub
12 | transformers
13 | llama-index-postprocessor-cohere-rerank
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2012-2024 Scott Chacon and others
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAG workflow: from basic to advanced
2 |
3 | This project focuses on enhancing the GPT Documents chatbot by introducing several innovative features across different stages of development, aimed at improving user interaction, search accuracy, and response quality.
4 |
5 | 
6 |
7 | ## Project Overview:
8 |
9 | 1. **ChatBot with Streaming, Memory, and Sources**: The initial version introduces streaming for real-time response delivery, memory for contextual conversations, and source indication for transparency. LlamaIndex and Chainlit are used to deliver a more intuitive and informative chatbot experience.
10 |
11 | 2. **Vector DB Integration, Hybrid Retriever, and Advanced Ingestion**: Subsequent updates include Pinecone integration for efficient vector data handling, a hybrid retriever combining dense and sparse vector methods for improved search relevance, and advanced ingestion techniques for better document retrieval and processing.
12 |
13 | 3. **Reranker, Query Transformations, and Response Synthesis**: Further enhancements incorporate the Cohere reranker for semantic document reordering, multi-step query transformations for detailed query processing, and response synthesis methods for generating more accurate and comprehensive answers.
14 |
15 | 4. **Evaluation - Generation - Optimization:** This stage involves systematically generating an evaluation dataset and scoring the RAG pipeline on the following metrics: correctness, relevancy, faithfulness, and context similarity.
16 |
17 | 5. **Intent Detection Agent:** Integration of an agent for effective user intent detection, streamlining the query process and enabling more efficient and precise information retrieval by redirecting queries to a more compact and cost-efficient language model.
18 |
19 | ## Key Features and Improvements:
20 |
21 | - **Real-time Interaction**: Implements streaming to deliver answers swiftly, enhancing user experience.
22 |
23 | - **Conversational Memory**: Employs memory capabilities to provide context-aware responses based on previous interactions.
24 |
25 | - **Source Transparency**: Indicates the origin of the chatbot's responses, building user trust.
26 |
27 | - **Efficient Data Handling**: Utilizes Pinecone for optimized vector data management, enabling faster and more relevant search results.
28 |
29 | - **Enhanced Search Accuracy**: Introduces a hybrid retriever that merges dense and sparse search methodologies, offering more precise results.
30 |
31 | - **Improved Document Processing**: Incorporates advanced ingestion techniques for various document types, enhancing the chatbot's understanding and retrieval capabilities.
32 |
33 | - **Semantic Reranking**: Integrates a reranker to adjust search results based on semantic relevance, ensuring responses align more closely with user queries.
34 |
35 | - **Advanced Query Processing**: Applies multi-step query transformations to break down complex inquiries into manageable parts, ensuring thorough exploration of user intents.
36 |
37 | - **Dynamic Response Generation**: Adopts multiple response synthesis methods, tailoring the chatbot's replies to user needs and ensuring comprehensive and detailed answers.
38 |
39 | This project represents a comprehensive approach to developing a sophisticated chatbot capable of real-time interaction, contextual understanding, and accurate information retrieval, all while maintaining transparency and user trust.
40 |
41 |
42 | ## Roadmap
43 |
44 | The order might change, and points might be added.
45 |
46 | - [x] Chat Streaming
47 | - [X] Memory
48 | - [x] Sources
49 | - [x] Pinecone Pod
50 | - [ ] Pinecone Serverless
51 | - [x] Implementing HybridSearch Retriever
52 | - [x] Implementing better ingestion
53 | - [x] Add evaluation
54 | - [x] Create set of documents and questions for evaluation
55 | - [ ] Trying out agents
56 | - [ ] Prompting
57 | - [x] Trying out Query Transformations
58 | - [ ] Implementing a llm router
59 | - [ ] Trying out GPT as a reranker and comparing it with others
60 | - [ ] Adding Mistral and Llama examples
61 | - [ ] Adding Jupyter notebooks to each subproject.
62 | - [x] Intent detection, using GPT-3.5 Turbo for some easy tasks.
63 |
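64 | ## Example: Condensed retrieval pipeline
65 | 
66 | For a quick feel of how the pieces above fit together, the sketch below condenses the query-engine setup used in the later modules (see `5.Intent Detection Agent/main.py` for the full streaming Chainlit version). Model names and parameters are simply the defaults used there; swap in your own as needed.
67 | 
68 | ```python
69 | import os
70 | 
71 | from pinecone import Pinecone
72 | from llama_index.core import Settings, VectorStoreIndex
73 | from llama_index.embeddings.openai import OpenAIEmbedding
74 | from llama_index.llms.openai import OpenAI
75 | from llama_index.postprocessor.cohere_rerank import CohereRerank
76 | from llama_index.vector_stores.pinecone import PineconeVectorStore
77 | 
78 | # Global models: GPT-4 Turbo for generation, OpenAI embeddings for retrieval.
79 | Settings.llm = OpenAI(temperature=0.1, model="gpt-4-0125-preview")
80 | Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
81 | 
82 | # Reuse the Pinecone index populated by ingest.py.
83 | pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
84 | vector_store = PineconeVectorStore(pinecone_index=pc.Index("rag-index"))
85 | index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
86 | 
87 | # Hybrid (dense + sparse) retrieval, reranked by Cohere before the answer is synthesized.
88 | query_engine = index.as_query_engine(
89 |     similarity_top_k=6,
90 |     vector_store_query_mode="hybrid",
91 |     node_postprocessors=[CohereRerank(api_key=os.environ["COHERE_API_KEY"], top_n=3)],
92 | )
93 | 
94 | print(query_engine.query("What does the ingested material say about startup funding?"))
95 | ```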
--------------------------------------------------------------------------------
/ruff.toml:
--------------------------------------------------------------------------------
1 | # Exclude a variety of commonly ignored directories.
2 | exclude = [
3 | ".bzr",
4 | ".direnv",
5 | ".eggs",
6 | ".git",
7 | ".git-rewrite",
8 | ".hg",
9 | ".ipynb_checkpoints",
10 | ".mypy_cache",
11 | ".nox",
12 | ".pants.d",
13 | ".pyenv",
14 | ".pytest_cache",
15 | ".pytype",
16 | ".ruff_cache",
17 | ".svn",
18 | ".tox",
19 | ".venv",
20 | ".vscode",
21 | "__pypackages__",
22 | "_build",
23 | "buck-out",
24 | "build",
25 | "dist",
26 | "node_modules",
27 | "site-packages",
28 | "venv",
29 | ]
30 |
31 | # Same as Black.
32 | line-length = 88
33 | indent-width = 4
34 |
35 | # Assume Python 3.11
36 | target-version = "py311"
37 |
38 | [lint]
39 | # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
40 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
41 | # McCabe complexity (`C901`) by default.
42 | select = ["E4", "E7", "E9", "F"]
43 | ignore = []
44 |
45 | # Allow fix for all enabled rules (when `--fix` is provided).
46 | fixable = ["ALL"]
47 | unfixable = []
48 |
49 | # Allow unused variables when underscore-prefixed.
50 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
51 |
52 | [format]
53 | # Like Black, use double quotes for strings.
54 | quote-style = "double"
55 |
56 | # Like Black, indent with spaces, rather than tabs.
57 | indent-style = "space"
58 |
59 | # Like Black, respect magic trailing commas.
60 | skip-magic-trailing-comma = false
61 |
62 | # Like Black, automatically detect the appropriate line ending.
63 | line-ending = "auto"
64 |
65 | # Enable auto-formatting of code examples in docstrings. Markdown,
66 | # reStructuredText code/literal blocks and doctests are all supported.
67 | #
68 | # This is currently disabled by default, but it is planned for this
69 | # to be opt-out in the future.
70 | docstring-code-format = false
71 |
72 | # Set the line length limit used when formatting code snippets in
73 | # docstrings.
74 | #
75 | # This only has an effect when the `docstring-code-format` setting is
76 | # enabled.
77 | docstring-code-line-length = "dynamic"
--------------------------------------------------------------------------------