├── .gitignore ├── LICENSE ├── flow └── video_news2linkedin_conent.json ├── main.py ├── readme.md ├── requirements.txt ├── services ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── audio_transcription_service.cpython-311.pyc │ ├── audio_transcription_service.cpython-312.pyc │ ├── video_transcription_service.cpython-311.pyc │ └── video_transcription_service.cpython-312.pyc ├── audio_transcription_service.py └── video_transcription_service.py ├── transcribe_api.py └── utils ├── __init__.py └── measure_time.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .env 3 | .DS_Store 4 | data 5 | transcriptions -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Kameshwara Pavan Kumar Mantha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /flow/video_news2linkedin_conent.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "video_news2linkedin_conent", 3 | "nodes": [ 4 | { 5 | "parameters": { 6 | "options": {} 7 | }, 8 | "type": "@n8n/n8n-nodes-langchain.chatTrigger", 9 | "typeVersion": 1.1, 10 | "position": [ 11 | -1220, 12 | 160 13 | ], 14 | "id": "e0a68968-6e2f-46df-b8fd-86d0f43b20fb", 15 | "name": "When chat message received", 16 | "webhookId": "030aed5b-557d-46e3-8919-151eff58fca1" 17 | }, 18 | { 19 | "parameters": { 20 | "fileSelector": "/Users/pavanmantha/Pavans/PracticeExamples/DataScience_Practice/Advanced-AI-Apps/Conversational_Media_Platform/transcriptions/*.txt", 21 | "options": {} 22 | }, 23 | "type": "n8n-nodes-base.readWriteFile", 24 | "typeVersion": 1, 25 | "position": [ 26 | 180, 27 | 0 28 | ], 29 | "id": "ad40d4f6-86c3-4cd9-afc1-740351273963", 30 | "name": "Read/Write Files from Disk" 31 | }, 32 | { 33 | "parameters": { 34 | "operation": "text", 35 | "options": {} 36 | }, 37 | "type": "n8n-nodes-base.extractFromFile", 38 | "typeVersion": 1, 39 | "position": [ 40 | 500, 41 | 0 42 | ], 43 | "id": "70f5f1bf-de73-4574-a14e-1cb83c43f9a2", 44 | "name": "Extract from File" 45 | }, 46 | { 47 | "parameters": { 48 | "person": "orAUxxNmwM", 49 | "text": "={{ $json.output }}", 50 | "additionalFields": {} 51 | }, 52 | "id": "6bde4c06-abe6-4524-bca7-219de8747bd5", 53 | "name": "LinkedIn", 54 | "type": "n8n-nodes-base.linkedIn", 55 | "typeVersion": 1, 56 | "position": [ 57 | 1240, 58 | 0 59 | ], 60 | "credentials": { 61 | "linkedInOAuth2Api": { 62 | "id": "aO3Pf28neDNmz3zc", 63 | "name": "LinkedIn account" 64 | } 65 | } 66 | }, 67 | { 68 | "parameters": { 69 | "promptType": "define", 70 | "text": "=Create the summary in the below format from the given context. the summary should be brief and crisp and easy to consume. This summary should look professional as its going to publish in linkedin. 
Always emphasise on AI aspects of the context.\n-------------------\ncontext:\n{{ $json.data }}\n------------------\nAssistant:\nSummary: \"The actual summary in brief\"\n\"Important bullet points\".\n------------------\nAs final thoughts, place make the content beautiful with nice emojis.", 71 | "options": {} 72 | }, 73 | "type": "@n8n/n8n-nodes-langchain.agent", 74 | "typeVersion": 1.7, 75 | "position": [ 76 | 780, 77 | 0 78 | ], 79 | "id": "3effb4ae-e688-44a8-bb6c-44877b8dabb0", 80 | "name": "AI Agent2" 81 | }, 82 | { 83 | "parameters": { 84 | "model": "gpt-4o-2024-11-20", 85 | "options": {} 86 | }, 87 | "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi", 88 | "typeVersion": 1, 89 | "position": [ 90 | 780, 91 | 200 92 | ], 93 | "id": "aaedf6d3-3e2e-4570-901f-704854acd0d8", 94 | "name": "OpenAI Chat Model", 95 | "credentials": { 96 | "openAiApi": { 97 | "id": "P1FfoID4UTiPdjMD", 98 | "name": "OpenAi account" 99 | } 100 | } 101 | }, 102 | { 103 | "parameters": { 104 | "mode": "insert", 105 | "qdrantCollection": { 106 | "__rl": true, 107 | "value": "media_content", 108 | "mode": "list", 109 | "cachedResultName": "media_content" 110 | }, 111 | "options": {} 112 | }, 113 | "type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant", 114 | "typeVersion": 1, 115 | "position": [ 116 | 820, 117 | -560 118 | ], 119 | "id": "d14f14cb-2c65-426f-94cb-7430229cc21d", 120 | "name": "Qdrant Vector Store", 121 | "credentials": { 122 | "qdrantApi": { 123 | "id": "jbqGna16O2L9iR8V", 124 | "name": "QdrantApi account" 125 | } 126 | } 127 | }, 128 | { 129 | "parameters": { 130 | "model": "nomic-embed-text:latest" 131 | }, 132 | "type": "@n8n/n8n-nodes-langchain.embeddingsOllama", 133 | "typeVersion": 1, 134 | "position": [ 135 | 860, 136 | -340 137 | ], 138 | "id": "58c1a112-a321-4483-a906-c5fcebfa00b4", 139 | "name": "Embeddings Ollama", 140 | "credentials": { 141 | "ollamaApi": { 142 | "id": "3fAFU0fFchwovvbD", 143 | "name": "Ollama account" 144 | } 145 | } 146 | }, 147 | { 148 | "parameters": { 149 | "options": {} 150 | }, 151 | "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader", 152 | "typeVersion": 1, 153 | "position": [ 154 | 1020, 155 | -360 156 | ], 157 | "id": "f253cb7b-0fb9-445b-9bac-364a7513bdd2", 158 | "name": "Default Data Loader" 159 | }, 160 | { 161 | "parameters": { 162 | "chunkSize": 512, 163 | "chunkOverlap": 20, 164 | "options": {} 165 | }, 166 | "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter", 167 | "typeVersion": 1, 168 | "position": [ 169 | 1120, 170 | -220 171 | ], 172 | "id": "53bcb684-ba7d-4516-aff1-1df91f35fac2", 173 | "name": "Recursive Character Text Splitter" 174 | }, 175 | { 176 | "parameters": { 177 | "content": "", 178 | "height": 300, 179 | "width": 820 180 | }, 181 | "type": "n8n-nodes-base.stickyNote", 182 | "typeVersion": 1, 183 | "position": [ 184 | -1300, 185 | 40 186 | ], 187 | "id": "e584c5f4-d720-4f07-9cdd-cc560b0e4714", 188 | "name": "Sticky Note" 189 | }, 190 | { 191 | "parameters": { 192 | "content": "read transcribed file from disk", 193 | "height": 100, 194 | "width": 200, 195 | "color": 3 196 | }, 197 | "type": "n8n-nodes-base.stickyNote", 198 | "typeVersion": 1, 199 | "position": [ 200 | 80, 201 | -120 202 | ], 203 | "id": "8281e818-c023-437e-88ee-3cde922dd031", 204 | "name": "Sticky Note2" 205 | }, 206 | { 207 | "parameters": { 208 | "content": "read transcribed file content from disk", 209 | "height": 100, 210 | "width": 200, 211 | "color": 3 212 | }, 213 | "type": "n8n-nodes-base.stickyNote", 214 | "typeVersion": 1, 215 | 
"position": [ 216 | 420, 217 | -120 218 | ], 219 | "id": "da055751-77cd-4c4b-87f4-6c72181767f5", 220 | "name": "Sticky Note3" 221 | }, 222 | { 223 | "parameters": { 224 | "content": "pass content to agent and augment with a desired prompt as per the need\n", 225 | "height": 100, 226 | "width": 200, 227 | "color": 3 228 | }, 229 | "type": "n8n-nodes-base.stickyNote", 230 | "typeVersion": 1, 231 | "position": [ 232 | 940, 233 | 200 234 | ], 235 | "id": "e0c64be8-adb1-4f10-8861-44ad0e306bf3", 236 | "name": "Sticky Note4" 237 | }, 238 | { 239 | "parameters": { 240 | "content": "finally publish the content to linked as feed.", 241 | "height": 100, 242 | "width": 200, 243 | "color": 3 244 | }, 245 | "type": "n8n-nodes-base.stickyNote", 246 | "typeVersion": 1, 247 | "position": [ 248 | 1260, 249 | 160 250 | ], 251 | "id": "9193d603-6e3f-4c6f-911d-7cdda76cf4d7", 252 | "name": "Sticky Note5" 253 | }, 254 | { 255 | "parameters": { 256 | "content": "initialise the qdrant vector store", 257 | "height": 100, 258 | "width": 200, 259 | "color": 3 260 | }, 261 | "type": "n8n-nodes-base.stickyNote", 262 | "typeVersion": 1, 263 | "position": [ 264 | 840, 265 | -680 266 | ], 267 | "id": "f39dc008-7e14-416e-8a78-0d4c74da18a4", 268 | "name": "Sticky Note6" 269 | }, 270 | { 271 | "parameters": { 272 | "content": "pickup the transcribed text from the previous step", 273 | "height": 100, 274 | "width": 200, 275 | "color": 3 276 | }, 277 | "type": "n8n-nodes-base.stickyNote", 278 | "typeVersion": 1, 279 | "position": [ 280 | 1300, 281 | -380 282 | ], 283 | "id": "d88df249-1500-4947-ba68-46e8d375cc9d", 284 | "name": "Sticky Note7" 285 | }, 286 | { 287 | "parameters": { 288 | "content": "use the recursive chunk strategy", 289 | "height": 100, 290 | "width": 200, 291 | "color": 3 292 | }, 293 | "type": "n8n-nodes-base.stickyNote", 294 | "typeVersion": 1, 295 | "position": [ 296 | 1240, 297 | -240 298 | ], 299 | "id": "9091a90f-789d-4f07-ba9a-7f1b9e21d7b3", 300 | "name": "Sticky Note8" 301 | }, 302 | { 303 | "parameters": { 304 | "content": "embedding model provider", 305 | "height": 80, 306 | "width": 200, 307 | "color": 3 308 | }, 309 | "type": "n8n-nodes-base.stickyNote", 310 | "typeVersion": 1, 311 | "position": [ 312 | 820, 313 | -220 314 | ], 315 | "id": "95a17cbb-f124-4f6b-8462-94616e71be22", 316 | "name": "Sticky Note9" 317 | }, 318 | { 319 | "parameters": { 320 | "content": "trigger the flow manually if the transciption already exist", 321 | "height": 100, 322 | "width": 200, 323 | "color": 3 324 | }, 325 | "type": "n8n-nodes-base.stickyNote", 326 | "typeVersion": 1, 327 | "position": [ 328 | -260, 329 | 340 330 | ], 331 | "id": "8cd66716-1d9e-4033-8180-8f77e50465f4", 332 | "name": "Sticky Note10" 333 | }, 334 | { 335 | "parameters": { 336 | "content": "execute transscribe service", 337 | "height": 80, 338 | "width": 220, 339 | "color": 3 340 | }, 341 | "type": "n8n-nodes-base.stickyNote", 342 | "typeVersion": 1, 343 | "position": [ 344 | -840, 345 | 60 346 | ], 347 | "id": "f16b4ac3-8d4d-4ec1-9195-c13fadddfb5f", 348 | "name": "Sticky Note11" 349 | }, 350 | { 351 | "parameters": { 352 | "path": "8895e90e-a0d7-42b9-8aed-db2a21fe1e29", 353 | "options": {} 354 | }, 355 | "type": "n8n-nodes-base.webhook", 356 | "typeVersion": 2, 357 | "position": [ 358 | -220, 359 | 0 360 | ], 361 | "id": "42638866-6ac0-45f3-a6d1-d71671627c57", 362 | "name": "Webhook", 363 | "webhookId": "8895e90e-a0d7-42b9-8aed-db2a21fe1e29" 364 | }, 365 | { 366 | "parameters": {}, 367 | "type": "n8n-nodes-base.manualTrigger", 368 | 
"typeVersion": 1, 369 | "position": [ 370 | -220, 371 | 180 372 | ], 373 | "id": "07f10b12-b111-45fd-81ce-e90c9e960983", 374 | "name": "When clicking ‘Test workflow’" 375 | }, 376 | { 377 | "parameters": { 378 | "content": "triggered by transcribe serve once the transcription is finished", 379 | "height": 100, 380 | "width": 200, 381 | "color": 3 382 | }, 383 | "type": "n8n-nodes-base.stickyNote", 384 | "typeVersion": 1, 385 | "position": [ 386 | -260, 387 | -120 388 | ], 389 | "id": "654d1fa1-01ea-4b65-a897-c3db7da06da5", 390 | "name": "Sticky Note12" 391 | }, 392 | { 393 | "parameters": { 394 | "content": "push the youtube url from chat to transcribe service", 395 | "height": 80, 396 | "width": 200, 397 | "color": 3 398 | }, 399 | "type": "n8n-nodes-base.stickyNote", 400 | "typeVersion": 1, 401 | "position": [ 402 | -1260, 403 | 60 404 | ], 405 | "id": "40f60316-d13b-4d9a-889b-fb6c9b687178", 406 | "name": "Sticky Note13" 407 | }, 408 | { 409 | "parameters": { 410 | "content": "", 411 | "height": 1200, 412 | "width": 1920 413 | }, 414 | "type": "n8n-nodes-base.stickyNote", 415 | "typeVersion": 1, 416 | "position": [ 417 | -360, 418 | -720 419 | ], 420 | "id": "a27aba9a-1312-404e-a13a-3d83e346562d", 421 | "name": "Sticky Note14" 422 | }, 423 | { 424 | "parameters": { 425 | "method": "POST", 426 | "url": "https://91da-2401-4900-1c27-c6dc-a125-614c-9a57-5f4.ngrok-free.app/api/v1/transcribe", 427 | "sendBody": true, 428 | "bodyParameters": { 429 | "parameters": [ 430 | { 431 | "name": "url", 432 | "value": "={{ $json.chatInput }}" 433 | } 434 | ] 435 | }, 436 | "options": { 437 | "allowUnauthorizedCerts": true, 438 | "timeout": 3600000 439 | } 440 | }, 441 | "type": "n8n-nodes-base.httpRequest", 442 | "typeVersion": 4.2, 443 | "position": [ 444 | -780, 445 | 160 446 | ], 447 | "id": "37c99e5b-5afc-448d-b621-e9f8bbcd6604", 448 | "name": "call transcribe service" 449 | } 450 | ], 451 | "pinData": {}, 452 | "connections": { 453 | "When chat message received": { 454 | "main": [ 455 | [ 456 | { 457 | "node": "call transcribe service", 458 | "type": "main", 459 | "index": 0 460 | } 461 | ] 462 | ] 463 | }, 464 | "Read/Write Files from Disk": { 465 | "main": [ 466 | [ 467 | { 468 | "node": "Extract from File", 469 | "type": "main", 470 | "index": 0 471 | } 472 | ] 473 | ] 474 | }, 475 | "Extract from File": { 476 | "main": [ 477 | [ 478 | { 479 | "node": "AI Agent2", 480 | "type": "main", 481 | "index": 0 482 | }, 483 | { 484 | "node": "Qdrant Vector Store", 485 | "type": "main", 486 | "index": 0 487 | } 488 | ] 489 | ] 490 | }, 491 | "OpenAI Chat Model": { 492 | "ai_languageModel": [ 493 | [ 494 | { 495 | "node": "AI Agent2", 496 | "type": "ai_languageModel", 497 | "index": 0 498 | } 499 | ] 500 | ] 501 | }, 502 | "AI Agent2": { 503 | "main": [ 504 | [ 505 | { 506 | "node": "LinkedIn", 507 | "type": "main", 508 | "index": 0 509 | } 510 | ] 511 | ] 512 | }, 513 | "LinkedIn": { 514 | "main": [ 515 | [] 516 | ] 517 | }, 518 | "Embeddings Ollama": { 519 | "ai_embedding": [ 520 | [ 521 | { 522 | "node": "Qdrant Vector Store", 523 | "type": "ai_embedding", 524 | "index": 0 525 | } 526 | ] 527 | ] 528 | }, 529 | "Default Data Loader": { 530 | "ai_document": [ 531 | [ 532 | { 533 | "node": "Qdrant Vector Store", 534 | "type": "ai_document", 535 | "index": 0 536 | } 537 | ] 538 | ] 539 | }, 540 | "Recursive Character Text Splitter": { 541 | "ai_textSplitter": [ 542 | [ 543 | { 544 | "node": "Default Data Loader", 545 | "type": "ai_textSplitter", 546 | "index": 0 547 | } 548 | ] 549 | ] 550 | }, 551 | 
"Webhook": { 552 | "main": [ 553 | [ 554 | { 555 | "node": "Read/Write Files from Disk", 556 | "type": "main", 557 | "index": 0 558 | } 559 | ] 560 | ] 561 | }, 562 | "When clicking ‘Test workflow’": { 563 | "main": [ 564 | [ 565 | { 566 | "node": "Read/Write Files from Disk", 567 | "type": "main", 568 | "index": 0 569 | } 570 | ] 571 | ] 572 | }, 573 | "call transcribe service": { 574 | "main": [ 575 | [] 576 | ] 577 | } 578 | }, 579 | "active": false, 580 | "settings": { 581 | "executionOrder": "v1" 582 | }, 583 | "versionId": "0d079d58-28fa-4309-9e92-2d930e9ee963", 584 | "meta": { 585 | "instanceId": "e711fbe877d128d86a078d3ddcaeb0c456781dc70945c5f7c313501777f80a45" 586 | }, 587 | "id": "69H1DAH1BBR7UNjn", 588 | "tags": [] 589 | } -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from services.audio_transcription_service import AudioTranscription 4 | from services.video_transcription_service import Video2AudioConverter 5 | from llama_index.core import ( 6 | SimpleDirectoryReader, 7 | VectorStoreIndex, 8 | StorageContext, 9 | Settings, 10 | get_response_synthesizer) 11 | from llama_index.core.query_engine import RetrieverQueryEngine, TransformQueryEngine 12 | from llama_index.core.node_parser import SentenceSplitter 13 | from llama_index.core.schema import TextNode, MetadataMode 14 | from llama_index.vector_stores.qdrant import QdrantVectorStore 15 | from llama_index.embeddings.ollama import OllamaEmbedding 16 | from llama_index.llms.ollama import Ollama 17 | from llama_index.core.retrievers import VectorIndexRetriever 18 | from llama_index.core.indices.query.query_transform import HyDEQueryTransform 19 | from dotenv import load_dotenv, find_dotenv 20 | import qdrant_client 21 | import logging 22 | 23 | _ = load_dotenv(find_dotenv()) 24 | 25 | # A video about Qdrant from one of the devrel. 
26 | youtube_url = "https://www.youtube.com/watch?v=9NtsnzRFJ_o&t=17s" 27 | output_path = './data' 28 | Video2AudioConverter().download_youtube_audio(youtube_url, output_path) 29 | 30 | # transcribe the audio into text 31 | is_audio_transcribed = AudioTranscription().transcribe(audio_file_dir='./data', is_log_enabled=True) 32 | 33 | if is_audio_transcribed: 34 | logging.basicConfig(level=logging.INFO) 35 | logger = logging.getLogger(__name__) 36 | 37 | # load the local data directory and chunk the data for further processing 38 | docs = SimpleDirectoryReader(input_dir="transcriptions", required_exts=[".txt"]).load_data(show_progress=True) 39 | text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=100) 40 | 41 | # Create a local Qdrant vector store 42 | logger.info("initializing the vector store related objects") 43 | client = qdrant_client.QdrantClient(url=os.environ['qdrant_url'], port=6333, api_key=os.environ['qdrant_api_key']) 44 | vector_store = QdrantVectorStore(client=client, collection_name="media_content") 45 | 46 | # local vector embeddings model 47 | logger.info("initializing the OllamaEmbedding") 48 | embed_model = OllamaEmbedding(model_name='nomic-embed-text:latest', base_url='http://localhost:11434') 49 | 50 | logger.info("initializing the global settings") 51 | Settings.embed_model = embed_model 52 | Settings.llm = Ollama(model="gemma2:latest", base_url='http://localhost:11434', request_timeout=600) 53 | Settings.transformations = [text_parser] 54 | 55 | text_chunks = [] 56 | doc_ids = [] 57 | nodes = [] 58 | 59 | logger.info("enumerating docs") 60 | for doc_idx, doc in enumerate(docs): 61 | curr_text_chunks = text_parser.split_text(doc.text) 62 | text_chunks.extend(curr_text_chunks) 63 | doc_ids.extend([doc_idx] * len(curr_text_chunks)) 64 | 65 | logger.info("enumerating text_chunks") 66 | for idx, text_chunk in enumerate(text_chunks): 67 | node = TextNode(text=text_chunk) 68 | src_doc = docs[doc_ids[idx]] 69 | node.metadata = src_doc.metadata 70 | nodes.append(node) 71 | 72 | logger.info("enumerating nodes") 73 | for node in nodes: 74 | node_embedding = embed_model.get_text_embedding( 75 | node.get_content(metadata_mode=MetadataMode.ALL) 76 | ) 77 | node.embedding = node_embedding 78 | 79 | logger.info("initializing the storage context") 80 | storage_context = StorageContext.from_defaults(vector_store=vector_store) 81 | logger.info("indexing the nodes in VectorStoreIndex") 82 | index = VectorStoreIndex( 83 | nodes=nodes, 84 | storage_context=storage_context, 85 | transformations=Settings.transformations, 86 | ) 87 | 88 | logger.info("initializing the VectorIndexRetriever with top_k as 5") 89 | vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=5) 90 | response_synthesizer = get_response_synthesizer() 91 | logger.info("creating the RetrieverQueryEngine instance") 92 | vector_query_engine = RetrieverQueryEngine( 93 | retriever=vector_retriever, 94 | response_synthesizer=response_synthesizer, 95 | ) 96 | logger.info("creating the HyDEQueryTransform instance") 97 | hyde = HyDEQueryTransform(include_original=True) 98 | hyde_query_engine = TransformQueryEngine(vector_query_engine, hyde) 99 | 100 | logger.info("retrieving the response to the query") 101 | 102 | # Start a loop to continually get input from the user 103 | while True: 104 | # Get a query from the user 105 | user_query = input("Enter your query [type 'bye' to 'exit']: ") 106 | 107 | # Check if the user wants to terminate the loop 108 | if user_query.lower() == "bye" or user_query.lower() == 
"exit": 109 | client.close() 110 | break 111 | 112 | response = hyde_query_engine.query(str_or_query_bundle=user_query) 113 | print(response) 114 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | Note: This repository assume you have all the prerequisite software 3 | - Ollama with gemma2 installed, if not follow 4 | - https://ollama.com/library/gemma2 5 | - host your qdrant cloud by following below 6 | - https://qdrant.tech/cloud/ 7 | 8 | ### instructions to run the code 9 | #### standalone 10 | - git clone git@github.com:pavanjava/Conversational_Media_Platform.git 11 | - pip install -r requirements.txt 12 | - create a file with `.env` and keep two key-value pairs named `qdrant_api_key`, `qdrant_url` 13 | - open `main.py` and change the value of `youtube_url` 14 | - then run `main.py` 15 | #### Expose as API 16 | - install ngrok (mandatory) 17 | - git clone git@github.com:pavanjava/Conversational_Media_Platform.git 18 | - pip install -r requirements.txt 19 | - create a file with `.env` and keep two key-value pairs named `qdrant_api 20 | - run `python transcribe_api.py` (runs on localhost:8000) 21 | - expose this service as public with `ngrok http http://localhost:8000` 22 | ------ 23 | 24 | you can start conversation with your media content in standalone mode 25 | 26 | - Enter your query [type 'bye' to 'exit']: how is search performed on vector embeddings ? 27 | - bot: Each piece of data is represented as a vector, and your query is also converted into a vector representation. Quadrant then calculates how similar the query vector is to every data vector, surfacing the closest matches in the entire dataset. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.11.18 2 | llama-index-llms-openai==0.2.15 3 | llama-index-llms-ollama==0.3.4 4 | llama-index-embeddings-openai==0.2.5 5 | llama-index-embeddings-ollama==0.3.1 6 | llama-index-embeddings-huggingface==0.3.1 7 | llama-index-embeddings-fastembed==0.2.0 8 | llama-index-vector-stores-qdrant==0.3.2 9 | qdrant-client==1.12.0 10 | python-dotenv==1.0.1 11 | openai==1.52.0 12 | openai-whisper==20240930 13 | pytubefix==8.8.4 14 | moviepy==1.0.3 15 | fastapi==0.115.6 16 | pydantic==2.10.4 17 | uvicorn==0.34.0 -------------------------------------------------------------------------------- /services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__init__.py -------------------------------------------------------------------------------- /services/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /services/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__pycache__/__init__.cpython-312.pyc 
-------------------------------------------------------------------------------- /services/__pycache__/audio_transcription_service.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__pycache__/audio_transcription_service.cpython-311.pyc -------------------------------------------------------------------------------- /services/__pycache__/audio_transcription_service.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__pycache__/audio_transcription_service.cpython-312.pyc -------------------------------------------------------------------------------- /services/__pycache__/video_transcription_service.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__pycache__/video_transcription_service.cpython-311.pyc -------------------------------------------------------------------------------- /services/__pycache__/video_transcription_service.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/services/__pycache__/video_transcription_service.cpython-312.pyc -------------------------------------------------------------------------------- /services/audio_transcription_service.py: -------------------------------------------------------------------------------- 1 | import whisper 2 | from pathlib import Path 3 | from utils.measure_time import measure_execution_time 4 | import os 5 | import logging 6 | 7 | # Directory where the generated transcripts are written 8 | dir_path = './transcriptions' 9 | 10 | 11 | class AudioTranscription: 12 | def __init__(self): 13 | logging.basicConfig(level=logging.INFO) 14 | self.logger = logging.getLogger(__name__) 15 | self.logger.setLevel(logging.INFO) # Set logging level 16 | self.logger.info("Loading Whisper model") # Log model loading 17 | self.logger.info(whisper.available_models()) 18 | self.model = whisper.load_model("medium") 19 | 20 | @measure_execution_time 21 | def transcribe(self, audio_file_dir: str = '', is_log_enabled: bool = False) -> bool: 22 | 23 | for file_path in Path(audio_file_dir).rglob('*'): 24 | if file_path.is_file(): 25 | if is_log_enabled: 26 | self.logger.info(f"audio file path: {file_path} ") 27 | try: 28 | result = self.model.transcribe(audio=f'./{file_path}', word_timestamps=True) 29 | if is_log_enabled: 30 | self.logger.info(result) 31 | 32 | # Ensure the directory exists 33 | self.logger.info("creating the directory if it does not exist") 34 | os.makedirs(dir_path, exist_ok=True) 35 | 36 | with open(file=f'{dir_path}/{file_path.name}_transcript.txt', mode='w', encoding='utf-8') as transcription: 37 | transcription.write(result.get('text')) 38 | 39 | if is_log_enabled: 40 | self.logger.info(f"transcription completed successfully for file: {file_path.name} " 41 | f"in directory: {dir_path}") 42 | 43 | except Exception as e: 44 | self.logger.error(f'Error while transcribing {file_path.name}: {e}') 45 | return False 46 | return True 47 | --------------------------------------------------------------------------------
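`transcribe()` above requests `word_timestamps=True` but only persists `result['text']`, so the timing information is currently discarded. A minimal sketch of how the segment- and word-level timings in the openai-whisper result could be consumed — the audio path is a hypothetical example, and the dict keys (`text`, `segments`, `start`, `end`, `words`) are the ones openai-whisper returns:

```python
import whisper

# Hypothetical example path; any file produced by Video2AudioConverter would do.
audio_path = "./data/output.mp3"

model = whisper.load_model("medium")
result = model.transcribe(audio=audio_path, word_timestamps=True)

# Full transcript text (this is what audio_transcription_service.py writes to disk).
print(result["text"])

# Segment-level timings, handy for subtitles or timestamped highlights.
for segment in result["segments"]:
    print(f"[{segment['start']:.2f}s -> {segment['end']:.2f}s] {segment['text'].strip()}")
    # With word_timestamps=True each segment also carries per-word timings.
    for word in segment.get("words", []):
        print(f"    {word['word']!r}: {word['start']:.2f}s - {word['end']:.2f}s")
```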
/services/video_transcription_service.py: -------------------------------------------------------------------------------- 1 | from pytubefix import YouTube 2 | from moviepy.editor import AudioFileClip 3 | from utils.measure_time import measure_execution_time 4 | import os 5 | import logging 6 | 7 | 8 | class Video2AudioConverter: 9 | def __init__(self): 10 | logging.basicConfig(level=logging.INFO) 11 | self.logger = logging.getLogger(__name__) 12 | self.logger.setLevel(logging.INFO) # Set logging level 13 | 14 | @measure_execution_time 15 | def download_youtube_audio(self, youtube_url, output_path): 16 | 17 | self.logger.info(f"Downloading audio from YouTube URL: {youtube_url}") 18 | # temp audio file name 19 | temp_audio_file = 'temp_audio.mp4' 20 | 21 | # Create a YouTube object 22 | yt = YouTube(youtube_url, use_oauth=False, allow_oauth_cache=True) 23 | 24 | # Select the best audio stream 25 | audio_stream = yt.streams.get_audio_only() 26 | 27 | # Download the audio stream to a temporary file 28 | audio_stream.download(filename=temp_audio_file) 29 | 30 | # Name of the converted output file 31 | audio_name = "output.mp3" 32 | 33 | # Convert the downloaded file to MP3 34 | self.logger.info(f"Converting audio to MP3 and saving at: {output_path}/{audio_name}") 35 | clip = AudioFileClip(temp_audio_file) 36 | clip.write_audiofile(output_path+f"/{audio_name}") 37 | 38 | # Remove the temporary MP4 file 39 | self.logger.info("Removing temporary MP4 file.") 40 | clip.close() 41 | # Check if the file exists before attempting to delete it 42 | if os.path.exists(temp_audio_file): 43 | os.remove(temp_audio_file) 44 | print(f"{temp_audio_file} has been deleted successfully.") 45 | else: 46 | print(f"{temp_audio_file} does not exist in the current directory or has already been deleted") 47 | -------------------------------------------------------------------------------- /transcribe_api.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from fastapi import FastAPI 3 | from pydantic import BaseModel 4 | from fastapi.middleware.cors import CORSMiddleware 5 | from concurrent.futures import ThreadPoolExecutor, as_completed 6 | from utils.measure_time import measure_execution_time 7 | import uvicorn 8 | import requests 9 | 10 | from services.audio_transcription_service import AudioTranscription 11 | from services.video_transcription_service import Video2AudioConverter 12 | 13 | app = FastAPI() 14 | app.add_middleware(CORSMiddleware, allow_origins=['*'], allow_headers=['*'], allow_methods=['*']) 15 | 16 | output_path = './data' 17 | n8n_webhook = "http://localhost:5678/webhook-test/8895e90e-a0d7-42b9-8aed-db2a21fe1e29" 18 | 19 | 20 | class MessagePayload(BaseModel): 21 | url: str 22 | 23 | 24 | def invoke_transcription(url): 25 | # download the audio from the url 26 | Video2AudioConverter().download_youtube_audio(url, output_path) 27 | 28 | # transcribe the audio into text 29 | is_audio_transcribed = AudioTranscription().transcribe(audio_file_dir=output_path, is_log_enabled=True) 30 | 31 | if is_audio_transcribed: 32 | return {'is_audio_transcribed': True} 33 | else: 34 | return {'is_audio_transcribed': False} 35 | 36 | 37 | def invoke_n8n_workflow(): 38 | # invoke n8n workflow using webhook 39 | requests.get(url=n8n_webhook) 40 | 41 | 42 | @app.post("/api/v1/transcribe") 43 | async def invoke_agents(payload: MessagePayload): 44 | 45 | @measure_execution_time 46 | async def process_transcription(): 47 | with
ThreadPoolExecutor(max_workers=10) as executor: 48 | futures = [executor.submit(invoke_transcription, url=payload.url)] 49 | for future in as_completed(futures): 50 | result = future.result() 51 | if result["is_audio_transcribed"]: 52 | invoke_n8n_workflow() 53 | 54 | # Schedule the transcription task to run asynchronously 55 | asyncio.create_task(process_transcription()) 56 | 57 | # Respond immediately 58 | return {"is_transcription_started": True} 59 | 60 | 61 | if __name__ == "__main__": 62 | uvicorn.run(app=app, host="127.0.0.1", port=8000) 63 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavanjava/Conversational_Media_Platform/c47952d30f6114aa2ab443451d3679bc3aeefa9b/utils/__init__.py -------------------------------------------------------------------------------- /utils/measure_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import wraps 3 | 4 | def measure_execution_time(func): 5 | @wraps(func) 6 | def wrapper(*args, **kwargs): 7 | start_time = time.perf_counter() # Record the start time 8 | result = func(*args, **kwargs) # Call the original function 9 | end_time = time.perf_counter() # Record the end time 10 | execution_time = end_time - start_time # Calculate execution time 11 | print(f"Function '{func.__name__}' executed in {execution_time:.4f} seconds.") 12 | return result 13 | return wrapper --------------------------------------------------------------------------------
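To exercise `transcribe_api.py` end to end — the same call the n8n "call transcribe service" HTTP Request node makes — here is a minimal client sketch using `requests`. It assumes the API is running locally on port 8000 as described in the readme (swap in your ngrok URL once the service is exposed), and the YouTube URL is just the example video from `main.py`:

```python
import requests

# Assumes `python transcribe_api.py` is running on localhost:8000;
# replace with your ngrok URL when the service is exposed publicly.
endpoint = "http://localhost:8000/api/v1/transcribe"

# The service downloads the audio, transcribes it, and then triggers
# the n8n workflow through its webhook.
payload = {"url": "https://www.youtube.com/watch?v=9NtsnzRFJ_o"}

response = requests.post(endpoint, json=payload, timeout=30)
response.raise_for_status()

# The endpoint responds immediately; transcription continues in the background.
print(response.json())  # e.g. {"is_transcription_started": true}
```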