├── .gitignore ├── document.pdf ├── tutorial │   ├── resource │   │   ├── image-01.png │   │   └── rag-docker-ts-langchain.jpg │   ├── article-en.md │   └── article.md ├── .editorconfig ├── .env.sample ├── tsconfig.json ├── LICENSE ├── docker-compose.yml ├── package.json ├── src │   ├── ingest.ts │   ├── google-client.ts │   ├── search.ts │   └── chat.ts ├── README-en.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | node_modules/ 3 | dist/ -------------------------------------------------------------------------------- /document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glaucia86/rag-search-ingestion-langchainjs-gemini/HEAD/document.pdf -------------------------------------------------------------------------------- /tutorial/resource/image-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glaucia86/rag-search-ingestion-langchainjs-gemini/HEAD/tutorial/resource/image-01.png -------------------------------------------------------------------------------- /tutorial/resource/rag-docker-ts-langchain.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glaucia86/rag-search-ingestion-langchainjs-gemini/HEAD/tutorial/resource/rag-docker-ts-langchain.jpg -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 2 9 | end_of_line = crlf 10 | charset = utf-8 11 | trim_trailing_whitespace = false 12 | insert_final_newline = false -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | # Google AI Studio API Key (REQUIRED) 2 | # Get yours at: https://makersuite.google.com/app/apikey 3 | GOOGLE_API_KEY=your_google_api_key_here 4 | 5 | # Google models (OPTIONAL - defaults are used if not set) 6 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 7 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 8 | 9 | # Database configuration (NEXT STEPS) 10 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 11 | PG_VECTOR_COLLECTION_NAME=pdf_documents 12 | 13 | # PDF configuration (NEXT STEPS) 14 | PDF_PATH=./document.pdf -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "ESNext", 5 | "moduleResolution": "node", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true, 13 | "allowSyntheticDefaultImports": true, 14 | "experimentalDecorators": true, 15 | "emitDecoratorMetadata": true, 16 | "declaration": true, 17 | "declarationMap": true, 18 | "sourceMap": true, 19 | "types": ["node"], 20 | "lib": ["ES2022", "DOM"] 21 | }, 22 | "include": [ 23 | "src/**/*" 24 | ], 25 | "exclude": [ 26 | "node_modules", 27 | "dist", 28 | "**/*.test.ts", 29 | "**/*.spec.ts" 30 | ], 31 | "ts-node": { 32 | "esm": true 33 | } 34 | } 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Glaucia Lemos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | # Main service: PostgreSQL with pgVector extension 3 | postgres: 4 | image: pgvector/pgvector:pg17 5 | container_name: postgres_rag_ts 6 | environment: 7 | POSTGRES_USER: postgres 8 | POSTGRES_PASSWORD: postgres 9 | POSTGRES_DB: rag 10 | ports: 11 | - "5432:5432" 12 | volumes: 13 | # Data persistence 14 | - postgres_data:/var/lib/postgresql/data 15 | healthcheck: 16 | # Checks if the database is ready 17 | test: ["CMD-SHELL", "pg_isready -U postgres -d rag"] 18 | interval: 10s 19 | timeout: 5s 20 | retries: 5 21 | restart: unless-stopped 22 | 23 | # Auxiliary service: Initializes pgVector extension 24 | bootstrap_vector_ext: 25 | image: pgvector/pgvector:pg17 26 | depends_on: 27 | postgres: 28 | condition: service_healthy 29 | entrypoint: ["/bin/sh", "-c"] 30 | command: > 31 | PGPASSWORD=postgres 32 | psql "postgresql://postgres@postgres:5432/rag" -v ON_ERROR_STOP=1 33 | -c "CREATE EXTENSION IF NOT EXISTS vector;" 34 | restart: "no" 35 | 36 | volumes: 37 | postgres_data: -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rag-search-ingestion-langchainjs-gemini", 3 | "version": "1.0.0", 4 | "description": "A PDF search ingestion RAG application with Docker + LangChain.js + Gemini", 5 | "type": "module", 6 | "main": "index.js", 7 | "scripts": { 8 | "build": "tsc", 9 | "start": "npm run build && node dist/chat.js", 10 | "ingest": "npm run build && node dist/ingest.js", 11 | "dev:chat": "tsx src/chat.ts", 12 | "dev:ingest": "tsx src/ingest.ts" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "git+https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini.git" 17 | }, 18 | "keywords": [ 19 | "RAG", 20 | "LangChain.js", 21 | "Gemini", 22 | "PDF Search", 23 | "Docker" 24 | ], 25 | "author": "Glaucia Lemos", 26 | "license": "MIT", 27 | "bugs": { 28 | "url": 
"https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini/issues" 29 | }, 30 | "homepage": "https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini#readme", 31 | "devDependencies": { 32 | "@types/node": "^24.3.1", 33 | "@types/pdf-parse": "^1.1.5", 34 | "@types/pg": "^8.15.5", 35 | "tsx": "^4.20.5", 36 | "typescript": "^5.9.2" 37 | }, 38 | "dependencies": { 39 | "@google/generative-ai": "^0.24.1", 40 | "@langchain/community": "^0.3.55", 41 | "@langchain/core": "^0.3.75", 42 | "@langchain/textsplitters": "^0.1.0", 43 | "@types/uuid": "^10.0.0", 44 | "dotenv": "^16.6.1", 45 | "pdf-parse": "^1.1.1", 46 | "pg": "^8.16.3", 47 | "uuid": "^13.0.0" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/ingest.ts: -------------------------------------------------------------------------------- 1 | import { config } from 'dotenv'; 2 | import { Document } from '@langchain/core/documents'; 3 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 4 | import { GoogleEmbeddings } from './google-client'; 5 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 6 | import { PDFLoader as LangChainPDFLoader } from '@langchain/community/document_loaders/fs/pdf'; 7 | 8 | config(); 9 | 10 | class PDFLoader { 11 | 12 | constructor(private filePath: string) {} 13 | 14 | async load(): Promise { 15 | try { 16 | console.log(`Reading PDF file: ${this.filePath}`); 17 | 18 | // Use LangChain PDF loader instead of pdf-parse 19 | const langChainLoader = new LangChainPDFLoader(this.filePath); 20 | const documents = await langChainLoader.load(); 21 | 22 | console.log(`PDF loaded successfully! Found ${documents.length} pages`); 23 | 24 | return documents; 25 | } catch (error) { 26 | console.error('Error loading PDF:', error); 27 | throw error; 28 | } 29 | } 30 | 31 | async ingestToVectorStore(): Promise { 32 | try { 33 | console.log('Starting PDF ingestion process...'); 34 | 35 | // Step 1: Load PDF 36 | console.log(`Loading PDF from: ${this.filePath}`); 37 | const rawDocuments = await this.load(); 38 | console.log(`PDF loaded successfully! 
Found ${rawDocuments.length} sections`); 39 | 40 | // Step 2: Split documents into chunks 41 | console.log('Splitting documents into chunks...'); 42 | const textSplitter = new RecursiveCharacterTextSplitter({ 43 | chunkSize: 400, 44 | chunkOverlap: 0, 45 | separators: ["\n\n", "\n", " ", ""], 46 | }); 47 | 48 | const splitDocuments = await textSplitter.splitDocuments(rawDocuments); 49 | console.log(`Documents split into ${splitDocuments.length} chunks`); 50 | 51 | // Step 3: Initialize embeddings 52 | console.log('Initializing Google embeddings...'); 53 | const embeddings = new GoogleEmbeddings(); 54 | 55 | // Step 4: Initialize vector store 56 | console.log('Connecting to PostgreSQL vector store...'); 57 | const vectorStore = await PGVectorStore.initialize(embeddings, { 58 | postgresConnectionOptions: { 59 | connectionString: process.env.DATABASE_URL, 60 | }, 61 | tableName: process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents', 62 | columns: { 63 | idColumnName: 'id', 64 | vectorColumnName: 'vector', 65 | contentColumnName: 'content', 66 | metadataColumnName: 'metadata', 67 | }, 68 | }); 69 | 70 | // Step 5: Add documents to vector store 71 | console.log('Adding documents to vector store...'); 72 | await vectorStore.addDocuments(splitDocuments); 73 | 74 | console.log('PDF ingestion completed successfully!'); 75 | console.log(`Total chunks processed: ${splitDocuments.length}`); 76 | 77 | // Close the connection 78 | await vectorStore.end(); 79 | 80 | } catch (error) { 81 | console.error('Error during PDF ingestion:', error); 82 | process.exit(1); 83 | } 84 | } 85 | } 86 | 87 | // Main execution function 88 | async function main() { 89 | const pdfPath = process.env.PDF_PATH || './document.pdf'; 90 | const loader = new PDFLoader(pdfPath); 91 | await loader.ingestToVectorStore(); 92 | } 93 | 94 | // Run ingestion 95 | main(); -------------------------------------------------------------------------------- /src/google-client.ts: -------------------------------------------------------------------------------- 1 | import { config } from 'dotenv'; 2 | import { GoogleGenerativeAI } from '@google/generative-ai'; 3 | import { Embeddings } from '@langchain/core/embeddings'; 4 | 5 | config(); 6 | 7 | export interface ChatMessage { 8 | role: 'system' | 'user' | 'assistant'; 9 | content: string; 10 | } 11 | 12 | export class GoogleClient { 13 | private googleApiKey: string; 14 | private embeddingModel: string; 15 | private chatModel: string; 16 | private genAI: GoogleGenerativeAI; 17 | 18 | constructor() { 19 | this.googleApiKey = process.env.GOOGLE_API_KEY || ''; 20 | this.embeddingModel = process.env.GOOGLE_EMBEDDING_MODEL || 'models/embedding-001'; 21 | this.chatModel = process.env.GOOGLE_CHAT_MODEL || 'gemini-2.0-flash'; 22 | 23 | if (!this.googleApiKey) { 24 | throw new Error('Google API key is not set in environment variables.'); 25 | } 26 | 27 | // Initialize GoogleGenerativeAI instance 28 | this.genAI = new GoogleGenerativeAI(this.googleApiKey); 29 | } 30 | 31 | async getEmbeddings(texts: string[]): Promise<number[][]> { 32 | const embeddings: number[][] = []; 33 | 34 | for(const text of texts) { 35 | try { 36 | const model = this.genAI.getGenerativeModel({ model: this.embeddingModel }); 37 | const result = await model.embedContent(text); 38 | 39 | if (result.embedding && result.embedding.values) { 40 | embeddings.push(result.embedding.values); 41 | } else { 42 | console.log(`No embedding returned for text: ${text}`); 43 | const dummySize = 768; 44 | embeddings.push(new Array(dummySize).fill(0)); 45 | } 46 | } catch (error) { 47 | console.log(`Error generating 
embedding for text: ${text}`, error); 48 | const dummySize = 768; 49 | embeddings.push(new Array(dummySize).fill(0)); 50 | } 51 | } 52 | 53 | return embeddings; 54 | } 55 | 56 | async chatCompletions(messages: ChatMessage[], temperature: number = 0.1): Promise<string> { 57 | try { 58 | const model = this.genAI.getGenerativeModel({ 59 | model: this.chatModel, 60 | generationConfig: { 61 | temperature, 62 | maxOutputTokens: 1000, 63 | } 64 | }); 65 | 66 | // Convert messages to a single prompt string 67 | let prompt = ''; 68 | 69 | for (const message of messages) { 70 | const { role, content } = message; 71 | 72 | if (role === 'system') { 73 | prompt += `Instructions: ${content}\n\n`; 74 | } else if (role === 'user') { 75 | prompt += `${content}\n`; 76 | } else if (role === 'assistant') { 77 | prompt += `Assistant: ${content}\n`; 78 | } 79 | } 80 | 81 | // Generate response using the model 82 | const result = await model.generateContent(prompt); 83 | const response = result.response; 84 | 85 | return response.text(); 86 | } catch (error) { 87 | console.log(`Error generating chat completion: ${error}`); 88 | return 'Sorry, an error occurred while generating the response.'; 89 | } 90 | } 91 | } 92 | 93 | export class GoogleEmbeddings extends Embeddings { 94 | private client: GoogleClient; 95 | 96 | constructor() { 97 | super({}); 98 | this.client = new GoogleClient(); 99 | } 100 | 101 | async embedDocuments(texts: string[]): Promise<number[][]> { 102 | console.log(`Generating embeddings for ${texts.length} documents...`); 103 | 104 | const batchSize = 10; // Process 10 texts at a time to keep API usage manageable 105 | const allEmbeddings: number[][] = []; 106 | 107 | for(let i = 0; i < texts.length; i += batchSize) { 108 | const batchTexts = texts.slice(i, i + batchSize); 109 | const batchEmbeddings = await this.client.getEmbeddings(batchTexts); 110 | allEmbeddings.push(...batchEmbeddings); 111 | 112 | console.log(`Batch ${Math.floor(i / batchSize) + 1}: ${batchTexts.length} texts processed`); 113 | } 114 | 115 | return allEmbeddings; 116 | } 117 | 118 | // Method for embedding a single query 119 | async embedQuery(text: string): Promise<number[]> { 120 | const embeddings = await this.client.getEmbeddings([text]); 121 | return embeddings[0]; 122 | } 123 | } 124 | 125 | // Factory function to create a GoogleClient instance 126 | export function getGoogleClient(): GoogleClient { 127 | return new GoogleClient(); 128 | } -------------------------------------------------------------------------------- /src/search.ts: -------------------------------------------------------------------------------- 1 | import { config } from 'dotenv'; 2 | import { getGoogleClient, GoogleClient, GoogleEmbeddings, ChatMessage } from './google-client'; 3 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 4 | 5 | config(); 6 | 7 | const PROMPT_TEMPLATE = ` 8 | CONTEXTO FORNECIDO: 9 | {contexto} 10 | 11 | INSTRUÇÕES CRÍTICAS: 12 | - Responda EXCLUSIVAMENTE com base no CONTEXTO FORNECIDO acima. 13 | - Se a informação não estiver EXPLICITAMENTE no contexto, responda exatamente: 14 | "Não tenho informações necessárias para responder sua pergunta." 15 | - NUNCA use conhecimento externo ou invente informações. 16 | - NUNCA expresse opiniões pessoais ou interpretações além do texto fornecido. 17 | 18 | EXEMPLOS DE RESPOSTAS CORRETAS PARA PERGUNTAS SEM CONTEXTO: 19 | - "Qual é a capital da França?" -> "Não tenho informações necessárias para responder sua pergunta." 20 | - "Quantos funcionários a empresa tem?" 
-> "Não tenho informações necessárias para responder sua pergunta." 21 | - "Você recomenda investir nisso?" -> "Não tenho informações necessárias para responder sua pergunta." 22 | 23 | PERGUNTA DO USUÁRIO: 24 | {pergunta} 25 | 26 | RESPOSTA (baseada apenas no contexto fornecido): 27 | `; 28 | 29 | export interface SearchResult { 30 | content: string; 31 | metadata: any; 32 | score: number; 33 | } 34 | 35 | export class RAGSearch { 36 | private databaseUrl: string; 37 | private collectionName: string; 38 | private embeddings: GoogleEmbeddings; 39 | private googleClient: any; 40 | private vectorStore: PGVectorStore | null = null; 41 | 42 | constructor() { 43 | // Load environment variables 44 | this.databaseUrl = process.env.DATABASE_URL || ''; 45 | this.collectionName = process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents'; 46 | 47 | // Initialize main components 48 | this.embeddings = new GoogleEmbeddings(); 49 | this.googleClient = getGoogleClient(); 50 | this.vectorStore = null; 51 | 52 | this._initializeVectorStore(); 53 | } 54 | 55 | private async _initializeVectorStore(): Promise { 56 | try { 57 | // Connect to PostgreSQL vector store 58 | this.vectorStore = await PGVectorStore.initialize(this.embeddings, { 59 | postgresConnectionOptions: { 60 | connectionString: this.databaseUrl, 61 | }, 62 | tableName: this.collectionName, 63 | columns: { 64 | idColumnName: 'id', 65 | vectorColumnName: 'vector', 66 | contentColumnName: 'content', 67 | metadataColumnName: 'metadata', 68 | }, 69 | }); 70 | 71 | console.log('RAG System: Connection to vector database established ') 72 | } catch (error) { 73 | console.log(`Error initializing vector database: ${error}`); 74 | throw error; 75 | } 76 | } 77 | 78 | async searchDocuments(query: string, k: number = 10): Promise { 79 | if (!this.vectorStore) { 80 | throw new Error('Vector bank has not been initialized. Run ingestion first.'); 81 | } 82 | 83 | try { 84 | // Busca semântica silenciosa 85 | 86 | // PHASE 1: SIMILARITY SEARCH WITH SCORES 87 | // Use similaritySearchWithScore to get both documents and scores 88 | const results = await this.vectorStore.similaritySearchWithScore(query, k); 89 | 90 | // PHASE 2: FORMAT RESULTS 91 | const formattedResults: SearchResult[] = []; 92 | 93 | for(const [document, score] of results) { 94 | formattedResults.push({ 95 | content: document.pageContent, // Chunk text 96 | metadata: document.metadata, // Info about page, source, etc. 
97 | score: score // Similarity score (lower is more similar) 98 | }); 99 | } 100 | 101 | // ${formattedResults.length} chunks found (logged silently) 102 | 103 | return formattedResults; 104 | } catch (error) { 105 | console.log(`Error during semantic search: ${error}`); 106 | return []; // return empty array on error 107 | } 108 | } 109 | 110 | async generateAnswer(query: string): Promise<string> { 111 | try { 112 | // RAG pipeline starts silently 113 | 114 | // STEP 1: RETRIEVAL 115 | const documents = await this.searchDocuments(query, 10); 116 | 117 | if (!documents.length) { 118 | console.log('No relevant documents found in the database.'); 119 | return 'I don\'t have the information necessary to answer your question.'; 120 | } 121 | 122 | // STEP 2: CONTEXT ASSEMBLY 123 | const context = documents.map((doc) => { 124 | return doc.content; 125 | }) 126 | .join('\n\n'); 127 | 128 | // STEP 3: STRUCTURE THE PROMPT FOR THE LLM 129 | const fullPrompt = PROMPT_TEMPLATE 130 | .replace('{contexto}', context) 131 | .replace('{pergunta}', query); 132 | 133 | // STEP 4: GENERATION USING LLM 134 | const messages: ChatMessage[] = [ 135 | { role: 'user', content: fullPrompt } 136 | ]; 137 | 138 | const response = await this.googleClient.chatCompletions( 139 | messages, 140 | 0.1 141 | ); 142 | 143 | // RAG pipeline completed successfully 144 | 145 | return response.trim(); 146 | } catch (error) { 147 | console.log(`Error in RAG pipeline: ${error}`); 148 | return 'Internal error: Unable to process your query. Please check if ingestion has been performed.'; 149 | } 150 | } 151 | 152 | // Utility method for checking system status 153 | async getSystemStatus(): Promise<{ isReady: boolean; chunksCount: number}> { 154 | try { 155 | if (!this.vectorStore) { 156 | return { isReady: false, chunksCount: 0 }; 157 | } 158 | 159 | const testResults = await this.vectorStore.similaritySearch("test", 1); 160 | return { 161 | isReady: true, 162 | chunksCount: testResults.length > 0 ? -1 : 0 // -1 means "there are documents, but we don't know how many" 163 | }; 164 | } catch (error) { 165 | return { isReady: false, chunksCount: 0 }; 166 | } 167 | } 168 | } 169 | 170 | // Factory function to create a RAG instance 171 | export async function searchPrompt(question?: string): Promise<RAGSearch | null> { 172 | try { 173 | console.log('Initializing RAG Search system...'); 174 | const ragSearch = new RAGSearch(); 175 | 176 | await new Promise(resolve => setTimeout(resolve, 1000)); // give the async vector store initialization time to finish 177 | 178 | const status = await ragSearch.getSystemStatus(); 179 | if (!status.isReady) { 180 | console.log('System is not ready. 
Run ingestion first.'); 181 | return null; 182 | } 183 | 184 | console.log('RAG system initialized and ready for use.'); 185 | return ragSearch; 186 | } catch (error) { 187 | console.log(`Error initializing RAG Search system: ${error}`); 188 | return null; 189 | } 190 | } -------------------------------------------------------------------------------- /src/chat.ts: -------------------------------------------------------------------------------- 1 | import { createInterface } from "readline"; 2 | import { searchPrompt, RAGSearch } from "./search"; 3 | 4 | // Function to print the initial banner with system information 5 | function printBanner(): void { 6 | console.log('='.repeat(60)); 7 | console.log('RAG CHAT - PDF Question and Answer System'); 8 | console.log('Powered by Google Gemini + LangChain + pgVector'); 9 | console.log('⚡ TypeScript + Node.js Implementation'); 10 | console.log('='.repeat(60)); 11 | console.log("Special commands:"); 12 | console.log(" • 'exit', 'quit', 'sair', 'q' - Closes the program"); 13 | console.log(" • 'help' - Shows available commands"); 14 | console.log(" • 'clear' - Clears the screen"); 15 | console.log(" • 'status' - Checks system status"); 16 | console.log('='.repeat(60)); 17 | } 18 | 19 | // Function to print help instructions 20 | function printHelp(): void { 21 | console.log('\n AVAILABLE COMMANDS:'); 22 | console.log(' exit, quit, sair, q - Closes the program'); 23 | console.log(' help - Shows available commands'); 24 | console.log(' clear - Clears the screen'); 25 | console.log(' status - Checks system status'); 26 | console.log(' [any text] - Asks a question about the PDF'); 27 | console.log('\n TIPS FOR USE:'); 28 | console.log(' • Ask specific questions about the PDF content'); 29 | console.log(' • The system responds only based on the document'); 30 | console.log(' • Out-of-context questions return "I don\'t have information"'); 31 | console.log(); 32 | } 33 | 34 | // Function to clear the console screen 35 | function clearScreen(): void { 36 | console.clear(); 37 | } 38 | 39 | async function checkStatus(searchSystem: RAGSearch | null): Promise<void> { 40 | console.log('\n RAG SYSTEM STATUS:'); 41 | console.log('='.repeat(40)); 42 | 43 | if (!searchSystem) { 44 | console.log('System: NOT INITIALIZED'); 45 | console.log('\n TROUBLESHOOTING CHECKLIST:'); 46 | console.log(' 1. Is PostgreSQL running?'); 47 | console.log(' → Command: docker compose up -d'); 48 | console.log(' 2. Has ingestion been executed?'); 49 | console.log(' → Command: npm run ingest'); 50 | console.log(' 3. Is the API Key configured?'); 51 | console.log(' → File: .env (GOOGLE_API_KEY)'); 52 | console.log(' 4. Are dependencies installed?'); 53 | console.log(' → Command: npm install'); 54 | return; 55 | } 56 | 57 | try { 58 | const systemStatus = await searchSystem.getSystemStatus(); 59 | 60 | console.log('RAG System: OPERATIONAL'); 61 | console.log('PostgreSQL Connection: OK'); 62 | console.log('pgVector Extension: OK'); 63 | console.log('Google Gemini API: OK'); 64 | console.log(`Vector Database: ${systemStatus.isReady ? 
'READY' : 'NOT READY'}`); 65 | 66 | if (systemStatus.chunksCount > 0) { 67 | console.log(`Available chunks: ${systemStatus.chunksCount}`); 68 | } 69 | 70 | console.log('\n System ready to answer questions!'); 71 | } catch (error) { 72 | console.log('Status: PARTIALLY OPERATIONAL'); 73 | console.log(`Error checking system status: ${error}`); 74 | } 75 | 76 | console.log('='.repeat(40)); 77 | } 78 | 79 | // Main function to initialize RAG system and handle user input 80 | async function main(): Promise<void> { 81 | console.log('STEP 6: Initializing the RAG Chat CLI Interface'); 82 | 83 | printBanner(); 84 | 85 | console.log('\n PHASE 1: INITIALIZING RAG SYSTEM'); 86 | const searchSystem = await searchPrompt(); 87 | 88 | if (!searchSystem) { 89 | console.log('\n CRITICAL ERROR: RAG system could not be initialized!'); 90 | console.log('\n POSSIBLE CAUSES AND SOLUTIONS:'); 91 | console.log(' 1. PostgreSQL is not running'); 92 | console.log(' → Solution: docker compose up -d'); 93 | console.log(' 2. Ingestion process has not been executed'); 94 | console.log(' → Solution: npm run ingest'); 95 | console.log(' 3. GOOGLE_API_KEY is not configured or invalid'); 96 | console.log(' → Solution: Configure in the .env file'); 97 | console.log(' 4. Node.js dependencies are not installed'); 98 | console.log(' → Solution: npm install'); 99 | console.log(' 5. pgVector extension has not been created'); 100 | console.log(' → Solution: Check Docker logs'); 101 | 102 | process.exit(1); 103 | } 104 | 105 | console.log('PHASE 1: RAG system initialized successfully!\n'); 106 | 107 | // PHASE 2: SETUP COMMAND LINE INTERFACE 108 | const rl = createInterface({ 109 | input: process.stdin, 110 | output: process.stdout, 111 | prompt: '\n Ask a question: ' 112 | }); 113 | 114 | // Helper function to capture user input asynchronously 115 | const askQuestion = (prompt: string): Promise<string> => { 116 | return new Promise((resolve) => { 117 | rl.question(prompt, resolve); 118 | }); 119 | }; 120 | 121 | console.log('System ready! Type your question or "help" to see commands.'); 122 | 123 | // PHASE 3: MAIN CHAT LOOP 124 | while(true) { 125 | try { 126 | // Capture user input 127 | const userInput = (await askQuestion('\n Ask a question: ')).trim(); 128 | 129 | // PROCESSING COMMAND: Analyze whether it is a special command or a question 130 | const command = userInput.toLowerCase(); 131 | 132 | // Exit commands 133 | if (['exit', 'quit', 'sair', 'q'].includes(command)) { 134 | console.log('\n Thank you for using RAG Chat. Goodbye!\n'); 135 | console.log('System shutting down...'); 136 | break; 137 | } 138 | 139 | // Help command 140 | if (['ajuda', 'help', 'h', '?'].includes(command)) { 141 | printHelp(); 142 | continue; 143 | } 144 | 145 | // Clear screen command 146 | if (['limpar', 'clear', 'cls'].includes(command)) { 147 | clearScreen(); 148 | printBanner(); 149 | continue; 150 | } 151 | 152 | // Status command 153 | if (['status', 'info', 's'].includes(command)) { 154 | await checkStatus(searchSystem); 155 | continue; 156 | } 157 | 158 | // Validate empty input 159 | if (!userInput) { 160 | console.log('Empty input. 
Type a question or "help" to see commands.'); 161 | continue; 162 | } 163 | 164 | // PROCESSING QUESTION: Forward the question to the RAG system 165 | console.log('\n Processing your question...'); 166 | console.log('Searching PDF knowledge...'); 167 | 168 | const startTime = Date.now(); 169 | 170 | // Call the complete RAG pipeline 171 | const answer = await searchSystem.generateAnswer(userInput); 172 | 173 | const endTime = Date.now(); 174 | const responseTime = ((endTime - startTime) / 1000).toFixed(2); 175 | 176 | // FORMATTED DISPLAY OF THE RESPONSE 177 | console.log('\n' + '='.repeat(80)); 178 | console.log(`QUESTION: ${userInput}`); 179 | console.log('='.repeat(80)); 180 | console.log(`🤖 RESPONSE:`); 181 | console.log(answer); 182 | console.log('='.repeat(80)); 183 | console.log(`⚡ Response time: ${responseTime}s`); 184 | } catch (error) { 185 | // ERROR HANDLING 186 | if (error instanceof Error && error.message.includes('SIGINT')) { 187 | // Ctrl+C was pressed 188 | console.log('\n\n Interruption detected (Ctrl+C)'); 189 | console.log('👋 Chat closed by user. See you next time!'); 190 | break; 191 | } else { 192 | // Other errors 193 | console.log(`\n Unexpected error during processing:`); 194 | console.log(` ${error}`); 195 | console.log('\n You can:'); 196 | console.log(' • Try again with another question'); 197 | console.log(' • Type "status" to check the system'); 198 | console.log(' • Type "exit" to quit'); 199 | } 200 | } 201 | } 202 | 203 | rl.close(); 204 | } 205 | 206 | // EVENT HANDLERS: Operating system signal management 207 | 208 | // Handler for Ctrl+C (SIGINT) 209 | process.on('SIGINT', () => { 210 | console.log('\n\n Interrupt signal received (Ctrl+C)'); 211 | console.log('Cleaning up resources...'); 212 | console.log('RAG Chat closed. 
See you later!'); 213 | process.exit(0); 214 | }); 215 | 216 | // Handler for uncaught errors 217 | process.on('uncaughtException', (error) => { 218 | console.error('\n Uncaught FATAL ERROR:', error); 219 | console.error('Restart the application: npm run start'); 220 | process.exit(1); 221 | }); 222 | 223 | // Handler for rejected promises 224 | process.on('unhandledRejection', (reason, promise) => { 225 | console.error('\n Unhandled rejected promise:', reason); 226 | console.error('Promise:', promise); 227 | }); 228 | 229 | // ENTRY POINT: Run the main function 230 | main().catch((error) => { 231 | console.error('\n FATAL ERROR in main application:', error); 232 | console.error('Try restarting: npm run start'); 233 | process.exit(1); 234 | }); -------------------------------------------------------------------------------- /README-en.md: -------------------------------------------------------------------------------- 1 | # 🤖 RAG Search Ingestion - LangChain.js + Docker + Gemini 2 | 3 | ![Node.js](https://img.shields.io/badge/Node.js-22+-339933?style=for-the-badge&logo=node.js&logoColor=white) 4 | ![TypeScript](https://img.shields.io/badge/TypeScript-5.9+-3178C6?style=for-the-badge&logo=typescript&logoColor=white) 5 | ![LangChain](https://img.shields.io/badge/LangChain.js-0.3+-00A86B?style=for-the-badge&logo=chainlink&logoColor=white) 6 | ![Google Gemini](https://img.shields.io/badge/Google%20Gemini-API-4285F4?style=for-the-badge&logo=google&logoColor=white) 7 | ![PostgreSQL](https://img.shields.io/badge/PostgreSQL-15+-336791?style=for-the-badge&logo=postgresql&logoColor=white) 8 | ![pgVector](https://img.shields.io/badge/pgVector-Extension-336791?style=for-the-badge&logo=postgresql&logoColor=white) 9 | ![Docker](https://img.shields.io/badge/Docker-Compose-2496ED?style=for-the-badge&logo=docker&logoColor=white) 10 | ![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge) 11 | 12 | A complete **Retrieval-Augmented Generation (RAG)** application for intelligent PDF document search, built with TypeScript, Node.js, and modern AI technologies. 13 | 14 | ## 📋 Table of Contents 15 | 16 | - [Overview](#-overview) 17 | - [Technologies Used](#-technologies-used) 18 | - [Architecture](#-architecture) 19 | - [Prerequisites](#-prerequisites) 20 | - [Setup](#-setup) 21 | - [How to Run](#-how-to-run) 22 | - [How to Use](#-how-to-use) 23 | - [Example Questions](#-example-questions) 24 | - [Project Structure](#-project-structure) 25 | - [Features](#-features) 26 | - [Troubleshooting](#-troubleshooting) 27 | - [Complete Tutorial](#-complete-tutorial) 28 | 29 | ## 🎯 Overview 30 | 31 | This project implements a complete RAG system that allows natural language questions about PDF document content. The system processes documents, creates vector embeddings, stores them in a PostgreSQL database with pgVector, and answers questions using Google Gemini. 32 | 33 | ### How It Works 34 | 35 | 1. **Ingestion**: The system loads and processes PDF documents, splitting them into chunks 36 | 2. **Vectorization**: Each chunk is converted into embeddings using Google Gemini 37 | 3. **Storage**: Embeddings are stored in PostgreSQL with pgVector extension 38 | 4. **Search**: When you ask a question, the system finds the most relevant chunks 39 | 5. 
**Generation**: Google Gemini generates an answer based on the found context 40 | 41 | ## 🛠 Technologies Used 42 | 43 | ### Backend & Processing 44 | - **Node.js 22+** - JavaScript runtime 45 | - **TypeScript** - Typed programming language 46 | - **LangChain.js** - Framework for AI applications 47 | - **TSX** - TypeScript executor for development 48 | 49 | ### Database & Vectors 50 | - **PostgreSQL 15** - Relational database 51 | - **pgVector** - Extension for vector search 52 | - **Docker & Docker Compose** - Containerization 53 | 54 | ### AI & Machine Learning 55 | - **Google Gemini API** - Language model for embeddings and chat 56 | - **models/embedding-001** - Model for creating embeddings 57 | - **gemini-2.0-flash** - Model for response generation 58 | 59 | ### Document Processing 60 | - **pdf-parse** - PDF text extraction 61 | - **RecursiveCharacterTextSplitter** - Intelligent text splitting 62 | 63 | ## 🏗 Architecture 64 | 65 | ``` 66 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 67 | │ PDF Document │ │ Text Splitter │ │ Embeddings │ 68 | │ │───▶│ │───▶│ (Gemini) │ 69 | │ document.pdf │ │ LangChain.js │ │ │ 70 | └─────────────────┘ └──────────────────┘ └─────────────────┘ 71 | │ 72 | ▼ 73 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 74 | │ User Question │ │ Similarity │ │ PostgreSQL │ 75 | │ │───▶│ Search │◀───│ + pgVector │ 76 | │ "Which company │ │ │ │ │ 77 | │ has the │ └──────────────────┘ └─────────────────┘ 78 | │ highest │ │ 79 | │ revenue?" │ ▼ 80 | └─────────────────┘ ▼ 81 | ▲ ┌──────────────────┐ 82 | │ │ Context │ 83 | │ │ Assembly │ 84 | │ │ │ 85 | │ └──────────────────┘ 86 | │ │ 87 | │ ▼ 88 | │ ┌──────────────────┐ 89 | │ │ Google Gemini │ 90 | └──────────────│ Response │ 91 | │ Generation │ 92 | └──────────────────┘ 93 | ``` 94 | 95 | ## 📋 Prerequisites 96 | 97 | - **Node.js 22+** installed 98 | - **Docker & Docker Compose** installed 99 | - **Google AI Studio API Key** (free) 100 | - **Git** to clone the repository 101 | 102 | ## ⚙️ Setup 103 | 104 | ### 1. Clone the Repository 105 | 106 | ```bash 107 | git clone https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini.git 108 | cd rag-search-ingestion-langchainjs-gemini 109 | ``` 110 | 111 | ### 2. Install Dependencies 112 | 113 | ```bash 114 | npm install 115 | ``` 116 | 117 | ### 3. Configure Environment Variables 118 | 119 | Create a `.env` file in the project root: 120 | 121 | ```env 122 | GOOGLE_API_KEY=your_google_api_key_here 123 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 124 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 125 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 126 | PG_VECTOR_COLLECTION_NAME=pdf_documents 127 | PDF_PATH=./document.pdf 128 | ``` 129 | 130 | ### 4. Get your Google API Key 131 | 132 | 1. Visit [Google AI Studio](https://aistudio.google.com/) 133 | 2. Create a new API Key 134 | 3. 
Copy and paste it into the `.env` file 135 | 136 | ## 🚀 How to Run 137 | 138 | ### Step 1: Start the Database 139 | 140 | ```bash 141 | docker-compose up -d 142 | ``` 143 | 144 | ### Step 2: Process the PDF (Ingestion) 145 | 146 | ```bash 147 | npm run dev:ingest 148 | ``` 149 | 150 | ### Step 3: Start the Interactive Chat 151 | 152 | ```bash 153 | npm run dev:chat 154 | ``` 155 | 156 | ## 💬 How to Use 157 | 158 | After running the chat, you'll see the interface: 159 | 160 | ``` 161 | ============================================================ 162 | 🤖 RAG CHAT - PDF Question and Answer System 163 | Powered by Google Gemini + LangChain + pgVector 164 | ⚡ TypeScript + Node.js Implementation 165 | ============================================================ 166 | 167 | System ready! Type your question or "help" to see commands. 168 | 169 | 💬 Ask a question: _ 170 | ``` 171 | 172 | ### Special Commands 173 | 174 | - `help` - Shows help and available commands 175 | - `status` - Checks system status 176 | - `clear` - Clears the screen 177 | - `exit` - Exits the chat 178 | 179 | ## 🎯 Example Questions 180 | 181 | ### Revenue Questions 182 | ``` 183 | Which company had the highest revenue? 184 | What is Aliança Energia's revenue? 185 | List the top 5 companies by revenue 186 | Which companies earned more than 1 billion? 187 | ``` 188 | 189 | ### Company Questions 190 | ``` 191 | How many companies are listed in the document? 192 | Which company was founded most recently? 193 | List companies founded in the 1990s 194 | Which company has the lowest revenue? 195 | ``` 196 | 197 | ### Analytical Questions 198 | ``` 199 | Which sector has the most companies? 200 | Compare revenue between different sectors 201 | What is the average revenue of companies? 202 | How many companies were founded in each decade? 203 | ``` 204 | 205 | ### Specific Questions 206 | ``` 207 | Are there any technology companies in the list? 208 | Which companies have "Sustainable" in their name? 209 | List energy sector companies 210 | Which automotive company has the highest revenue? 
211 | ``` 212 | 213 | ## 📁 Project Structure 214 | 215 | ``` 216 | rag-search-ingestion-langchainjs-gemini/ 217 | ├── src/ 218 | │ ├── chat.ts # Interactive chat interface 219 | │ ├── search.ts # RAG pipeline and semantic search 220 | │ ├── ingest.ts # PDF processing and ingestion 221 | │ └── google-client.ts # Google Gemini API client 222 | ├── docker-compose.yml # PostgreSQL + pgVector configuration 223 | ├── document.pdf # Sample document 224 | ├── package.json # Dependencies and scripts 225 | ├── tsconfig.json # TypeScript configuration 226 | ├── .env # Environment variables (create) 227 | └── README.md # This documentation 228 | ``` 229 | 230 | ### File Descriptions 231 | 232 | - **`chat.ts`** - Interactive chat interface with readline 233 | - **`search.ts`** - Implements complete RAG pipeline (4 stages) 234 | - **`ingest.ts`** - PDF processing and ingestion 235 | - **`google-client.ts`** - Google Gemini API client 236 | 237 | ## ✨ Features 238 | 239 | ### 🔍 Intelligent Semantic Search 240 | - Finds relevant information even with synonyms 241 | - Contextual search using vector embeddings 242 | - Automatic relevance ranking 243 | 244 | ### 🤖 Natural Responses 245 | - Responses in natural language 246 | - Based exclusively on PDF content 247 | - Context preserved during conversation 248 | 249 | ### ⚡ Optimized Performance 250 | - Embedding cache in PostgreSQL 251 | - Ultra-fast vector search with pgVector 252 | - Asynchronous processing 253 | 254 | ### 🛡️ Error Handling 255 | - Robust input validation 256 | - Fallbacks for API issues 257 | - User-friendly error messages 258 | 259 | ## 🔧 Troubleshooting 260 | 261 | ### Problem: "Error connecting to database" 262 | ```bash 263 | # Check if PostgreSQL is running 264 | docker ps 265 | 266 | # Restart containers 267 | docker-compose down 268 | docker-compose up -d 269 | ``` 270 | 271 | ### Problem: "Google API Key invalid" 272 | 1. Check if the API Key is correct in `.env` 273 | 2. Confirm the API is active in Google AI Studio 274 | 3. Check for extra spaces or characters 275 | 276 | ### Problem: "No documents found" 277 | ```bash 278 | # Run ingestion again 279 | npm run dev:ingest 280 | 281 | # Check documents in database 282 | docker exec postgres_rag_ts psql -U postgres -d rag -c "SELECT COUNT(*) FROM pdf_documents;" 283 | ``` 284 | 285 | ### Problem: "429 Too Many Requests" 286 | - Wait a few minutes (quota limit) 287 | - Check your plan in Google AI Studio 288 | - Consider using a new API Key if available 289 | 290 | ## 📊 Available Scripts 291 | 292 | ```bash 293 | npm run build # Compiles TypeScript to JavaScript 294 | npm run start # Runs compiled version 295 | npm run dev:chat # Interactive chat (development) 296 | npm run dev:ingest # PDF ingestion (development) 297 | ``` 298 | 299 | ## 📚 Complete Tutorial 300 | 301 | A detailed tutorial is available in the [tutorial/article-en.md](./tutorial/article-en.md) file. It covers everything from initial setup to complete execution of the RAG system, with step-by-step explanations and screenshots. 302 | 303 | ## 🤝 Contributing 304 | 305 | Contributions are welcome! Feel free to: 306 | 307 | 1. Fork the project 308 | 2. Create a feature branch 309 | 3. Commit your changes 310 | 4. Create a Pull Request 311 | 312 | ## 📝 License 313 | 314 | This project is under the MIT license. See the `LICENSE` file for more details. 
315 | 316 | ## 👨‍💻 Author 317 | 318 | - **Twitter** - [@glaucia86](https://twitter.com/glaucia86) 319 | - **LinkedIn** - [Glaucia Lemos](https://www.linkedin.com/in/glaucialemos/) 320 | - **YouTube** - [Glaucia Lemos](https://www.youtube.com/@GlauciaLemos) 321 | 322 | --- 323 | 324 | ⭐ **If this project was helpful, leave a star on GitHub!** -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤖 RAG Search Ingestion - LangChain.js + Docker + Gemini 2 | 3 | ![Node.js](https://img.shields.io/badge/Node.js-22+-339933?style=for-the-badge&logo=node.js&logoColor=white) 4 | ![TypeScript](https://img.shields.io/badge/TypeScript-5.9+-3178C6?style=for-the-badge&logo=typescript&logoColor=white) 5 | ![LangChain](https://img.shields.io/badge/LangChain.js-0.3+-00A86B?style=for-the-badge&logo=chainlink&logoColor=white) 6 | ![Google Gemini](https://img.shields.io/badge/Google%20Gemini-API-4285F4?style=for-the-badge&logo=google&logoColor=white) 7 | ![PostgreSQL](https://img.shields.io/badge/PostgreSQL-15+-336791?style=for-the-badge&logo=postgresql&logoColor=white) 8 | ![pgVector](https://img.shields.io/badge/pgVector-Extension-336791?style=for-the-badge&logo=postgresql&logoColor=white) 9 | ![Docker](https://img.shields.io/badge/Docker-Compose-2496ED?style=for-the-badge&logo=docker&logoColor=white) 10 | ![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge) 11 | ![Copilot Powered](https://img.shields.io/badge/Copilot-Powered%20by-blue?logo=github) 12 | 13 | Uma aplicação completa de **Retrieval-Augmented Generation (RAG)** para busca inteligente em documentos PDF, construída com TypeScript, Node.js e tecnologias modernas de IA. 14 | 15 | ## 📋 Índice 16 | 17 | - [Visão Geral](#-visão-geral) 18 | - [Tecnologias Utilizadas](#-tecnologias-utilizadas) 19 | - [Arquitetura](#-arquitetura) 20 | - [Pré-requisitos](#-pré-requisitos) 21 | - [Configuração](#-configuração) 22 | - [Como Executar](#-como-executar) 23 | - [Como Usar](#-como-usar) 24 | - [Exemplos de Perguntas](#-exemplos-de-perguntas) 25 | - [Estrutura do Projeto](#-estrutura-do-projeto) 26 | - [Funcionalidades](#-funcionalidades) 27 | - [Troubleshooting](#-troubleshooting) 28 | - [Tutorial Completo](#-tutorial-completo) 29 | 30 | ## 🎯 Visão Geral 31 | 32 | Este projeto implementa um sistema RAG completo que permite fazer perguntas em linguagem natural sobre o conteúdo de documentos PDF. O sistema processa documentos, cria embeddings vetoriais, armazena em um banco de dados PostgreSQL com pgVector e responde perguntas usando Google Gemini. 33 | 34 | ### Como Funciona 35 | 36 | 1. **Ingestão**: O sistema carrega e processa documentos PDF, dividindo-os em chunks 37 | 2. **Vetorização**: Cada chunk é convertido em embeddings usando Google Gemini 38 | 3. **Armazenamento**: Os embeddings são armazenados no PostgreSQL com extensão pgVector 39 | 4. **Busca**: Quando você faz uma pergunta, o sistema encontra os chunks mais relevantes 40 | 5. 
**Geração**: O Google Gemini gera uma resposta baseada no contexto encontrado 41 | 42 | ## 🛠 Tecnologias Utilizadas 43 | 44 | ### Backend & Processamento 45 | - **Node.js 22+** - Runtime JavaScript 46 | - **TypeScript** - Linguagem de programação tipada 47 | - **LangChain.js** - Framework para aplicações de IA 48 | - **TSX** - Executor TypeScript para desenvolvimento 49 | 50 | ### Banco de Dados & Vetores 51 | - **PostgreSQL 15** - Banco de dados relacional 52 | - **pgVector** - Extensão para busca vetorial 53 | - **Docker & Docker Compose** - Containerização 54 | 55 | ### IA & Machine Learning 56 | - **Google Gemini API** - Modelo de linguagem para embeddings e chat 57 | - **models/embedding-001** - Modelo para criar embeddings 58 | - **gemini-2.0-flash** - Modelo para geração de respostas 59 | 60 | ### Processamento de Documentos 61 | - **pdf-parse** - Extração de texto de PDFs 62 | - **RecursiveCharacterTextSplitter** - Divisão inteligente de texto 63 | 64 | ## 🏗 Arquitetura 65 | 66 | ``` 67 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 68 | │ PDF Document │ │ Text Splitter │ │ Embeddings │ 69 | │ │───▶│ │───▶│ (Gemini) │ 70 | │ document.pdf │ │ LangChain.js │ │ │ 71 | └─────────────────┘ └──────────────────┘ └─────────────────┘ 72 | │ 73 | ▼ 74 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 75 | │ User Question │ │ Similarity │ │ PostgreSQL │ 76 | │ │───▶│ Search │◀───│ + pgVector │ 77 | │ "Qual empresa │ │ │ │ │ 78 | │ tem maior │ └──────────────────┘ └─────────────────┘ 79 | │ faturamento?" │ │ 80 | └─────────────────┘ ▼ 81 | ▲ ┌──────────────────┐ 82 | │ │ Context │ 83 | │ │ Assembly │ 84 | │ │ │ 85 | │ └──────────────────┘ 86 | │ │ 87 | │ ▼ 88 | │ ┌──────────────────┐ 89 | │ │ Google Gemini │ 90 | └──────────────│ Response │ 91 | │ Generation │ 92 | └──────────────────┘ 93 | ``` 94 | 95 | ## 📋 Pré-requisitos 96 | 97 | - **Node.js 22+** instalado 98 | - **Docker & Docker Compose** instalados 99 | - **Google AI Studio API Key** (gratuita) 100 | - **Git** para clonar o repositório 101 | 102 | ## ⚙️ Configuração 103 | 104 | ### 1. Clone o Repositório 105 | 106 | ```bash 107 | git clone https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini.git 108 | cd rag-search-ingestion-langchainjs-gemini 109 | ``` 110 | 111 | ### 2. Instale as Dependências 112 | 113 | ```bash 114 | npm install 115 | ``` 116 | 117 | ### 3. Configure as Variáveis de Ambiente 118 | 119 | Crie um arquivo `.env` na raiz do projeto: 120 | 121 | ```env 122 | # Google AI API Configuration 123 | GOOGLE_API_KEY=sua_api_key_aqui 124 | 125 | # Modelos Google (opcional) 126 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 127 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 128 | 129 | # Database Configuration 130 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 131 | PG_VECTOR_COLLECTION_NAME=pdf_documents 132 | 133 | # PDF Configuration 134 | PDF_PATH=./document.pdf 135 | ``` 136 | ### 4. Obtenha sua Google API Key 137 | 138 | 1. Acesse [Google AI Studio](https://aistudio.google.com/) 139 | 2. Crie uma nova API Key 140 | 3. 
Copie e cole no arquivo `.env` 141 | 142 | ## 🚀 Como Executar 143 | 144 | ### Passo 1: Iniciar o Banco de Dados 145 | 146 | ```bash 147 | docker-compose up -d 148 | ``` 149 | 150 | ### Passo 2: Processar o PDF (Ingestão) 151 | 152 | ```bash 153 | npm run dev:ingest 154 | ``` 155 | 156 | ### Passo 3: Iniciar o Chat Interativo 157 | 158 | ```bash 159 | npm run dev:chat 160 | ``` 161 | 162 | ## 💬 Como Usar 163 | 164 | Após executar o chat, você verá a interface: 165 | 166 | ``` 167 | ============================================================ 168 | 🤖 RAG CHAT - Sistema de Perguntas e Respostas em PDF 169 | Powered by Google Gemini + LangChain + pgVector 170 | ⚡ TypeScript + Node.js Implementation 171 | ============================================================ 172 | 173 | Sistema pronto! Digite sua pergunta ou "help" para ver comandos. 174 | 175 | 💬 Faça uma pergunta: _ 176 | ``` 177 | 178 | ### Comandos Especiais 179 | 180 | - `help` - Mostra ajuda e comandos disponíveis 181 | - `status` - Verifica o status do sistema 182 | - `clear` - Limpa a tela 183 | - `exit` - Sai do chat 184 | 185 | ## 🎯 Exemplos de Perguntas 186 | 187 | ### Perguntas sobre Faturamento 188 | ``` 189 | Qual empresa teve o maior faturamento? 190 | Qual o faturamento da empresa Aliança Energia? 191 | Liste as 5 empresas com maior receita 192 | Quais empresas faturaram mais de 1 bilhão? 193 | ``` 194 | 195 | ### Perguntas sobre Empresas 196 | ``` 197 | Quantas empresas estão listadas no documento? 198 | Qual empresa foi fundada mais recentemente? 199 | Liste empresas fundadas na década de 1990 200 | Qual empresa tem o menor faturamento? 201 | ``` 202 | 203 | ### Perguntas Analíticas 204 | ``` 205 | Qual setor tem mais empresas? 206 | Compare o faturamento entre diferentes setores 207 | Qual a média de faturamento das empresas? 208 | Quantas empresas foram fundadas em cada década? 209 | ``` 210 | 211 | ### Perguntas Específicas 212 | ``` 213 | Existe alguma empresa de tecnologia na lista? 214 | Quais empresas têm "Sustentável" no nome? 215 | Liste empresas do setor de energia 216 | Qual empresa do setor automotivo tem maior faturamento? 
217 | ``` 218 | 219 | ## 📁 Estrutura do Projeto 220 | 221 | ``` 222 | rag-search-ingestion-langchainjs-gemini/ 223 | ├── src/ 224 | │ ├── chat.ts # Interface de chat interativo 225 | │ ├── search.ts # Pipeline RAG e busca semântica 226 | │ ├── ingest.ts # Processamento e ingestão de PDFs 227 | │ └── google-client.ts # Cliente Google Gemini API 228 | ├── docker-compose.yml # Configuração PostgreSQL + pgVector 229 | ├── document.pdf # Documento de exemplo 230 | ├── package.json # Dependências e scripts 231 | ├── tsconfig.json # Configuração TypeScript 232 | ├── .env # Variáveis de ambiente (criar) 233 | └── README.md # Esta documentação 234 | ``` 235 | 236 | ### Descrição dos Arquivos 237 | 238 | - **`chat.ts`** - Interface principal do usuário com readline 239 | - **`search.ts`** - Implementa o pipeline RAG completo (4 etapas) 240 | - **`ingest.ts`** - Processa PDFs e cria embeddings vetoriais 241 | - **`google-client.ts`** - Integração com Google Gemini API 242 | 243 | ## ✨ Funcionalidades 244 | 245 | ### 🔍 Busca Semântica Inteligente 246 | 247 | - Encontra informações relevantes mesmo com sinônimos 248 | - Busca contextual usando embeddings vetoriais 249 | - Ranking de relevância automático 250 | 251 | ### 🤖 Respostas Naturais 252 | 253 | - Respostas em português natural 254 | - Baseadas exclusivamente no conteúdo do PDF 255 | - Contexto preservado durante a conversa 256 | 257 | ### ⚡ Performance Otimizada 258 | 259 | - Cache de embeddings no PostgreSQL 260 | - Busca vetorial ultrarrápida com pgVector 261 | - Processamento assíncrono 262 | 263 | ### 🛡️ Tratamento de Erros 264 | 265 | - Validação de entrada robusta 266 | - Fallbacks para problemas de API 267 | - Mensagens de erro amigáveis 268 | 269 | ## 🔧 Troubleshooting 270 | 271 | ### Problema: "Error connecting to database" 272 | ```bash 273 | # Verificar se PostgreSQL está rodando 274 | docker ps 275 | 276 | # Reiniciar containers 277 | docker-compose down 278 | docker-compose up -d 279 | ``` 280 | 281 | ### Problema: "Google API Key invalid" 282 | 1. Verifique se a API Key está correta no `.env` 283 | 2. Confirme que a API está ativa no Google AI Studio 284 | 3. Verifique se não há espaços ou caracteres extras 285 | 286 | ### Problema: "No documents found" 287 | 288 | ```bash 289 | # Executar ingestão novamente 290 | npm run dev:ingest 291 | 292 | # Verificar documentos no banco 293 | docker exec postgres_rag_ts psql -U postgres -d rag -c "SELECT COUNT(*) FROM pdf_documents;" 294 | ``` 295 | 296 | ### Problema: "429 Too Many Requests" 297 | 298 | - Aguarde alguns minutos (limite de quota) 299 | - Verifique seu plano no Google AI Studio 300 | - Considere usar uma nova API Key se disponível 301 | 302 | ## 📊 Scripts Disponíveis 303 | 304 | ```bash 305 | npm run build # Compila TypeScript para JavaScript 306 | npm run start # Executa versão compilada 307 | npm run dev:chat # Chat interativo (desenvolvimento) 308 | npm run dev:ingest # Ingestão de PDF (desenvolvimento) 309 | ``` 310 | 311 | ## 📚 Tutorial Completo 312 | 313 | Um tutorial detalhado está disponível no arquivo [tutorial/article.md](./tutorial/article.md). Ele cobre desde a configuração inicial até a execução completa do sistema RAG, com explicações passo a passo e capturas de tela. 314 | 315 | ## 🤝 Contribuição 316 | 317 | Contribuições são bem-vindas! Sinta-se à vontade para: 318 | 319 | 1. Fazer fork do projeto 320 | 2. Criar uma feature branch 321 | 3. Fazer commit das mudanças 322 | 4. 
Criar um Pull Request 323 | 324 | ## 📝 Licença 325 | 326 | Este projeto está sob a licença MIT. Veja o arquivo `LICENSE` para mais detalhes. 327 | 328 | ## 👨‍💻 Autor 329 | 330 | - **Twitter** - [@glaucia86](https://twitter.com/glaucia86) 331 | - **LinkedIn** - [Glaucia Lemos](https://www.linkedin.com/in/glaucialemos/) 332 | - **YouTube** - [Glaucia Lemos](https://www.youtube.com/@GlauciaLemos) 333 | 334 | --- 335 | 336 | ⭐ **Se este projeto foi útil, deixe uma estrela no GitHub!** 337 | -------------------------------------------------------------------------------- /tutorial/article-en.md: -------------------------------------------------------------------------------- 1 | # Complete RAG System: Zero to Hero with TypeScript, Docker, Google Gemini and LangChain.js 2 | 3 | ![alt text](./resource/rag-docker-ts-langchain.jpg) 4 | 5 | The implementation of Retrieval-Augmented Generation (RAG) systems represents one of the most promising approaches to solve the fundamental limitations of modern Large Language Models. This article presents a complete journey in building a robust and scalable RAG system, using **[TypeScript](https://www.typescriptlang.org/)** as the development foundation, **[Docker](https://www.docker.com/)** for infrastructure orchestration, **[Google Gemini](https://ai.google.dev/gemini-api/docs/quickstart?hl=pt-br)** for artificial intelligence, and **[LangChain.js](https://js.langchain.com/docs/introduction/)** as the integration framework. 6 | 7 | Our solution allows users to ask natural language questions about PDF documents, combining advanced semantic search with precise contextual answer generation. The system demonstrates how to integrate cutting-edge technologies to create practical and scalable AI applications, covering everything from document extraction and processing to the generation of contextually relevant responses. 8 | 9 | The main technologies that form the backbone of this implementation include Node.js version 22 or higher for modern JavaScript runtime, TypeScript 5.9 or higher for robust static typing, LangChain.js 0.3 or higher as AI orchestration framework, Google Gemini API for embeddings and text generation, PostgreSQL 15 or higher with the pgVector extension for vectorial storage and search, and Docker for containerization and simplified deployment. 10 | 11 | > Note: as many already know, I'm taking the **[MBA in Software Engineering in A.I at FullCycle](https://ia.fullcycle.com.br/mba-ia/?utm_source=google_search&utm_campaign=search_mba-arquitetura&utm_medium=curso_especifico&utm_content=search_mba-arquitetura&gad_source=1&gad_campaignid=21917349974&gclid=Cj0KCQjww4TGBhCKARIsAFLXndQejvz0K1XTOHQ3CSglzOlQfVH64T2CS1qZnwkiyChx0HoXzaK4KY0aAosOEALw_wcB)**, and this article is based on one of the practical projects from the course. I'm not doing promotion, just sharing the knowledge learned so that others can benefit too. But if you want to know more about the MBA, click on the previous link. 12 | 13 | ## Understanding RAG and its fundamental importance 14 | 15 | ### The Challenge of Traditional LLMs 16 | 17 | Large Language Models like GPT, Claude, and Gemini have revolutionized natural language processing, but face limitations that prevent their direct application in business and specialized scenarios. The knowledge of these models remains static, being limited to training data up to a specific date, creating a temporal gap that can be critical in domains where updated information is essential. 
18 | 19 | Additionally, these models tend to produce hallucinations, inventing information when they don't have sufficient knowledge about a topic. This characteristic can be particularly problematic in applications that require factual precision. LLMs also lack specific context about companies' internal data or specialized documents, limiting their usefulness in scenarios where specialized knowledge is necessary. 20 | 21 | The impossibility of post-training updates represents another significant obstacle. Once trained, a model cannot learn new facts or incorporate updated information without a complete retraining process, which is costly and complex. 22 | 23 | ## RAG as an elegant architectural solution 24 | 25 | Retrieval-Augmented Generation emerges as an architecture that elegantly solves these limitations through the combination of two fundamental components. 26 | 27 | - **The Retrieval component:** functions as an intelligent search system that finds relevant information in an external knowledge base. 28 | 29 | - **The Generation component:** uses an LLM to generate responses based exclusively on the retrieved context, ensuring that responses are grounded in verifiable information. 30 | 31 | The processing flow follows a logical sequence where a user query is converted into a vector embedding, which is then used for similarity search in the vector database. The most relevant documents are retrieved and concatenated into a context, which is provided to the LLM along with the original question for final response generation. 32 | 33 | ## Transformative technical advantages 34 | 35 | The RAG architecture offers factuality through responses based on verifiable sources, eliminating the need to rely exclusively on the model's internal knowledge. Updates are guaranteed as the knowledge base can be updated without needing to retrain the model, allowing incorporation of new documents and information in real-time. 36 | 37 | Transparency is a fundamental characteristic, as it allows tracking the sources of information used in response generation. Cost-effectiveness is significant, as it avoids the need for model fine-tuning, which requires massive computational resources and specialized technical expertise. 38 | 39 | ## System architecture: comprehensive technical vision 40 | 41 | ### Detailed high-level architecture 42 | 43 | The RAG system architecture can be visualized as a processing pipeline that transforms PDF documents into a searchable knowledge base and uses this base to answer natural language questions. The process begins with a PDF document that goes through text extraction, followed by intelligent segmentation using LangChain.js. The resulting segments are converted into vector embeddings through the Gemini model. 44 | 45 | > Note: although the article focuses on PDF files, in a RAG application, we could use any data source, such as: relational databases, NoSQL, APIs, Word documents, Excel spreadsheets, among others. 46 | 47 | These embeddings are stored in PostgreSQL with the **[pgVector](https://www.postgresql.org/about/news/pgvector-070-released-2852/)** extension, creating a searchable knowledge base. When a user asks a question, it is converted into an embedding and used for similarity search in the vector database. The most relevant documents are retrieved and assembled into context, which is then sent to Google Gemini along with the question for final response generation. 48 | 49 | ### After all, what are embeddings? 
50 | 51 | Embeddings are numerical representations of data, such as text or images, in a high-dimensional vector space. They capture the semantic meaning of data, allowing machines to understand and process information more effectively. In the context of RAG, embeddings are used to transform queries and documents into vectors that can be compared to find similarities. 52 | 53 | - Example: 54 | 55 | ```text 56 | "cat" -> [0.1, 0.3, 0.5, ...] 57 | "dog" -> [0.2, 0.4, 0.6, ...] 58 | ``` 59 | 60 | I recommend the official Gemini documentation that explains embeddings in more detail: **[Embeddings](https://ai.google.dev/gemini-api/docs/embeddings?hl=pt-br)** 61 | 62 | ## Technological components in depth 63 | 64 | To keep the application simple and easy to run, I used interfaces that utilize Node.js with TypeScript for runtime and robust static typing. The Readline Interface provides an interactive CLI for testing and demonstrations, allowing natural interaction with the system. 65 | 66 | For document processing, we use the following libraries: 67 | 68 | - **[LangChain.js](https://js.langchain.com/docs/introduction/):** serves as the main framework for LLM applications, offering high-level abstractions for common tasks. 69 | 70 | - **[RecursiveCharacterTextSplitter](https://js.langchain.com/docs/concepts/text_splitters/):** implements intelligent chunking algorithm that preserves semantic context. 71 | 72 | - **[PDF-Parse](https://www.npmjs.com/package/pdf-parse):** performs clean text extraction from PDF documents. 73 | 74 | Embeddings and AI are managed through the Google Gemini API, using the embedding-001 model for generating 768-dimension embeddings and **[gemini-2.0-flash](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash?hl=pt-br)** for optimized response generation. 75 | 76 | The vector database combines _PostgreSQL 15_ or higher as a robust relational database with _pgVector_ as an extension for efficient vector search. _HNSW Indexing_ implements approximate search algorithm that offers performance for searches in milliseconds even with large data volumes. 77 | 78 | The infrastructure uses _Docker Compose_ for container orchestration, simplifying deployment and dependency management. Environment Variables provide flexible and secure configuration. 79 | 80 | ### What is HNSW Indexing? 81 | 82 | _HNSW Indexing_ stands for _Hierarchical Navigable Small World Graph Indexing._ 83 | It's a technique widely used in approximate nearest neighbor search _(Approximate Nearest Neighbor Search – ANN)_ in vector databases, such as when you need to retrieve embeddings of text, images, or audio quickly. 84 | 85 | #### How does it work? 86 | 87 | - It organizes vectors in a hierarchical graph structure. 88 | 89 | - In upper levels, you have more general connections between vectors, which allow "long jumps" through the search space. 90 | 91 | - As you go down levels, the graphs become denser and more connected, allowing more precise and local searches. 92 | 93 | - This creates a balance between speed (quick jumps between regions) and precision (fine-tuning at lower levels). 94 | 95 | #### Why is it important? 96 | 97 | - **High performance:** can search neighbors in millions of vectors with low latency. 98 | 99 | - **Scalability:** it's efficient in both memory and time, even on large databases. 100 | 101 | - **Common use:** it's the default index in various vector database libraries, such as FAISS (Facebook AI Similarity Search), Milvus, Weaviate, and Pinecone. 
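To make vector proximity concrete before the practical example, here is a small TypeScript sketch of cosine similarity, the metric behind pgVector's `vector_cosine_ops`. The function and the toy vectors are illustrative only and are not part of the project's source code:

```typescript
// Cosine similarity: close to 1 = same direction (related meaning),
// close to 0 = unrelated, close to -1 = opposite.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Toy 3-dimensional "embeddings" (real embedding-001 vectors have 768 dimensions).
console.log(cosineSimilarity([0.1, 0.3, 0.5], [0.2, 0.4, 0.6])); // ≈ 0.99: related
```

An HNSW index accelerates exactly this kind of comparison: instead of scoring the query against every stored vector, it navigates the graph toward the most similar region.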
102 | 103 | ### Practical example 104 | 105 | Imagine you have 10 million document embeddings. If you were to compare each query with all of them, it would be unfeasible. 106 | 107 | With HNSW, you can find the semantically closest documents in milliseconds, without going through all vectors. 108 | 109 | I won't go into details about HNSW Indexing, but if you want to take a look at a practical implementation using TypeScript, here's the link to the project repository I created: **[HNSW + Gemini + LangChain.js - Clean Architecture](https://github.com/glaucia86/hnsw-gemini-langchainjs)**. In another article, I can detail more about HNSW Indexing and break this implementation into parts to make it easier to understand. 110 | 111 | ## Detailed RAG Pipeline 112 | 113 | The ingestion pipeline follows the sequence: 114 | 115 | > PDF → Text Extraction → Chunking → Embeddings → Vector Storage. 116 | 117 | Each step is optimized to preserve maximum semantic information while preparing data for efficient search. 118 | 119 | The query pipeline executes: 120 | 121 | > User Query → Query Embedding → Similarity Search → Context Assembly → LLM Generation → Response. 122 | 123 | This process ensures that each response is grounded in specific evidence from the processed documents. 124 | 125 | ## Development Environment Configuration 126 | 127 | ### Essential Technical Prerequisites 128 | 129 | The development environment requires the following minimum versions: 130 | 131 | - **Node.js version 22.0.0 or higher** - For support of the latest JavaScript features and optimized performance 132 | - **NPM version 10.0.0 or higher** - Necessary for modern dependency management 133 | - **Docker version 24.0.0 or higher** - Ensures compatibility with advanced containerization features 134 | - **Git version 2.40.0 or higher** - Essential for version control 135 | 136 | To verify the installed versions, run the following commands in your terminal: 137 | 138 | ```bash 139 | node --version # v22.0.0+ 140 | npm --version # 10.0.0+ 141 | docker --version # 24.0.0+ 142 | git --version # 2.40.0+ 143 | ``` 144 | 145 | ## Complete Project Initialization 146 | 147 | The project structure starts with creating a main directory and subdirectory for source code: 148 | 149 | ```bash 150 | mkdir rag-system-typescript && cd rag-system-typescript 151 | mkdir src 152 | ``` 153 | 154 | Node.js initialization is done through the command: 155 | 156 | ```bash 157 | npm init -y 158 | ``` 159 | 160 | This command creates the `package.json` file with default configurations. 161 | 162 | Production dependencies include essential packages for system functionality: 163 | 164 | ```bash 165 | npm install @google/generative-ai @langchain/core @langchain/community @langchain/textsplitters dotenv pg uuid 166 | ``` 167 | 168 | These libraries provide Google AI integration, LangChain framework, environment variable manipulation, PostgreSQL connection, and unique identifier generation. 169 | 170 | Development dependencies ensure a robust development experience: 171 | 172 | ```bash 173 | npm install -D @types/node @types/pg @types/pdf-parse tsx typescript 174 | ``` 175 | 176 | These include TypeScript type definitions, TypeScript compiler, and tsx development executor. 177 | 178 | ## Advanced TypeScript Configuration 179 | 180 | The `tsconfig.json` file defines compilation configurations that optimize for modern development and performance. 181 | 182 |
tsconfig.json 183 |
184 | 185 | ```json 186 | { 187 | "compilerOptions": { 188 | "target": "ES2022", 189 | "module": "ESNext", 190 | "moduleResolution": "node", 191 | "outDir": "./dist", 192 | "rootDir": "./src", 193 | "strict": true, 194 | "esModuleInterop": true, 195 | "skipLibCheck": true, 196 | "forceConsistentCasingInFileNames": true, 197 | "resolveJsonModule": true, 198 | "allowSyntheticDefaultImports": true, 199 | "experimentalDecorators": true, 200 | "emitDecoratorMetadata": true, 201 | "declaration": true, 202 | "declarationMap": true, 203 | "sourceMap": true, 204 | "types": ["node"], 205 | "lib": ["ES2022", "DOM"] 206 | }, 207 | "include": [ 208 | "src/**/*" 209 | ], 210 | "exclude": [ 211 | "node_modules", 212 | "dist", 213 | "**/*.test.ts", 214 | "**/*.spec.ts" 215 | ], 216 | "ts-node": { 217 | "esm": true 218 | } 219 | } 220 | ``` 221 | 222 |
223 |
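With the configuration above in place, a quick optional sanity check (not one of the repository's scripts) is to run the compiler in check-only mode:

```bash
npx tsc --noEmit   # type-checks src/ against tsconfig.json without writing to dist/
```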
224 | 225 | ## Intelligent Automation Scripts 226 | 227 | The scripts in `package.json` automate common tasks: 228 | 229 | ```json 230 | "scripts": { 231 | "build": "tsc", 232 | "start": "npm run build && node dist/chat.js", 233 | "ingest": "npm run build && node dist/ingest.js", 234 | "dev:chat": "tsx src/chat.ts", 235 | "dev:ingest": "tsx src/ingest.ts" 236 | }, 237 | ``` 238 | 239 | ## Infrastructure: PostgreSQL + pgVector 240 | 241 | ### Theoretical Foundations of Vector Databases 242 | 243 | Mathematical embeddings represent a revolution in how computers process and understand natural language. Texts are converted into high-dimensionality vectors, where each dimension captures specific aspects of semantic meaning. For the _Gemini embedding-001_ model, each text is represented by 768 floating-point numbers. 244 | 245 | Proximity in vector space represents semantic similarity, allowing mathematical algorithms to find related texts through distance calculations. For example, the phrases _"company revenue"_ and _"corporate income"_ would produce close vectors in multidimensional space. 246 | 247 | _pgVector_ adds native vector capabilities to PostgreSQL, including vector data type for efficient storage, HNSW (Hierarchical Navigable Small World) indexes for fast search, and similarity operations like cosine distance, Euclidean distance, and inner product. 248 | 249 | ## Advanced Docker Configuration 250 | 251 | The `docker-compose.yml` file defines complete infrastructure for the RAG system. The PostgreSQL service uses the **pgvector/pgvector:pg17** image which includes PostgreSQL 17 with pre-installed pgVector extension. 252 | 253 |
docker-compose.yml 254 |
255 | 256 | ```yaml 257 | services: 258 | # Main service: PostgreSQL with pgVector extension 259 | postgres: 260 | image: pgvector/pgvector:pg17 261 | container_name: postgres_rag_ts 262 | environment: 263 | POSTGRES_USER: postgres 264 | POSTGRES_PASSWORD: postgres 265 | POSTGRES_DB: rag 266 | ports: 267 | - "5432:5432" 268 | volumes: 269 | # Data persistence 270 | - postgres_data:/var/lib/postgresql/data 271 | healthcheck: 272 | # Checks if the database is ready 273 | test: ["CMD-SHELL", "pg_isready -U postgres -d rag"] 274 | interval: 10s 275 | timeout: 5s 276 | retries: 5 277 | restart: unless-stopped 278 | 279 | # Auxiliary service: Initializes pgVector extension 280 | bootstrap_vector_ext: 281 | image: pgvector/pgvector:pg17 282 | depends_on: 283 | postgres: 284 | condition: service_healthy 285 | entrypoint: ["/bin/sh", "-c"] 286 | command: > 287 | PGPASSWORD=postgres 288 | psql "postgresql://postgres@postgres:5432/rag" -v ON_ERROR_STOP=1 289 | -c "CREATE EXTENSION IF NOT EXISTS vector;" 290 | restart: "no" 291 | 292 | volumes: 293 | postgres_data: 294 | ``` 295 | 296 |
297 |
298 | 299 | The `bootstrap_vector_ext` service ensures that the pgVector extension is created automatically after PostgreSQL is operational. The healthcheck monitors database availability before initializing dependencies. 300 | 301 | ## Infrastructure Initialization and Verification 302 | 303 | Infrastructure initialization is done through the command: 304 | 305 | ```bash 306 | docker-compose up -d 307 | ``` 308 | 309 | This command starts containers in daemon mode. Status verification is performed with: 310 | 311 | ```bash 312 | docker ps 313 | ``` 314 | 315 | This command lists active containers. Logs can be monitored with: 316 | 317 | ```bash 318 | docker logs postgres_rag_ts 319 | ``` 320 | 321 | This command allows identifying initialization problems. 322 | 323 | ## Google Gemini Integration: Advanced AI Client 324 | 325 | ### In-depth Theory of Embeddings 326 | 327 | Embeddings represent one of the most significant innovations in natural language processing, converting discrete text representations into continuous vectors of real numbers. These vectors capture complex semantic relationships, allowing mathematical operations on linguistic concepts. 328 | 329 | The 768-number dimensionality for the embedding-001 model offers sufficient space to represent subtle semantic nuances while maintaining computational efficiency. Close vectors in multidimensional space correspond to semantically similar texts, allowing mathematical similarity search. 330 | 331 | Vector operations allow conceptual manipulation, where differences and sums of vectors can reveal analogical relationships. The classic example _"king" - "man" + "woman" ≈ "queen"_ demonstrates how embeddings capture abstract relational structures. 332 | 333 | ### Robust Google Client Implementation 334 | 335 | The Google client implementation encapsulates all communication with Gemini APIs, offering clean interface and robust error handling. 336 | 337 |
src/google-client.ts 338 |
339 | 340 | ```typescript 341 | import { config } from 'dotenv'; 342 | import { GoogleGenerativeAI } from '@google/generative-ai'; 343 | import { Embeddings } from '@langchain/core/embeddings'; 344 | 345 | config(); 346 | 347 | export interface ChatMessage { 348 | role: 'system' | 'user' | 'assistant'; 349 | content: string; 350 | } 351 | 352 | export class GoogleClient { 353 | private googleApiKey: string; 354 | private embeddingModel: string; 355 | private chatModel: string; 356 | private genAI: GoogleGenerativeAI; 357 | 358 | constructor() { 359 | this.googleApiKey = process.env.GOOGLE_API_KEY || ''; 360 | this.embeddingModel = process.env.GOOGLE_EMBEDDING_MODEL || 'models/embedding-001'; // documented default 361 | this.chatModel = process.env.GOOGLE_CHAT_MODEL || 'gemini-2.0-flash'; // documented default 362 | 363 | if (!this.googleApiKey) { 364 | throw new Error('Google API key is not set in environment variables.'); 365 | } 366 | 367 | this.genAI = new GoogleGenerativeAI(this.googleApiKey); 368 | } 369 | 370 | async getEmbeddings(texts: string[]): Promise<number[][]> { 371 | const embeddings: number[][] = []; 372 | 373 | for(const text of texts) { 374 | try { 375 | const model = this.genAI.getGenerativeModel({ model: this.embeddingModel }); 376 | const result = await model.embedContent(text); 377 | 378 | if (result.embedding && result.embedding.values) { 379 | embeddings.push(result.embedding.values); 380 | } else { 381 | console.log(`No embedding returned for text: ${text}`); 382 | const dummySize = 768; 383 | embeddings.push(new Array(dummySize).fill(0)); 384 | } 385 | } catch (error) { 386 | console.log(`Error generating embedding: ${error}`); 387 | const dummySize = 768; 388 | embeddings.push(new Array(dummySize).fill(0)); 389 | } 390 | } 391 | 392 | return embeddings; 393 | } 394 | 395 | async chatCompletions(messages: ChatMessage[], temperature: number = 0.1): Promise<string> { 396 | try { 397 | const model = this.genAI.getGenerativeModel({ 398 | model: this.chatModel, 399 | generationConfig: { 400 | temperature, 401 | maxOutputTokens: 1000, 402 | } 403 | }); 404 | 405 | let prompt = ''; 406 | for (const message of messages) { 407 | const { role, content } = message; 408 | 409 | if (role === 'system') { 410 | prompt += `Instructions: ${content}\n\n`; 411 | } else if (role === 'user') { 412 | prompt += `${content}\n`; 413 | } else if (role === 'assistant') { 414 | prompt += `Assistant: ${content}\n`; 415 | } 416 | } 417 | 418 | const result = await model.generateContent(prompt); 419 | return result.response.text(); 420 | } catch (error) { 421 | console.log(`Error generating chat completion: ${error}`); 422 | return 'Sorry, an error occurred while generating the response.'; 423 | } 424 | } 425 | } 426 | ``` 427 |
429 |
429 | 430 | The `GoogleClient` class manages configuration and communication with Gemini APIs. The `getEmbeddings` method processes the received texts one at a time, implementing graceful error handling and a zero-vector fallback for failure cases. `chatCompletions` converts structured messages into prompts optimized for Gemini. 431 | 432 | The `GoogleEmbeddings` class extends LangChain.js abstractions for seamless integration with existing frameworks, batching documents in groups of ten before delegating to `GoogleClient`. 433 | 434 |
src/google-client.ts (continued)
436 | 437 | ```typescript 438 | export class GoogleEmbeddings extends Embeddings { 439 | private client: GoogleClient; 440 | 441 | constructor() { 442 | super({}); 443 | this.client = new GoogleClient(); 444 | } 445 | 446 | async embedDocuments(texts: string[]): Promise<number[][]> { 447 | console.log(`Generating embeddings for ${texts.length} documents...`); 448 | 449 | const batchSize = 10; // Processing 10 texts at a time for better optimization 450 | const allEmbeddings: number[][] = []; 451 | 452 | for(let i = 0; i < texts.length; i += batchSize) { 453 | const batchTexts = texts.slice(i, i + batchSize); 454 | const batchEmbeddings = await this.client.getEmbeddings(batchTexts); 455 | allEmbeddings.push(...batchEmbeddings); 456 | 457 | console.log(`Batch ${Math.floor(i / batchSize) + 1}: ${batchTexts.length} processed texts`); 458 | } 459 | 460 | return allEmbeddings; 461 | } 462 | 463 | // Method for embedding a single query 464 | async embedQuery(text: string): Promise<number[]> { 465 | const embeddings = await this.client.getEmbeddings([text]); 466 | return embeddings[0]; 467 | } 468 | } 469 | 470 | // Factory function to create GoogleClient instances 471 | export function getGoogleClient(): GoogleClient { 472 | return new GoogleClient(); 473 | } 474 | ``` 475 |
477 |
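To illustrate how these pieces fit together, the hypothetical snippet below (not part of the repository) embeds two semantically related phrases and inspects the resulting vectors; it assumes the `.env` configuration described in the next section is already in place:

```typescript
import { GoogleEmbeddings } from './google-client';

async function embeddingSmokeTest(): Promise<void> {
  const embeddings = new GoogleEmbeddings();

  // Two phrases with related meanings should produce nearby vectors.
  const [revenue, income] = await embeddings.embedDocuments([
    'company revenue',
    'corporate income',
  ]);

  console.log(revenue.length); // 768 dimensions (embedding-001)
  console.log(income.length);  // 768 dimensions
}

embeddingSmokeTest();
```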
478 | 479 | ## Secure Environment Configuration 480 | 481 | The `.env` file centralizes sensitive configuration, separating credentials from source code for security and deployment flexibility. 482 | 483 | ```text 484 | GOOGLE_API_KEY=your_google_api_key_here 485 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 486 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 487 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 488 | PG_VECTOR_COLLECTION_NAME=pdf_documents 489 | PDF_PATH=./document.pdf 490 | ``` 491 | 492 | > Note: to create a Google Gemini API Key, follow the steps described in the official documentation: **[AI Studio - Google](https://aistudio.google.com/apikey)** and click on: `Create API Key`. 493 | 494 | ## Ingestion System: PDF to Intelligent Vectors 495 | 496 | ### Advanced Chunking Theory 497 | 498 | Chunking represents one of the most critical aspects in RAG systems, determining the quality and relevance of responses. The fundamental challenge is that LLMs have limited context windows, while documents can be extensive, creating the need for intelligent segmentation. 499 | 500 | The chunking strategy must balance context size with information specificity. Chunks that are too large may contain irrelevant information that dilutes relevance. Chunks that are too small may lack sufficient context for complete understanding. 501 | 502 | The `RecursiveCharacterTextSplitter` (from LangChain.js) is very useful in textual documents, as it preserves the natural structure of paragraphs and sentences. In this case, parameters like `chunk_size` around 1,000 characters and `chunk_overlap` of 150–200 work as a good starting point, maintaining balance between context and specificity. 503 | 504 | However, since this project works with _tabular PDF_, this strategy is not the most effective. For tables, we prefer to break the document line by line, ensuring that each record is an independent chunk. Additionally, we include the table header in each fragment to maintain semantic clarity. This way, overlap is unnecessary (kept at 0) and separators are adapted to prioritize line breaks. 505 | 506 | This approach ensures that each tabular entry is preserved integrally and improves precision when retrieving information via RAG. 507 | 508 | ## Detailed `RecursiveCharacterTextSplitter` Algorithm 509 | 510 | The algorithm follows an intelligent fallback strategy that tries to break by natural separators before resorting to artificial breaks. First, it tries to break by paragraphs using double line breaks. If resulting chunks still exceed maximum size, then it breaks by simple lines. For still large chunks, it breaks by spaces between words. As a last resort, it breaks character by character. 511 | 512 | This approach ensures that related information stays together whenever possible, preserving semantic coherence necessary for effective retrieval. 513 | 514 | ## Complete Ingestion Implementation 515 | 516 | The ingestion implementation combines PDF extraction, intelligent segmentation, embedding generation, and vector storage in an integrated pipeline. 517 | 518 |
src/ingest.ts 519 |
520 | 521 | ```typescript 522 | import { config } from 'dotenv'; 523 | import { Document } from '@langchain/core/documents'; 524 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 525 | import { GoogleEmbeddings } from './google-client'; 526 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 527 | import { PDFLoader as LangChainPDFLoader } from '@langchain/community/document_loaders/fs/pdf'; 528 | 529 | config(); 530 | 531 | class PDFLoader { 532 | constructor(private filePath: string) {} 533 | 534 | async load(): Promise<Document[]> { 535 | try { 536 | console.log(`Reading PDF file: ${this.filePath}`); 537 | 538 | const langChainLoader = new LangChainPDFLoader(this.filePath); 539 | const documents = await langChainLoader.load(); 540 | 541 | console.log(`PDF loaded successfully! Found ${documents.length} pages`); 542 | return documents; 543 | } catch (error) { 544 | console.error('Error loading PDF:', error); 545 | throw error; 546 | } 547 | } 548 | 549 | async ingestToVectorStore(): Promise<void> { 550 | try { 551 | console.log('Starting PDF ingestion process...'); 552 | 553 | const rawDocuments = await this.load(); 554 | console.log(`PDF loaded: ${rawDocuments.length} sections`); 555 | 556 | console.log('Splitting documents into chunks...'); 557 | const textSplitter = new RecursiveCharacterTextSplitter({ 558 | chunkSize: 400, 559 | chunkOverlap: 0, 560 | }); 561 | 562 | const splitDocuments = await textSplitter.splitDocuments(rawDocuments); 563 | console.log(`Documents split into ${splitDocuments.length} chunks`); 564 | 565 | console.log('Initializing Google embeddings...'); 566 | const embeddings = new GoogleEmbeddings(); 567 | 568 | console.log('Connecting to PostgreSQL vector store...'); 569 | const vectorStore = await PGVectorStore.initialize(embeddings, { 570 | postgresConnectionOptions: { 571 | connectionString: process.env.DATABASE_URL, 572 | }, 573 | tableName: process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents', 574 | columns: { 575 | idColumnName: 'id', 576 | vectorColumnName: 'vector', 577 | contentColumnName: 'content', 578 | metadataColumnName: 'metadata', 579 | }, 580 | }); 581 | 582 | console.log('Adding documents to vector store...'); 583 | await vectorStore.addDocuments(splitDocuments); 584 | 585 | console.log('PDF ingestion completed successfully!'); 586 | console.log(`Total chunks processed: ${splitDocuments.length}`); 587 | 588 | await vectorStore.end(); 589 | 590 | } catch (error) { 591 | console.error('Error during PDF ingestion:', error); 592 | process.exit(1); 593 | } 594 | } 595 | } 596 | 597 | async function main() { 598 | const pdfPath = process.env.PDF_PATH || './document.pdf'; // honors PDF_PATH from .env 599 | const loader = new PDFLoader(pdfPath); 600 | await loader.ingestToVectorStore(); 601 | } 602 | 603 | // Run ingestion 604 | main(); 605 | ``` 606 | 607 |
608 |
609 | 610 | The `PDFLoader` class encapsulates the entire ingestion process, from file loading to storage in the vector database. The `load` method uses LangChain.js PDFLoader for robust text extraction. `ingestToVectorStore` coordinates the complete processing pipeline. 611 | 612 | ## Automatic PostgreSQL Schema 613 | 614 | The `PGVectorStore` automatically creates an optimized schema for vector storage and search. The pdf_documents table includes: 615 | 616 | - **id -** UUID primary key for unique identification 617 | - **content -** Original text of the chunk extracted from PDF 618 | - **vector -** 768-dimension embeddings generated by Gemini 619 | - **metadata -** Structural information like page, source, and context 620 | 621 | ```sql 622 | CREATE TABLE pdf_documents ( 623 | id UUID PRIMARY KEY, 624 | content TEXT, 625 | vector VECTOR(768), 626 | metadata JSONB 627 | ); 628 | 629 | CREATE INDEX ON pdf_documents USING hnsw (vector vector_cosine_ops); 630 | ``` 631 | 632 | The `HNSW index` optimizes vector search, offering logarithmic complexity versus traditional linear search. 633 | 634 | ## RAG Search System: Intelligent Retrieval + Generation 635 | 636 | ### Advanced Semantic Search Theory 637 | 638 | The semantic search pipeline represents a fundamental transformation in how computational systems find relevant information. Unlike traditional keyword search, semantic search uses vector representations to capture conceptual meaning. 639 | 640 | The process begins with converting the user's question into a vector embedding using the same model used during ingestion. This query embedding is then compared with all stored embeddings using mathematical similarity metrics. The HNSW algorithm accelerates this comparison, reducing complexity from O(n) to O(log n). 641 | 642 | Results are ranked by _similarity score_, where lower values indicate greater similarity in cosine space. _Context assembly_ concatenates the most relevant chunks, creating rich context for response generation. 643 | 654 | ## Anti-Hallucination Prompt Engineering 655 | 656 | The prompt template implements sophisticated strategies to _prevent hallucinations_ and ensure response factuality. Explicit instructions emphasize exclusive use of provided context. Fallback response provides default answer for cases where information is not available. Low temperature of 0.1 reduces creativity and increases determinism. Negative examples demonstrate cases where the correct answer is "I don't know".
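As an illustration of these strategies, a system prompt in this spirit could look like the sketch below; the exact wording and the `{context}` placeholder are assumptions, not the repository's actual template:

```typescript
// Illustrative anti-hallucination system prompt (hypothetical wording).
const ANTI_HALLUCINATION_PROMPT = `
Answer ONLY using the CONTEXT below.
If the answer is not in the CONTEXT, reply exactly:
"I don't have the necessary information to answer your question."
Never invent facts, numbers, or sources.

CONTEXT:
{context}
`;
```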
657 | 658 | This approach ensures that the system always recognizes limitations of available knowledge, preferring to admit ignorance rather than invent information. 659 | 660 | ## CLI Interface: Exceptional User Experience 661 | 662 | ### User-Centered Design 663 | 664 | The CLI interface was designed considering user experience principles applied to AI systems. Immediate feedback through progress indicators keeps users informed about ongoing operations. Special commands like `help, status, clear, and exit` offer intuitive control. Graceful error handling presents informative messages that guide users in problem resolution. Non-blocking asynchronous interface maintains responsiveness even during computationally intensive operations. 665 | 666 | ### Interactive Interface Implementation 667 | 668 | The implementation combines native Node.js readline with advanced command logic to create a fluid and intuitive experience. 669 | 670 |
src/chat.ts 671 |
672 | 673 | ```typescript 674 | import { createInterface } from "readline"; 675 | import { searchPrompt, RAGSearch } from "./search"; 676 | 677 | // Function to print initial banner with system information 678 | function printBanner(): void { 679 | console.log('='.repeat(60)); 680 | console.log('RAG CHAT - PDF Question and Answer System'); 681 | console.log('Powered by Google Gemini + LangChain + pgVector'); 682 | console.log('⚡ TypeScript + Node.js Implementation'); 683 | console.log('='.repeat(60)); 684 | console.log("Special commands:"); 685 | console.log(" • 'exit', 'quit', 'sair', 'q' - Closes the program"); 686 | console.log(" • 'help' - Shows available commands"); 687 | console.log(" • 'clear' - Clears the screen"); 688 | console.log(" • 'status' - Checks system status"); 689 | console.log('='.repeat(60)); 690 | } 691 | 692 | // Function to print help instructions 693 | function printHelp(): void { 694 | console.log('\n AVAILABLE COMMANDS:'); 695 | console.log(' exit, quit, sair, q - Closes the program'); 696 | console.log(' help - Shows available commands'); 697 | console.log(' clear - Clears the screen'); 698 | console.log(' status - Checks system status'); 699 | console.log(' [any text] - Asks a question about the PDF'); 700 | console.log('\n USAGE TIPS:'); 701 | console.log(' • Ask specific questions about the PDF content'); 702 | console.log(' • The system responds only based on the document'); 703 | console.log(' • Out-of-context questions return "I don\'t have information"'); 704 | console.log(); 705 | } 706 | 707 | // Function to clear the console screen 708 | function clearScreen(): void { 709 | console.clear(); 710 | } 711 | 712 | async function checkStatus(searchSystem: RAGSearch | null): Promise<void> { 713 | console.log('\n RAG SYSTEM STATUS:'); 714 | console.log('='.repeat(40)); 715 | 716 | if (!searchSystem) { 717 | console.log('System: NOT INITIALIZED'); 718 | console.log('\n TROUBLESHOOTING CHECKLIST:'); 719 | console.log(' 1. Is PostgreSQL running?'); 720 | console.log(' → Command: docker compose up -d'); 721 | console.log(' 2. Has ingestion been executed?'); 722 | console.log(' → Command: npm run ingest'); 723 | console.log(' 3. Is the API Key configured?'); 724 | console.log(' → File: .env (GOOGLE_API_KEY)'); 725 | console.log(' 4. Are dependencies installed?'); 726 | console.log(' → Command: npm install'); 727 | return; 728 | } 729 | 730 | try { 731 | const systemStatus = await searchSystem.getSystemStatus(); 732 | 733 | console.log('RAG System: OPERATIONAL'); 734 | console.log('PostgreSQL Connection: OK'); 735 | console.log('pgVector Extension: OK'); 736 | console.log('Google Gemini API: OK'); 737 | console.log(`Vector Database: ${systemStatus.isReady ?
'READY' : 'NOT READY'}`); 738 | 739 | if (systemStatus.chunksCount > 0) { 740 | console.log(`Available chunks: ${systemStatus.chunksCount}`); 741 | } 742 | 743 | console.log('\n System ready to answer questions!'); 744 | } catch (error) { 745 | console.log('Status: PARTIALLY OPERATIONAL'); 746 | console.log(`Error checking system status: ${error}`); 747 | } 748 | 749 | console.log('='.repeat(40)); 750 | } 751 | 752 | // Main function to initialize RAG system and handle user input 753 | async function main(): Promise<void> { 754 | console.log('STEP 6: Initializing the RAG Chat CLI Interface'); 755 | 756 | printBanner(); 757 | 758 | console.log('\n PHASE 1: INITIALIZING RAG SYSTEM'); 759 | const searchSystem = await searchPrompt(); 760 | 761 | if (!searchSystem) { 762 | console.log('\n CRITICAL ERROR: RAG system could not be initialized!'); 763 | console.log('\n POSSIBLE CAUSES AND SOLUTIONS:'); 764 | console.log(' 1. PostgreSQL is not running'); 765 | console.log(' → Solution: docker compose up -d'); 766 | console.log(' 2. Ingestion process has not been executed'); 767 | console.log(' → Solution: npm run ingest'); 768 | console.log(' 3. GOOGLE_API_KEY is not configured or invalid'); 769 | console.log(' → Solution: Configure in the .env file'); 770 | console.log(' 4. Node.js dependencies are not installed'); 771 | console.log(' → Solution: npm install'); 772 | console.log(' 5. pgVector extension has not been created'); 773 | console.log(' → Solution: Check Docker logs'); 774 | 775 | process.exit(1); 776 | } 777 | 778 | console.log('PHASE 1: RAG system initialized successfully!\n'); 779 | 780 | // PHASE 2: SETUP COMMAND LINE INTERFACE 781 | const rl = createInterface({ 782 | input: process.stdin, 783 | output: process.stdout, 784 | prompt: '\n Ask a question: ' 785 | }); 786 | 787 | // Helper function to capture user input asynchronously 788 | const askQuestion = (prompt: string): Promise<string> => { 789 | return new Promise((resolve) => { 790 | rl.question(prompt, resolve); 791 | }); 792 | }; 793 | 794 | console.log('System ready! Type your question or "help" to see commands.'); 795 | 796 | // PHASE 3: MAIN CHAT LOOP 797 | while(true) { 798 | try { 799 | // Capture user input 800 | const userInput = (await askQuestion('\n Ask a question: ')).trim(); 801 | 802 | // COMMAND PROCESSING: Analyze whether it is a special command or a question 803 | const command = userInput.toLowerCase(); 804 | 805 | // Exit commands 806 | if (['exit', 'quit', 'sair', 'q'].includes(command)) { 807 | console.log('\n Thank you for using RAG Chat. Goodbye!\n'); 808 | console.log('System shutting down...'); 809 | break; 810 | } 811 | 812 | // Help command 813 | if (['ajuda', 'help', 'h', '?'].includes(command)) { 814 | printHelp(); 815 | continue; 816 | } 817 | 818 | // Clear screen command 819 | if (['limpar', 'clear', 'cls'].includes(command)) { 820 | clearScreen(); 821 | printBanner(); 822 | continue; 823 | } 824 | 825 | // Status command 826 | if (['status', 'info', 's'].includes(command)) { 827 | await checkStatus(searchSystem); 828 | continue; 829 | } 830 | 831 | // Validate empty input 832 | if (!userInput) { 833 | console.log('Empty input. 
Type a question or "help" to see commands.'); 834 | continue; 835 | } 836 | 837 | // QUESTION PROCESSING: Forward the question to the RAG system 838 | console.log('\n Processing your question...'); 839 | console.log('Searching PDF knowledge...'); 840 | 841 | const startTime = Date.now(); 842 | 843 | // Call the complete RAG pipeline 844 | const answer = await searchSystem.generateAnswer(userInput); 845 | 846 | const endTime = Date.now(); 847 | const responseTime = ((endTime - startTime) / 1000).toFixed(2); 848 | 849 | // FORMATTED RESPONSE DISPLAY 850 | console.log('\n' + '='.repeat(80)); 851 | console.log(`QUESTION: ${userInput}`); 852 | console.log('='.repeat(80)); 853 | console.log(`🤖 RESPONSE:`); 854 | console.log(answer); 855 | console.log('='.repeat(80)); 856 | console.log(`⚡ Response time: ${responseTime}s`); 857 | } catch (error) { 858 | // ERROR HANDLING 859 | if (error instanceof Error && error.message.includes('SIGINT')) { 860 | // Ctrl+C was pressed 861 | console.log('\n\n Interruption detected (Ctrl+C)'); 862 | console.log('👋 Chat closed by user. See you next time!'); 863 | break; 864 | } else { 865 | // Other errors 866 | console.log(`\n Unexpected error during processing:`); 867 | console.log(` ${error}`); 868 | console.log('\n You can:'); 869 | console.log(' • Try again with another question'); 870 | console.log(' • Type "status" to check the system'); 871 | console.log(' • Type "exit" to quit'); 872 | } 873 | } 874 | } 875 | 876 | rl.close(); 877 | } 878 | 879 | // EVENT HANDLERS: Operating system signal management 880 | 881 | // Handler for Ctrl+C (SIGINT) 882 | process.on('SIGINT', () => { 883 | console.log('\n\n Interrupt signal received (Ctrl+C)'); 884 | console.log('Cleaning up resources...'); 885 | console.log('RAG Chat closed. See you later!'); 886 | process.exit(0); 887 | }); 888 | 889 | // Handler for uncaught errors 890 | process.on('uncaughtException', (error) => { 891 | console.error('\n Uncaught FATAL ERROR:', error); 892 | console.error('Restart the application: npm run start'); 893 | process.exit(1); 894 | }); 895 | 896 | // Handler for rejected promises 897 | process.on('unhandledRejection', (reason, promise) => { 898 | console.error('\n Unhandled rejected promise:', reason); 899 | console.error('Promise:', promise); 900 | }); 901 | 902 | // ENTRY POINT: Run the main function 903 | main().catch((error) => { 904 | console.error('\n FATAL ERROR in main application:', error); 905 | console.error('Try restarting: npm run start'); 906 | process.exit(1); 907 | }); 908 | ``` 909 | 910 |
911 |
912 | 913 | The `RAGSearch` class encapsulates complete search and generation functionality. `searchDocuments` executes vector search and returns formatted results with scores. `generateAnswer` orchestrates the complete RAG pipeline. 914 | 915 | The `printBanner` function presents essential information about the system and available commands. `checkStatus` offers detailed component diagnostics, facilitating troubleshooting. The main loop processes commands and questions with robust error handling. 916 | 917 | ## Comprehensive Execution and Validation 918 | 919 | ### Optimized Execution Sequence 920 | 921 | Execution follows a logical sequence that ensures correct initialization of all components. First, initialize infrastructure: 922 | 923 | ```bash 924 | docker-compose up -d 925 | ``` 926 | 927 | This command brings up PostgreSQL with pgVector. Verify container status: 928 | 929 | ```bash 930 | docker ps 931 | ``` 932 | 933 | This command confirms correct operation. Execute ingestion to process PDF documents: 934 | 935 | ```bash 936 | npm run dev:ingest 937 | ``` 938 | 939 | Finally, start interactive chat for system interaction: 940 | 941 | ```bash 942 | npm run dev:chat 943 | ``` 944 | 945 | ## Comprehensive Test Scenarios 946 | 947 | The system supports various test scenarios that validate complete functionality. Questions within the PDF context should return responses based exclusively on processed content. Questions outside the context should result in the default response "I don't have the necessary information to answer your question." Special commands like status, help, and clear should work correctly. 948 | 949 | ## Systematic Troubleshooting 950 | 951 | Common problems have well-defined solutions that can be identified through specific error messages: 952 | 953 | - **Error: "Google API key is not set"**: This error indicates the need to configure the GOOGLE_API_KEY environment variable in the .env file. Verify that the file contains the valid API key obtained from Google AI Studio. 954 | 955 | - **Error: "Vector store not initialized"**: This message suggests that PostgreSQL is not operational or the ingestion process was not executed. Confirm that Docker containers are running and execute document ingestion. 956 | 957 | - **Error: "No documents found"**: This problem indicates that the ingestion process needs to be executed to populate the vector database with processed PDF chunks. 958 | 959 | - **Error: "Connection refused"**: This failure points to PostgreSQL being offline, resolvable by checking Docker container status and reinitializing infrastructure if necessary. 960 | 961 | ## Advanced Production Considerations 962 | 963 | ### Optimized Performance and Scalability 964 | 965 | The implemented optimizations ensure adequate performance for production use. Batch processing during ingestion implements rate limiting for external APIs, avoiding throttling. Connection pooling in PostgreSQL allows multiple simultaneous connections. HNSW indexing offers sub-second search even with millions of vectors. Asynchronous operations maintain application responsiveness. 966 | 967 | Performance metrics demonstrate system efficiency. Ingestion processes a 50-page PDF in approximately 30 seconds. Search returns results in 2-3 seconds per question. Throughput supports more than 100 questions per minute on modest hardware. 968 | 969 | ## Robust Security and Reliability 970 | 971 | Security implementations follow best practices for production applications. 
Environment variables isolate secrets from source code. Input validation and sanitization prevent injection attacks. Robust error handling prevents leakage of sensitive information. Graceful shutdown handling ensures proper resource cleanup. 972 | 973 | Recommended monitoring includes structured logs using libraries like `Winston` or `Pino`. Performance metrics can be collected with `Prometheus`. Automatic health checks monitor component availability. Rate limiting per user prevents resource abuse. This remains a tip for future improvements. 974 | 975 | ## Future Improvements Roadmap 976 | 977 | The technical roadmap identifies evolution opportunities. Migration from CLI to REST API will facilitate integration with web applications. `React` or `Next.js` interface will offer modern visual experience. Multi-tenancy support will allow multiple users and documents. `Redis` cache for frequent responses will reduce latency. `OpenTelemetry` integration will provide complete observability. 978 | 979 | ## References and Resources for Deep Dive 980 | 981 | ### Project Documentation and Repository 982 | 983 | The complete code for this RAG system is available in the official repository **[rag-search-ingestion-langchainjs-gemini](https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini)**, where you will find functional implementation, detailed installation instructions, usage examples, and complete documentation of all developed components. The repository includes Docker configuration files ready for production, automation scripts for development, and specific test cases that demonstrate practical application of the concepts presented in this article. 984 | 985 | ### RAG Theoretical Foundations 986 | 987 | For in-depth understanding of theoretical foundations, the original paper "**[Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks" by Lewis et al](https://dl.acm.org/doi/abs/10.5555/3495724.3496517)**. at the NeurIPS 2020 conference establishes the fundamental principles of RAG architecture. The research "**[Dense Passage Retrieval for Open-Domain Question Answering" by Karpukhin et al](https://arxiv.org/abs/2004.04906)**. explores advanced dense retrieval techniques that underpin modern semantic search systems. The work "**[In-Context Retrieval-Augmented Language Models](https://arxiv.org/abs/2302.00083)**" presents recent evolutions in dynamic context integration in language models. 988 | 989 | ### Technologies and Frameworks 990 | 991 | The official LangChain.js documentation at **[https://js.langchain.com/](https://js.langchain.com/)** offers complete guides on AI pipeline implementation, including specific tutorials on integration with different embedding providers and language models. The Google AI Developer Documentation at **[https://ai.google.dev/docs](https://ai.google.dev/docs)** provides detailed technical specifications about Gemini APIs, including rate limits, prompt engineering best practices, and performance optimizations. 992 | 993 | For PostgreSQL and pgVector, the official documentation at **[https://github.com/pgvector/pgvector](https://github.com/pgvector/pgvector)** contains technical specifications about HNSW index implementation, performance configurations, and scaling strategies for large volumes of vector data. The PostgreSQL Documentation at **[https://www.postgresql.org/docs/](https://www.postgresql.org/docs/)** offers fundamentals on database administration, query optimization, and advanced configurations for high-performance applications. 
994 | 995 | ### Embedding Models and Vector Search 996 | 997 | Deep understanding of embeddings can be expanded through the research "**[Attention Is All You Need](https://arxiv.org/abs/1706.03762)**" which introduces the Transformer architecture fundamental to modern embedding models. The paper "**[Efficient Estimation of Word Representations in Vector Space" by Mikolov et al](https://arxiv.org/abs/1301.3781)**. establishes mathematical foundations of semantic vector representations. For vector search algorithms, "**[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320)**" details implementation and optimizations of the HNSW algorithm used by pgVector. 998 | 999 | ### Prompt Engineering and Hallucination Control 1000 | 1001 | The research "**[Constitutional AI: Harmlessness from AI Feedback](https://arxiv.org/abs/2212.08073)**" explores advanced techniques for behavior control in language models. "**[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https://arxiv.org/abs/2201.11903)**" demonstrates prompt structuring strategies for complex reasoning. "**[Instruction Following with Large Language Models](https://arxiv.org/abs/2506.13734)**" offers insights on effective instruction design for RAG systems. 1002 | 1003 | ### Practical Resources and Tutorials 1004 | 1005 | LangChain Cookbook at **[https://github.com/langchain-ai/langchain/tree/master/cookbook](https://github.com/langchain-ai/langchain/tree/master/cookbook)** contains practical examples of implementing different RAG patterns. Pinecone Learning Center at **[https://www.pinecone.io/learn/](https://www.pinecone.io/learn/)** offers tutorials on vector databases and semantic search applications. Weaviate Documentation at **[https://weaviate.io/developers/weaviate/](https://weaviate.io/developers/weaviate/)** presents alternatives for vector storage and their technical specificities. 1006 | 1007 | ## Author and Contributions 1008 | 1009 | This project was developed by Glaucia Lemos, A.I Developer Specialist, who shares knowledge through multiple platforms. Her social media profiles include Twitter at **[https://twitter.com/glaucia86](https://twitter.com/glaucia86)** for technical updates and development insights, LinkedIn at **[https://www.linkedin.com/in/glaucialemos/](https://www.linkedin.com/in/glaucialemos/)** for professional networking and technical articles, and YouTube at **[https://www.youtube.com/@GlauciaLemos](https://www.youtube.com/@GlauciaLemos)** for video tutorials and technical talks about modern development. 1010 | -------------------------------------------------------------------------------- /tutorial/article.md: -------------------------------------------------------------------------------- 1 | # Sistema RAG Completo: Zero to Hero com TypeScript, Docker, Google Gemini e LangChain.js 2 | 3 | ![alt text](./resource/rag-docker-ts-langchain.jpg) 4 | 5 | A implementação de sistemas de Retrieval-Augmented Generation (RAG) representa uma das abordagens mais promissoras para resolver as limitações fundamentais dos Large Language Models modernos. 
Este artigo apresenta uma jornada completa na construção de um sistema RAG robusto e escalável, utilizando **[TypeScript](https://www.typescriptlang.org/)** como base de desenvolvimento, **[Docker](https://www.docker.com/)** para orquestração de infraestrutura, **[Google Gemini](https://ai.google.dev/gemini-api/docs/quickstart?hl=pt-br)** para inteligência artificial e **[LangChain.js](https://js.langchain.com/docs/introduction/)** como framework de integração. 6 | 7 | Nossa solução permite que usuários façam perguntas em linguagem natural sobre documentos PDF, combinando busca semântica avançada com geração de respostas contextuais precisas. O sistema demonstra como integrar tecnologias de ponta para criar aplicações de IA práticas e escaláveis, abordando desde a extração e processamento de documentos até a geração de respostas contextualmente relevantes. 8 | 9 | As tecnologias principais que formam o backbone desta implementação incluem Node.js versão 22 ou superior para runtime JavaScript moderno, TypeScript 5.9 ou superior para tipagem estática robusta, LangChain.js 0.3 ou superior como framework de orquestração de IA, Google Gemini API para embeddings e geração de texto, PostgreSQL 15 ou superior com a extensão pgVector para armazenamento e busca vetorial, e Docker para containerização e implantação simplificada. 10 | 11 | > observação: como muitos já sabem, estou fazendo o **[MBA em Engenharia de Software em A.I na FullCycle](https://ia.fullcycle.com.br/mba-ia/?utm_source=google_search&utm_campaign=search_mba-arquitetura&utm_medium=curso_especifico&utm_content=search_mba-arquitetura&gad_source=1&gad_campaignid=21917349974&gclid=Cj0KCQjww4TGBhCKARIsAFLXndQejvz0K1XTOHQ3CSglzOlQfVH64T2CS1qZnwkiyChx0HoXzaK4KY0aAosOEALw_wcB)**, e este artigo é baseado em um dos projetos práticos do curso. Não estou fazendo jabá, apenas compartilhando o conhecimento aprendido para que outros possam se beneficiar também. Mas, caso queira saber mais sobre o MBA, clique no link anterior. 12 | 13 | ## Compreendendo RAG e sua importância fundamental 14 | 15 | ### O Desafio dos LLMs Tradicionais 16 | 17 | Large Language Models como GPT, Claude e Gemini revolucionaram o processamento de linguagem natural, mas enfrentam limitações que impedem sua aplicação direta em cenários empresariais e especializados. O conhecimento destes modelos permanece estático, sendo limitado aos dados de treinamento até uma data específica, criando uma lacuna temporal que pode ser crítica em domínios onde informações atualizadas são essenciais. 18 | 19 | Além disso, estes modelos tendem a produzir alucinações, inventando informações quando não possuem conhecimento suficiente sobre um tópico. Esta característica pode ser particularmente problemática em aplicações que exigem precisão factual. Os LLMs também carecem de contexto específico sobre dados internos de empresas ou documentos especializados, limitando sua utilidade em cenários onde conhecimento especializado é necessário. 20 | 21 | A impossibilidade de atualização pós-treinamento representa outro obstáculo significativo. Uma vez treinado, um modelo não pode aprender novos fatos ou incorporar informações atualizadas sem um processo completo de retreinamento, que é custoso e complexo. 22 | 23 | ## RAG como solução arquitetural elegante 24 | 25 | Retrieval-Augmented Generation emerge como uma arquitetura que resolve elegantemente essas limitações através da combinação de dois componentes fundamentais. 
26 | 27 | - **O componente de Retrieval (Recuperação):** funciona como um sistema de busca inteligente que encontra informações relevantes em uma base de conhecimento externa. 28 | 29 | - **O componente de Generation (Geração):** utiliza um LLM para gerar respostas baseadas exclusivamente no contexto recuperado, garantindo que as respostas sejam fundamentadas em informações verificáveis. 30 | 31 | O fluxo de processamento segue uma sequência lógica onde uma consulta do usuário é convertida em embedding vetorial, que é então usado para busca por similaridade no banco vetorial. Os documentos mais relevantes são recuperados e concatenados em um contexto, que é fornecido ao LLM junto com a pergunta original para geração da resposta final. 32 | 33 | ## Vantagens técnicas transformadoras 34 | 35 | A arquitetura RAG oferece factualidade através de respostas baseadas em fontes verificáveis, eliminando a necessidade de confiar exclusivamente no conhecimento interno do modelo. A atualização é garantida pois a base de conhecimento pode ser atualizada sem necessidade de retreinar o modelo, permitindo incorporação de novos documentos e informações em tempo real. 36 | 37 | A transparência é uma característica fundamental, pois permite rastrear as fontes das informações utilizadas na geração das respostas. A custo-efetividade é significativa, pois evita a necessidade de fine-tuning de modelos, que requer recursos computacionais massivos e expertise técnica especializada. 38 | 39 | ## Arquitetura do sistema: visão técnica abrangente 40 | 41 | ### Arquitetura de alto nível detalhada 42 | 43 | A arquitetura do sistema RAG pode ser visualizada como um pipeline de processamento que transforma documentos PDF em uma base de conhecimento pesquisável e utiliza essa base para responder perguntas em linguagem natural. O processo começa com um documento PDF que passa por extração de texto, seguida por segmentação inteligente usando LangChain.js. Os segmentos resultantes são convertidos em embeddings vetoriais através do modelo Gemini. 44 | 45 | > observação: embora o artigo enfoque em arquivos PDF, numa aplicação RAG, poderíamos utilizar qualquer fonte de dados, como: bancos de dados relacionais, NoSQL, APIs, documentos Word, planilhas Excel, entre outros. 46 | 47 | Estes embeddings são armazenados em PostgreSQL com a extensão **[pgVector](https://www.postgresql.org/about/news/pgvector-070-released-2852/)**, criando uma base de conhecimento pesquisável. Quando um usuário faz uma pergunta, ela é convertida em embedding e usada para busca por similaridade no banco vetorial. Os documentos mais relevantes são recuperados e montados em contexto, que é então enviado para o Google Gemini junto com a pergunta para geração da resposta final. 48 | 49 | ### Afinal, o que são embeddings? 50 | 51 | Embeddings são representações numéricas de dados, como texto ou imagens, em um espaço vetorial de alta dimensão. Eles capturam o significado semântico dos dados, permitindo que máquinas compreendam e processem informações de maneira mais eficaz. No contexto de RAG, embeddings são usados para transformar consultas e documentos em vetores que podem ser comparados para encontrar similaridades. 52 | 53 | - Exemplo: 54 | 55 | ```text 56 | "gato" -> [0.1, 0.3, 0.5, ...] 57 | "cachorro" -> [0.2, 0.4, 0.6, ...]
58 | ``` 59 | 60 | Deixo a recomendação da documentação oficial do Gemini que explica com mais detalhes sobre embeddings: **[Embeddings](https://ai.google.dev/gemini-api/docs/embeddings?hl=pt-br)** 61 | 62 | ## Componentes tecnológicos em profundidade 63 | 64 | Para deixar a aplicação simples e fácil de executar, utilizei interfaces que utilizam Node.js com TypeScript para runtime e tipagem estática robusta. A Readline Interface fornece uma CLI interativa para testes e demonstrações, permitindo interação natural com o sistema. 65 | 66 | Para processamento de documentos, usamos as seguintes bibliotecas: 67 | 68 | - **[LangChain.js](https://js.langchain.com/docs/introduction/):** serve como framework principal para aplicações LLM, oferecendo abstrações de alto nível para tarefas comuns. 69 | 70 | - **[RecursiveCharacterTextSplitter](https://js.langchain.com/docs/concepts/text_splitters/):** implementa algoritmo inteligente de chunking que preserva contexto semântico. 71 | 72 | - **[PDF-Parse](https://www.npmjs.com/package/pdf-parse):** realiza extração limpa de texto de documentos PDF. 73 | 74 | Os embeddings e IA são gerenciados através da Google Gemini API, utilizando o modelo embedding-001 para geração de embeddings de 768 dimensões e **[gemini-2.0-flash](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash?hl=pt-br)** para geração de respostas otimizadas. 75 | 76 | O banco de dados vetorial combina _PostgreSQL 15_ ou superior como banco relacional robusto com _pgVector_ como extensão para busca vetorial eficiente. _HNSW Indexing_ implementa algoritmo de busca aproximada que oferece performance para buscas em milissegundos mesmo em grandes volumes de dados. 77 | 78 | A infraestrutura utiliza _Docker Compose_ para orquestração de containers, simplificando deployment e gerenciamento de dependências. Environment Variables proporcionam configuração flexível e segura. 79 | 80 | ### O que é HNSW Indexing? 81 | 82 | _HNSW Indexing_ significa _Hierarchical Navigable Small World Graph Indexing._ 83 | É uma técnica muito usada em busca aproximada por vizinhos mais próximos _(Approximate Nearest Neighbor Search – ANN)_ em bases vetoriais, como quando você precisa recuperar embeddings de texto, imagens ou áudio de forma rápida. 84 | 85 | #### Como funciona? 86 | 87 | - Ele organiza os vetores em uma estrutura de grafo hierárquico. 88 | 89 | - Nos níveis superiores, você tem conexões mais gerais entre vetores, que permitem "pulos longos" pelo espaço de busca. 90 | 91 | - Conforme vai descendo nos níveis, os grafos ficam mais densos e conectados, permitindo buscas mais precisas e locais. 92 | 93 | - Isso cria um equilíbrio entre velocidade (saltos rápidos entre regiões) e precisão (ajuste fino nos níveis inferiores). 94 | 95 | #### Por que é importante? 96 | 97 | - **Alta performance:** consegue buscar vizinhos em milhões de vetores com baixa latência. 98 | 99 | - **Escalabilidade:** é eficiente tanto em memória quanto em tempo, mesmo em bases grandes. 100 | 101 | - **Uso comum:** é o índice padrão em várias bibliotecas de bases vetoriais, como FAISS (Facebook AI Similarity Search), Milvus, Weaviate e Pinecone. 102 | 103 | ### Exemplo prático 104 | 105 | Imagine que você tem 10 milhões de embeddings de documentos. Se fosse comparar cada consulta com todos, seria inviável. 106 | 107 | Com HNSW, você consegue encontrar os documentos semanticamente mais próximos em milissegundos, sem percorrer todos os vetores.
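Para visualizar o que o HNSW evita, o esboço em TypeScript abaixo (meramente ilustrativo, não faz parte do projeto) mostra a busca exaustiva que compara a consulta com todos os vetores da base:

```typescript
// Similaridade de cosseno entre dois vetores de embeddings.
function similaridadeCosseno(a: number[], b: number[]): number {
  let produto = 0;
  let normaA = 0;
  let normaB = 0;
  for (let i = 0; i < a.length; i++) {
    produto += a[i] * b[i];
    normaA += a[i] * a[i];
    normaB += b[i] * b[i];
  }
  return produto / (Math.sqrt(normaA) * Math.sqrt(normaB));
}

// Busca exaustiva (força bruta): custo O(n) por consulta; é exatamente o percurso
// que o HNSW substitui por uma navegação logarítmica pelo grafo.
function buscaForcaBruta(consulta: number[], base: number[][]): number {
  let melhorIndice = -1;
  let melhorSimilaridade = -Infinity;
  for (let i = 0; i < base.length; i++) {
    const sim = similaridadeCosseno(consulta, base[i]);
    if (sim > melhorSimilaridade) {
      melhorSimilaridade = sim;
      melhorIndice = i;
    }
  }
  return melhorIndice;
}
```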
108 | 109 | Não estarei entrando em detalhes sobre o HNSW Indexing, mas caso queira dar uma olhada numa implementação prática usando TypeScript, deixo o link do repositório do projeto que criei: **[HNSW + Gemini + LangChain.js - Clean Architecture](https://github.com/glaucia86/hnsw-gemini-langchainjs)**. Num outro artigo, posso detalhar mais sobre o HNSW Indexing e quebrar essa implementação em partes para que fique mais fácil de entender. 110 | 111 | ## Pipeline RAG Detalhado 112 | 113 | O pipeline de ingestão segue a sequência: 114 | 115 | > PDF → Text Extraction → Chunking → Embeddings → Vector Storage. 116 | 117 | Cada etapa é otimizada para preservar máxima informação semântica enquanto prepara os dados para busca eficiente. 118 | 119 | O pipeline de consulta executa: 120 | 121 | > User Query → Query Embedding → Similarity Search → Context Assembly → LLM Generation → Response. 122 | 123 | Este processo garante que cada resposta seja fundamentada em evidências específicas dos documentos processados. 124 | 125 | ## Configuração do Ambiente de Desenvolvimento 126 | 127 | ### Pré-requisitos Técnicos Essenciais 128 | 129 | O ambiente de desenvolvimento requer as seguintes versões mínimas: 130 | 131 | - **Node.js versão 22.0.0 ou superior** - Para suporte às funcionalidades mais recentes do JavaScript e performance otimizada 132 | - **NPM versão 10.0.0 ou superior** - Necessário para gerenciamento de dependências moderno 133 | - **Docker versão 24.0.0 ou superior** - Garante compatibilidade com recursos de containerização avançados 134 | - **Git versão 2.40.0 ou superior** - Essencial para controle de versão 135 | 136 | Para verificar as versões instaladas, execute os seguintes comandos em seu terminal: 137 | 138 | ```bash 139 | node --version # v22.0.0+ 140 | npm --version # 10.0.0+ 141 | docker --version # 24.0.0+ 142 | git --version # 2.40.0+ 143 | ``` 144 | 145 | ## Inicialização Completa do Projeto 146 | 147 | A estrutura do projeto começa com a criação de um diretório principal e subdiretório para código fonte: 148 | 149 | ```bash 150 | mkdir rag-system-typescript && cd rag-system-typescript 151 | mkdir src 152 | ``` 153 | 154 | A inicialização do Node.js é feita através do comando: 155 | 156 | ```bash 157 | npm init -y 158 | ``` 159 | 160 | Este comando cria o arquivo `package.json` com configurações padrão. 161 | 162 | As dependências de produção incluem pacotes essenciais para funcionalidade do sistema: 163 | 164 | ```bash 165 | npm install @google/generative-ai @langchain/core @langchain/community @langchain/textsplitters dotenv pg uuid 166 | ``` 167 | 168 | Estas bibliotecas fornecem integração com Google AI, framework LangChain, manipulação de variáveis de ambiente, conexão PostgreSQL e geração de identificadores únicos. 169 | 170 | As dependências de desenvolvimento garantem experiência de desenvolvimento robusta: 171 | 172 | ```bash 173 | npm install -D @types/node @types/pg @types/pdf-parse tsx typescript 174 | ``` 175 | 176 | Estas incluem definições de tipos TypeScript, compilador TypeScript e executor de desenvolvimento tsx. 177 | 178 | ## Configuração TypeScript Avançada 179 | 180 | O arquivo `tsconfig.json` define configurações de compilação que otimizam para desenvolvimento moderno e performance. 181 | 182 |
## Advanced TypeScript Configuration

The `tsconfig.json` file defines compiler settings optimized for modern development and performance.

**tsconfig.json**
```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "node",
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "allowSyntheticDefaultImports": true,
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true,
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "types": ["node"],
    "lib": ["ES2022", "DOM"]
  },
  "include": [
    "src/**/*"
  ],
  "exclude": [
    "node_modules",
    "dist",
    "**/*.test.ts",
    "**/*.spec.ts"
  ],
  "ts-node": {
    "esm": true
  }
}
```
## Intelligent Automation Scripts

The scripts in `package.json` automate common tasks:

```json
"scripts": {
  "build": "tsc",
  "start": "npm run build && node dist/chat.js",
  "ingest": "npm run build && node dist/ingest.js",
  "dev:chat": "tsx src/chat.ts",
  "dev:ingest": "tsx src/ingest.ts"
},
```

## Infrastructure: PostgreSQL + pgVector

### Theoretical Foundations of Vector Databases

Mathematical embeddings represent a revolution in how computers process and understand natural language. Texts are converted into high-dimensional vectors, where each dimension captures specific aspects of semantic meaning. For the _Gemini embedding-001_ model, each text is represented by 768 floating-point numbers.

Proximity in the vector space represents semantic similarity, allowing mathematical algorithms to find related texts through distance calculations. For example, the phrases _"company revenue"_ and _"corporate income"_ would produce nearby vectors in the multidimensional space.

_pgVector_ adds native vector capabilities to PostgreSQL, including a vector data type for efficient storage, HNSW (Hierarchical Navigable Small World) indexes for fast search, and similarity operations such as cosine distance, Euclidean distance, and inner product.
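To make those similarity operations concrete, here is a hedged sketch of our own (not project code) that queries pgVector directly through the `pg` driver; it assumes the `pdf_documents` table created later in this article.

```typescript
import { Client } from 'pg';

// Illustrative sketch: pgVector exposes cosine distance (<=>), Euclidean
// distance (<->) and negative inner product (<#>) as SQL operators.
// Table and column names assume the schema shown later in this article.
async function nearestByCosine(queryVector: number[], k: number) {
  const client = new Client({ connectionString: process.env.DATABASE_URL });
  await client.connect();

  const result = await client.query(
    `SELECT content, vector <=> $1 AS cosine_distance
       FROM pdf_documents
      ORDER BY vector <=> $1
      LIMIT $2`,
    [`[${queryVector.join(',')}]`, k] // pgVector accepts the '[x,y,...]' literal form
  );

  await client.end();
  return result.rows;
}
```

In the application itself, LangChain's `PGVectorStore`, used later in this article, issues similar distance queries under the hood.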
## Advanced Docker Configuration

The `docker-compose.yml` file defines the complete infrastructure for the RAG system. The PostgreSQL service uses the **pgvector/pgvector:pg17** image, which ships PostgreSQL 17 with the pgVector extension pre-installed.

**docker-compose.yml**
```yaml
services:
  # Main service: PostgreSQL with pgVector extension
  postgres:
    image: pgvector/pgvector:pg17
    container_name: postgres_rag_ts
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: rag
    ports:
      - "5432:5432"
    volumes:
      # Data persistence
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      # Checks if the database is ready
      test: ["CMD-SHELL", "pg_isready -U postgres -d rag"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped

  # Auxiliary service: Initializes pgVector extension
  bootstrap_vector_ext:
    image: pgvector/pgvector:pg17
    depends_on:
      postgres:
        condition: service_healthy
    entrypoint: ["/bin/sh", "-c"]
    command: >
      PGPASSWORD=postgres
      psql "postgresql://postgres@postgres:5432/rag" -v ON_ERROR_STOP=1
      -c "CREATE EXTENSION IF NOT EXISTS vector;"
    restart: "no"

volumes:
  postgres_data:
```
The `bootstrap_vector_ext` service guarantees that the pgVector extension is created automatically once PostgreSQL is operational. The healthcheck monitors database availability before dependent services start.

## Starting and Verifying the Infrastructure

The infrastructure is started with:

```bash
docker-compose up -d
```

This command starts the containers in daemon mode. Check their status with:

```bash
docker ps
```

This command lists the active containers. The logs can be monitored with:

```bash
docker logs postgres_rag_ts
```

This command helps identify startup problems.

## Google Gemini Integration: An Advanced AI Client

### A Deeper Theory of Embeddings

Embeddings represent one of the most significant innovations in natural language processing, converting discrete text representations into continuous vectors of real numbers. These vectors capture complex semantic relationships, enabling mathematical operations over linguistic concepts.

The 768-number dimensionality of the embedding-001 model offers enough room to represent subtle semantic nuances while remaining computationally efficient. Vectors that are close in the multidimensional space correspond to semantically similar texts, enabling search by mathematical similarity.

Vector operations allow conceptual manipulation, where differences and sums of vectors can reveal analogical relationships. The classic example _"king" - "man" + "woman" ≈ "queen"_ demonstrates how embeddings capture abstract relational structures.
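As a toy illustration of that analogy (our own, with invented 3-dimensional vectors rather than real 768-dimensional embeddings), the arithmetic looks like this:

```typescript
// Toy example of embedding arithmetic; the vectors below are invented
// for illustration only — real embeddings have 768 dimensions.
const king  = [0.9, 0.8, 0.1];
const man   = [0.5, 0.1, 0.1];
const woman = [0.5, 0.1, 0.9];

// "king" - "man" + "woman": element-wise vector arithmetic
const result = king.map((value, i) => value - man[i] + woman[i]);

console.log(result); // ≈ the region of the space where "queen" would live
```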
### A Robust Google Client Implementation

The Google client implementation encapsulates all communication with the Gemini APIs, offering a clean interface and robust error handling.

**src/google-client.ts**
```typescript
import { config } from 'dotenv';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { Embeddings } from '@langchain/core/embeddings';

config();

export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}

export class GoogleClient {
  private googleApiKey: string;
  private embeddingModel: string;
  private chatModel: string;
  private genAI: GoogleGenerativeAI;

  constructor() {
    this.googleApiKey = process.env.GOOGLE_API_KEY || '';
    // Fall back to the documented defaults when the env vars are unset
    this.embeddingModel = process.env.GOOGLE_EMBEDDING_MODEL || 'embedding-001';
    this.chatModel = process.env.GOOGLE_CHAT_MODEL || 'gemini-2.0-flash';

    if (!this.googleApiKey) {
      throw new Error('Google API key is not set in environment variables.');
    }

    this.genAI = new GoogleGenerativeAI(this.googleApiKey);
  }

  async getEmbeddings(texts: string[]): Promise<number[][]> {
    const embeddings: number[][] = [];

    for (const text of texts) {
      try {
        // Uses the model configured via GOOGLE_EMBEDDING_MODEL
        const model = this.genAI.getGenerativeModel({ model: this.embeddingModel });
        const result = await model.embedContent(text);

        if (result.embedding && result.embedding.values) {
          embeddings.push(result.embedding.values);
        } else {
          console.log(`No embedding returned for text: ${text}`);
          const dummySize = 768;
          embeddings.push(new Array(dummySize).fill(0));
        }
      } catch (error) {
        console.log(`Error generating embedding: ${error}`);
        const dummySize = 768;
        embeddings.push(new Array(dummySize).fill(0));
      }
    }

    return embeddings;
  }

  async chatCompletions(messages: ChatMessage[], temperature: number = 0.1): Promise<string> {
    try {
      const model = this.genAI.getGenerativeModel({
        model: this.chatModel,
        generationConfig: {
          temperature,
          maxOutputTokens: 1000,
        }
      });

      let prompt = '';
      for (const message of messages) {
        const { role, content } = message;

        if (role === 'system') {
          prompt += `Instructions: ${content}\n\n`;
        } else if (role === 'user') {
          prompt += `${content}\n`;
        } else if (role === 'assistant') {
          prompt += `Assistant: ${content}\n`;
        }
      }

      const result = await model.generateContent(prompt);
      return result.response.text();
    } catch (error) {
      console.log(`Error generating chat completion: ${error}`);
      return 'Sorry, an error occurred while generating the response.';
    }
  }
}
```
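Before continuing, a quick usage sketch of this client (our own example, not from the repository):

```typescript
import { GoogleClient, ChatMessage } from './google-client';

// Minimal usage sketch of the client defined above.
const client = new GoogleClient();

const messages: ChatMessage[] = [
  { role: 'system', content: 'Answer only from the provided context.' },
  { role: 'user', content: 'What does the document say about revenue?' },
];

// The low default temperature (0.1) keeps answers deterministic.
const answer = await client.chatCompletions(messages);
console.log(answer);
```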
The `GoogleClient` class manages configuration and communication with the Gemini APIs. The `getEmbeddings` method processes texts one by one, implementing graceful error handling and a fallback for failure cases. `chatCompletions` converts structured messages into prompts optimized for Gemini.

The `GoogleEmbeddings` class extends the LangChain.js abstractions for seamless integration with existing frameworks. Since `src/ingest.ts` imports it from `./google-client`, it lives in the same file, right after `GoogleClient`.
**src/google-client.ts** (continued)
```typescript
export class GoogleEmbeddings extends Embeddings {
  private client: GoogleClient;

  constructor() {
    super({});
    this.client = new GoogleClient();
  }

  async embedDocuments(texts: string[]): Promise<number[][]> {
    console.log(`Generating embeddings for ${texts.length} documents...`);

    const batchSize = 10; // Process 10 texts at a time to keep requests manageable
    const allEmbeddings: number[][] = [];

    for (let i = 0; i < texts.length; i += batchSize) {
      const batchTexts = texts.slice(i, i + batchSize);
      const batchEmbeddings = await this.client.getEmbeddings(batchTexts);
      allEmbeddings.push(...batchEmbeddings);

      console.log(`Batch ${Math.floor(i / batchSize) + 1}: ${batchTexts.length} texts processed`);
    }

    return allEmbeddings;
  }

  // Method for embedding a single query
  async embedQuery(text: string): Promise<number[]> {
    const embeddings = await this.client.getEmbeddings([text]);
    return embeddings[0];
  }
}

// Factory function to create GoogleClient instances
export function getGoogleClient(): GoogleClient {
  return new GoogleClient();
}
```
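A short sketch (again ours) of how these embedding methods behave:

```typescript
import { GoogleEmbeddings } from './google-client';

// Usage sketch: one vector per document, one vector per query.
const embeddings = new GoogleEmbeddings();

const docVectors = await embeddings.embedDocuments([
  'Company revenue grew 20% in 2024.',
  'The office moved to a new address.',
]);
console.log(docVectors.length, docVectors[0].length); // 2 documents, 768 dimensions each

const queryVector = await embeddings.embedQuery('How much did revenue grow?');
console.log(queryVector.length); // 768
```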
## Secure Environment Configuration

The `.env` file centralizes sensitive configuration, separating credentials from source code for security and deployment flexibility.

```text
GOOGLE_API_KEY=your_google_api_key_here
GOOGLE_EMBEDDING_MODEL=models/embedding-001
GOOGLE_CHAT_MODEL=gemini-2.0-flash
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag
PG_VECTOR_COLLECTION_NAME=pdf_documents
PDF_PATH=./document.pdf
```

> Note: to create a Google Gemini API key, follow the steps described in the official documentation, **[AI Studio - Google](https://aistudio.google.com/apikey)**, and click `Create API Key`.

## The Ingestion System: From PDF to Intelligent Vectors

### Advanced Chunking Theory

Chunking is one of the most critical aspects of RAG systems, determining the quality and relevance of the answers. The fundamental challenge is that LLMs have limited context windows while documents can be long, creating the need for intelligent segmentation.

The chunking strategy must balance context size against information specificity. Chunks that are too large may contain irrelevant information that dilutes relevance. Chunks that are too small may lack enough context for full comprehension.

LangChain.js's `RecursiveCharacterTextSplitter` is very useful for textual documents, since it preserves the natural structure of paragraphs and sentences. In that case, parameters such as a `chunk_size` of around 1,000 characters and a `chunk_overlap` of 150-200 are a good starting point, balancing context and specificity.

However, since this project works with a _tabular PDF_, that strategy is not the most effective. For tables, we prefer to split the document line by line, guaranteeing that each record is an independent chunk. In addition, we include the table header in each fragment to preserve semantic clarity. With that, overlap becomes unnecessary (kept at 0) and the separators are adapted to prioritize line breaks (a sketch of this row-wise splitting appears after the next section).

This approach guarantees that each tabular entry is preserved in full and improves precision when retrieving information via RAG.

## The `RecursiveCharacterTextSplitter` Algorithm in Detail

The algorithm follows an intelligent fallback strategy that tries to split on natural separators before resorting to artificial breaks. First it tries to split on paragraphs using double line breaks. If the resulting chunks still exceed the maximum size, it splits on single lines. For chunks that are still too large, it splits on spaces between words. As a last resort, it splits character by character.

This approach guarantees that related information stays together whenever possible, preserving the semantic coherence needed for effective retrieval.
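And here is the row-wise variant promised above: a minimal sketch of our own, not the project's code (the actual `ingest.ts`, shown next, relies on `RecursiveCharacterTextSplitter` with `chunkOverlap: 0`).

```typescript
// Sketch of row-wise chunking for tabular text: every chunk is one record,
// prefixed with the table header so each fragment stays self-describing.
function splitTableByRow(tableText: string): string[] {
  const [header, ...rows] = tableText.split('\n').filter((line) => line.trim() !== '');
  return rows.map((row) => `${header}\n${row}`); // no overlap needed: rows are independent
}

// Example
const chunks = splitTableByRow('product;revenue\nA;100\nB;250');
console.log(chunks); // [ 'product;revenue\nA;100', 'product;revenue\nB;250' ]
```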
## The Complete Ingestion Implementation

The ingestion implementation combines PDF extraction, intelligent segmentation, embedding generation, and vector storage in an integrated pipeline.

**src/ingest.ts**
```typescript
import { config } from 'dotenv';
import { Document } from '@langchain/core/documents';
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
import { GoogleEmbeddings } from './google-client';
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import { PDFLoader as LangChainPDFLoader } from '@langchain/community/document_loaders/fs/pdf';

config();

class PDFLoader {
  constructor(private filePath: string) {}

  async load(): Promise<Document[]> {
    try {
      console.log(`Reading PDF file: ${this.filePath}`);

      const langChainLoader = new LangChainPDFLoader(this.filePath);
      const documents = await langChainLoader.load();

      console.log(`PDF loaded successfully! Found ${documents.length} pages`);
      return documents;
    } catch (error) {
      console.error('Error loading PDF:', error);
      throw error;
    }
  }

  async ingestToVectorStore(): Promise<void> {
    try {
      console.log('Starting PDF ingestion process...');

      const rawDocuments = await this.load();
      console.log(`PDF loaded: ${rawDocuments.length} sections`);

      console.log('Splitting documents into chunks...');
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 400,
        chunkOverlap: 0, // no overlap: each tabular record is an independent chunk
      });

      const splitDocuments = await textSplitter.splitDocuments(rawDocuments);
      console.log(`Documents split into ${splitDocuments.length} chunks`);

      console.log('Initializing Google embeddings...');
      const embeddings = new GoogleEmbeddings();

      console.log('Connecting to PostgreSQL vector store...');
      const vectorStore = await PGVectorStore.initialize(embeddings, {
        postgresConnectionOptions: {
          connectionString: process.env.DATABASE_URL,
        },
        tableName: process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents',
        columns: {
          idColumnName: 'id',
          vectorColumnName: 'vector',
          contentColumnName: 'content',
          metadataColumnName: 'metadata',
        },
      });

      console.log('Adding documents to vector store...');
      await vectorStore.addDocuments(splitDocuments);

      console.log('PDF ingestion completed successfully!');
      console.log(`Total chunks processed: ${splitDocuments.length}`);

      await vectorStore.end();

    } catch (error) {
      console.error('Error during PDF ingestion:', error);
      process.exit(1);
    }
  }
}

async function main() {
  // PDF_PATH comes from .env, falling back to the bundled sample document
  const pdfPath = process.env.PDF_PATH || './document.pdf';
  const loader = new PDFLoader(pdfPath);
  await loader.ingestToVectorStore();
}

// Run ingestion
main();
```
The `PDFLoader` class encapsulates the entire ingestion process, from file loading to storage in the vector database. The `load` method uses the LangChain.js PDFLoader for robust text extraction. `ingestToVectorStore` coordinates the complete processing pipeline.

## Automatic PostgreSQL Schema

`PGVectorStore` automatically creates a schema optimized for vector storage and search. The pdf_documents table includes:

- **id -** UUID primary key for unique identification
- **content -** the original chunk text extracted from the PDF
- **vector -** the 768-dimensional embeddings generated by Gemini
- **metadata -** structural information such as page, source, and context

```sql
CREATE TABLE pdf_documents (
  id UUID PRIMARY KEY,
  content TEXT,
  vector VECTOR(768),
  metadata JSONB
);

CREATE INDEX ON pdf_documents USING hnsw (vector vector_cosine_ops);
```

The `HNSW index` optimizes vector search, offering logarithmic complexity versus traditional linear scans.

## The RAG Search System: Intelligent Retrieval + Generation

### Advanced Semantic Search Theory

The semantic search pipeline represents a fundamental transformation in how computational systems find relevant information. Unlike traditional keyword search, semantic search uses vector representations to capture conceptual meaning.

The process starts by converting the user's question into a vector embedding, using the same model used during ingestion. This query embedding is then compared against all stored embeddings using mathematical similarity metrics. The HNSW algorithm accelerates this comparison, reducing complexity from O(n) to O(log n).

Results are ranked by _similarity score_, where lower values indicate greater similarity in cosine space. _Context assembly_ concatenates the most relevant chunks, building a rich context for answer generation.
## Anti-Hallucination Prompt Engineering

The prompt template implements sophisticated strategies to _prevent hallucinations_ and guarantee factual answers. Explicit instructions emphasize exclusive use of the provided context. A fallback response supplies a standard answer for cases where the information is not available. A low temperature of 0.1 reduces creativity and increases determinism. Negative examples demonstrate cases where the correct answer is "I don't know."

This approach guarantees that the system always recognizes the limits of the available knowledge, preferring to admit ignorance rather than invent information.
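A sketch of such a template (ours; the exact wording in the repository's `src/search.ts` may differ), combining the four strategies above:

```typescript
// Anti-hallucination prompt sketch: context-only instructions, a fallback
// answer, and a negative example. The wording is illustrative.
const FALLBACK = "I don't have the information needed to answer your question.";

function buildPrompt(context: string, question: string): string {
  return [
    'Answer ONLY with facts found in the CONTEXT below.',
    `If the CONTEXT does not contain the answer, reply exactly: "${FALLBACK}"`,
    '',
    'Negative example:',
    'Question: What is the capital of France?',
    `Answer: ${FALLBACK} (the context is about company data, not geography)`,
    '',
    `CONTEXT:\n${context}`,
    '',
    `QUESTION: ${question}`,
  ].join('\n');
}
// Pair this with temperature 0.1 in chatCompletions for deterministic output.
```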
## The CLI Interface: An Exceptional User Experience

### User-Centered Design

The CLI interface was designed around user experience principles applied to AI systems. Immediate feedback through progress indicators keeps users informed about operations in progress. Special commands such as `help`, `status`, `clear`, and `exit` offer intuitive control. Graceful error handling presents informative messages that guide users toward resolving problems. The non-blocking asynchronous interface stays responsive even during computationally intensive operations.

### Implementing the Interactive Interface

The implementation combines Node.js's native readline with advanced command logic to create a fluid, intuitive experience.

**src/chat.ts**
```typescript
import { createInterface } from "readline";
import { searchPrompt, RAGSearch } from "./search";

// Function to print the initial banner with system information
function printBanner(): void {
  console.log('='.repeat(60));
  console.log('RAG CHAT - PDF Question and Answer System');
  console.log('Powered by Google Gemini + LangChain + pgVector');
  console.log('⚡ TypeScript + Node.js Implementation');
  console.log('='.repeat(60));
  console.log("Special commands:");
  console.log("  • 'exit', 'quit', 'q' - Closes the program");
  console.log("  • 'help' - Shows available commands");
  console.log("  • 'clear' - Clears the screen");
  console.log("  • 'status' - Checks system status");
  console.log('='.repeat(60));
}

// Function to print help instructions
function printHelp(): void {
  console.log('\n AVAILABLE COMMANDS:');
  console.log('  exit, quit, q - Closes the program');
  console.log('  help - Shows available commands');
  console.log('  clear - Clears the screen');
  console.log('  status - Checks system status');
  console.log('  [any text] - Asks a question about the PDF');
  console.log('\n TIPS FOR USE:');
  console.log('  • Ask specific questions about the PDF content');
  console.log('  • The system responds only based on the document');
  console.log('  • Out-of-context questions return "I don\'t have information"');
  console.log();
}

// Function to clear the console screen
function clearScreen(): void {
  console.clear();
}

async function checkStatus(searchSystem: RAGSearch | null): Promise<void> {
  console.log('\n RAG SYSTEM STATUS:');
  console.log('='.repeat(40));

  if (!searchSystem) {
    console.log('System: NOT INITIALIZED');
    console.log('\n TROUBLESHOOTING CHECKLIST:');
    console.log('  1. Is PostgreSQL running?');
    console.log('     → Command: docker compose up -d');
    console.log('  2. Has ingestion been executed?');
    console.log('     → Command: npm run ingest');
    console.log('  3. Is the API Key configured?');
    console.log('     → File: .env (GOOGLE_API_KEY)');
    console.log('  4. Are dependencies installed?');
    console.log('     → Command: npm install');
    return;
  }

  try {
    const systemStatus = await searchSystem.getSystemStatus();

    console.log('RAG System: OPERATIONAL');
    console.log('PostgreSQL Connection: OK');
    console.log('pgVector Extension: OK');
    console.log('Google Gemini API: OK');
    console.log(`Vector Database: ${systemStatus.isReady ? 'READY' : 'NOT READY'}`);

    if (systemStatus.chunksCount > 0) {
      console.log(`Available chunks: ${systemStatus.chunksCount}`);
    }

    console.log('\n System ready to answer questions!');
  } catch (error) {
    console.log('Status: PARTIALLY OPERATIONAL');
    console.log(`Error checking system status: ${error}`);
  }

  console.log('='.repeat(40));
}

// Main function to initialize the RAG system and handle user input
async function main(): Promise<void> {
  console.log('STEP 6: Initializing the RAG Chat CLI Interface');

  printBanner();

  console.log('\n PHASE 1: INITIALIZING RAG SYSTEM');
  const searchSystem = await searchPrompt();

  if (!searchSystem) {
    console.log('\n CRITICAL ERROR: RAG system could not be initialized!');
    console.log('\n POSSIBLE CAUSES AND SOLUTIONS:');
    console.log('  1. PostgreSQL is not running');
    console.log('     → Solution: docker compose up -d');
    console.log('  2. Ingestion process has not been executed');
    console.log('     → Solution: npm run ingest');
    console.log('  3. GOOGLE_API_KEY is not configured or invalid');
    console.log('     → Solution: Configure in the .env file');
    console.log('  4. Node.js dependencies are not installed');
    console.log('     → Solution: npm install');
    console.log('  5. pgVector extension has not been created');
    console.log('     → Solution: Check Docker logs');

    process.exit(1);
  }

  console.log('PHASE 1: RAG system initialized successfully!\n');

  // PHASE 2: SET UP THE COMMAND LINE INTERFACE
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
    prompt: '\n Ask a question: '
  });

  // Helper function to capture user input asynchronously
  const askQuestion = (prompt: string): Promise<string> => {
    return new Promise((resolve) => {
      rl.question(prompt, resolve);
    });
  };

  console.log('System ready! Type your question or "help" to see commands.');

  // PHASE 3: MAIN CHAT LOOP
  while (true) {
    try {
      // Capture user input
      const userInput = (await askQuestion('\n Ask a question: ')).trim();

      // COMMAND PROCESSING: Determine whether it is a special command or a question
      const command = userInput.toLowerCase();

      // Exit commands
      if (['exit', 'quit', 'sair', 'q'].includes(command)) {
        console.log('\n Thank you for using RAG Chat. Goodbye!\n');
        console.log('System shutting down...');
        break;
      }

      // Help command
      if (['ajuda', 'help', 'h', '?'].includes(command)) {
        printHelp();
        continue;
      }

      // Clear screen command
      if (['limpar', 'clear', 'cls'].includes(command)) {
        clearScreen();
        printBanner();
        continue;
      }

      // Status command
      if (['status', 'info', 's'].includes(command)) {
        await checkStatus(searchSystem);
        continue;
      }

      // Validate empty input
      if (!userInput) {
        console.log('Empty input. Type a question or "help" to see commands.');
        continue;
      }

      // QUESTION PROCESSING: Forward the question to the RAG system
      console.log('\n Processing your question...');
      console.log('Searching PDF knowledge...');

      const startTime = Date.now();

      // Call the complete RAG pipeline
      const answer = await searchSystem.generateAnswer(userInput);

      const endTime = Date.now();
      const responseTime = ((endTime - startTime) / 1000).toFixed(2);

      // FORMATTED DISPLAY OF THE RESPONSE
      console.log('\n' + '='.repeat(80));
      console.log(`QUESTION: ${userInput}`);
      console.log('='.repeat(80));
      console.log(`🤖 RESPONSE:`);
      console.log(answer);
      console.log('='.repeat(80));
      console.log(`⚡ Response time: ${responseTime}s`);
    } catch (error) {
      // ERROR HANDLING
      if (error instanceof Error && error.message.includes('SIGINT')) {
        // Ctrl+C was pressed
        console.log('\n\n Interruption detected (Ctrl+C)');
        console.log('👋 Chat closed by user. See you next time!');
        break;
      } else {
        // Other errors
        console.log(`\n Unexpected error during processing:`);
        console.log(`   ${error}`);
        console.log('\n You can:');
        console.log('  • Try again with another question');
        console.log('  • Type "status" to check the system');
        console.log('  • Type "exit" to quit');
      }
    }
  }

  rl.close();
}

// EVENT HANDLERS: Operating system signal management

// Handler for Ctrl+C (SIGINT)
process.on('SIGINT', () => {
  console.log('\n\n Interrupt signal received (Ctrl+C)');
  console.log('Cleaning up resources...');
  console.log('RAG Chat closed. See you later!');
  process.exit(0);
});

// Handler for uncaught errors
process.on('uncaughtException', (error) => {
  console.error('\n Uncaught FATAL ERROR:', error);
  console.error('Restart the application: npm run start');
  process.exit(1);
});

// Handler for rejected promises
process.on('unhandledRejection', (reason, promise) => {
  console.error('\n Unhandled rejected promise:', reason);
  console.error('Promise:', promise);
});

// ENTRY POINT: Run the main function
main().catch((error) => {
  console.error('\n FATAL ERROR in main application:', error);
  console.error('Try restarting: npm run start');
  process.exit(1);
});
```
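`src/chat.ts` depends on `src/search.ts`, which this article does not reproduce. Inferring only from the calls above, a minimal shape consistent with that usage might be (an assumption, not the repository's actual code):

```typescript
// Hypothetical shape of src/search.ts, inferred only from how chat.ts uses it.
export interface SystemStatus {
  isReady: boolean;
  chunksCount: number;
}

export interface RAGSearch {
  getSystemStatus(): Promise<SystemStatus>;
  generateAnswer(question: string): Promise<string>;
}

// Factory that wires up the vector store and returns null when any
// dependency (database, ingestion, API key) is unavailable.
export declare function searchPrompt(): Promise<RAGSearch | null>;
```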
The `RAGSearch` class encapsulates the complete search and generation functionality. `searchDocuments` runs the vector search and returns formatted results with scores. `generateAnswer` orchestrates the complete RAG pipeline.

The `printBanner` function presents essential information about the system and the available commands. `checkStatus` offers detailed component diagnostics, making troubleshooting easier. The main loop processes commands and questions with robust error handling.

## Comprehensive Execution and Validation

### Optimized Execution Sequence

Execution follows a logical sequence that guarantees correct initialization of all components. First, start the infrastructure:

```bash
docker-compose up -d
```

This command brings up PostgreSQL with pgVector. Check the container status:

```bash
docker ps
```

This command confirms correct operation. Run the ingestion to process the PDF documents:

```bash
npm run dev:ingest
```

Finally, start the interactive chat to interact with the system:

```bash
npm run dev:chat
```

## Comprehensive Test Scenarios

The system supports several test scenarios that validate the complete functionality. Questions within the PDF's context should return answers based exclusively on the processed content. Questions outside the context should produce the standard response "I don't have the information needed to answer your question." Special commands such as status, help, and clear should work correctly.

## Systematic Troubleshooting

Common problems have well-defined solutions that can be identified through specific error messages:

- **Error: "Google API key is not set"**: indicates that the GOOGLE_API_KEY environment variable must be configured in the .env file. Check that the file contains a valid API key obtained from Google AI Studio.

- **Error: "Vector store not initialized"**: suggests that PostgreSQL is not operational or the ingestion process has not been run. Confirm that the Docker containers are running and run the document ingestion.

- **Error: "No documents found"**: indicates that the ingestion process needs to run to populate the vector database with chunks from the processed PDF.

- **Error: "Connection refused"**: points to PostgreSQL being offline; resolve it by checking the status of the Docker containers and restarting the infrastructure if necessary.

## Advanced Production Considerations

### Optimized Performance and Scalability

The implemented optimizations guarantee adequate performance for production use. Batch processing during ingestion applies rate limiting for external APIs, avoiding throttling. Connection pooling in PostgreSQL allows multiple simultaneous connections. HNSW indexing delivers sub-second search even with millions of vectors. Asynchronous operations keep the application responsive.

Performance metrics demonstrate the system's efficiency. Ingestion processes a 50-page PDF in roughly 30 seconds. Search returns results in 2-3 seconds per question. Throughput supports more than 100 questions per minute on modest hardware.
## Robust Security and Reliability

The security implementation follows best practices for production applications. Environment variables isolate secrets from source code. Input validation and sanitization prevent injection attacks. Robust error handling prevents leakage of sensitive information. Graceful shutdown handling guarantees proper resource cleanup.

Recommended monitoring includes structured logs using libraries such as `Winston` or `Pino`. Performance metrics can be collected with `Prometheus`. Automatic health checks monitor component availability. Per-user rate limiting prevents resource abuse. Keep these in mind as future improvements.

## Roadmap of Future Improvements

The technical roadmap identifies opportunities for evolution. Migrating the CLI to a REST API will ease integration with web applications. A `React` or `Next.js` interface will offer a modern visual experience. Multi-tenancy support will allow multiple users and documents. A `Redis cache` for frequent answers will reduce latency. `OpenTelemetry` integration will provide complete observability.

## References and Resources for Further Study

### Project Documentation and Repository

The complete code for this RAG system is available in the official repository **[rag-search-ingestion-langchainjs-gemini](https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini)**, where you will find a working implementation, detailed installation instructions, usage examples, and complete documentation of every component developed. The repository includes production-ready Docker configuration files, automation scripts for development, and specific test cases that demonstrate the practical application of the concepts presented in this article.

### Theoretical Foundations of RAG

For a deeper understanding of the theoretical foundations, the original paper "**[Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://dl.acm.org/doi/abs/10.5555/3495724.3496517)**" by Lewis et al., presented at NeurIPS 2020, establishes the fundamental principles of the RAG architecture. The research "**[Dense Passage Retrieval for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906)**" by Karpukhin et al. explores the dense retrieval techniques that underpin modern semantic search systems. The paper "**[In-Context Retrieval-Augmented Language Models](https://arxiv.org/abs/2302.00083)**" presents recent advances in integrating dynamic context into language models.

### Technologies and Frameworks

The official LangChain.js documentation at **[https://js.langchain.com/](https://js.langchain.com/)** offers complete guides on implementing AI pipelines, including specific tutorials on integrating with different embedding and language model providers. The Google AI developer documentation at **[https://ai.google.dev/docs](https://ai.google.dev/docs)** provides detailed technical specifications for the Gemini APIs, including rate limits, prompt engineering best practices, and performance optimizations.
For PostgreSQL and pgVector, the official documentation at **[https://github.com/pgvector/pgvector](https://github.com/pgvector/pgvector)** contains technical specifications on HNSW index implementation, performance settings, and scaling strategies for large volumes of vector data.
The PostgreSQL documentation at **[https://www.postgresql.org/docs/](https://www.postgresql.org/docs/)** offers fundamentals on database administration, query optimization, and advanced settings for high-performance applications.

### Embedding Models and Vector Search

A deep understanding of embeddings can be expanded through the paper "**[Attention Is All You Need](https://arxiv.org/abs/1706.03762)**", which introduces the Transformer architecture fundamental to modern embedding models. The paper "**[Efficient Estimation of Word Representations in Vector Space](https://arxiv.org/abs/1301.3781)**" by Mikolov et al. establishes the mathematical foundations of semantic vector representations. For vector search algorithms, "**[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320)**" details the implementation and optimizations of the HNSW algorithm used by pgVector.

### Prompt Engineering and Hallucination Control

The research "**[Constitutional AI: Harmlessness from AI Feedback](https://arxiv.org/abs/2212.08073)**" explores advanced techniques for controlling language model behavior. "**[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https://arxiv.org/abs/2201.11903)**" demonstrates prompt-structuring strategies for complex reasoning. "**[Instruction Following with Large Language Models](https://arxiv.org/abs/2506.13734)**" offers insights on designing effective instructions for RAG systems.

### Practical Resources and Tutorials

The LangChain Cookbook at **[https://github.com/langchain-ai/langchain/tree/master/cookbook](https://github.com/langchain-ai/langchain/tree/master/cookbook)** contains practical examples implementing different RAG patterns. The Pinecone Learning Center at **[https://www.pinecone.io/learn/](https://www.pinecone.io/learn/)** offers tutorials on vector databases and semantic search applications. The Weaviate documentation at **[https://weaviate.io/developers/weaviate/](https://weaviate.io/developers/weaviate/)** presents alternatives for vector storage and their technical specifics.

## Author and Contributions

This project was developed by Glaucia Lemos, AI Developer Specialist, who shares knowledge across multiple platforms. Her profiles include Twitter at **[https://twitter.com/glaucia86](https://twitter.com/glaucia86)** for technical updates and development insights, LinkedIn at **[https://www.linkedin.com/in/glaucialemos/](https://www.linkedin.com/in/glaucialemos/)** for professional networking and technical articles, and YouTube at **[https://www.youtube.com/@GlauciaLemos](https://www.youtube.com/@GlauciaLemos)** for video tutorials and technical talks on modern development.

--------------------------------------------------------------------------------