├── .gitignore ├── document.pdf ├── tutorial │   ├── resource │   │   ├── image-01.png │   │   └── rag-docker-ts-langchain.jpg │   ├── article-en.md │   └── article.md ├── .editorconfig ├── .env.sample ├── tsconfig.json ├── LICENSE ├── docker-compose.yml ├── package.json ├── src │   ├── ingest.ts │   ├── google-client.ts │   ├── search.ts │   └── chat.ts ├── README-en.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | node_modules/ 3 | dist/ -------------------------------------------------------------------------------- /document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glaucia86/rag-search-ingestion-langchainjs-gemini/HEAD/document.pdf -------------------------------------------------------------------------------- /tutorial/resource/image-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glaucia86/rag-search-ingestion-langchainjs-gemini/HEAD/tutorial/resource/image-01.png -------------------------------------------------------------------------------- /tutorial/resource/rag-docker-ts-langchain.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glaucia86/rag-search-ingestion-langchainjs-gemini/HEAD/tutorial/resource/rag-docker-ts-langchain.jpg -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 2 9 | end_of_line = crlf 10 | charset = utf-8 11 | trim_trailing_whitespace = false 12 | insert_final_newline = false -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | # Google AI Studio API Key (REQUIRED) 2 | # Get yours at: https://makersuite.google.com/app/apikey 3 | GOOGLE_API_KEY=your_google_api_key_here 4 | 5 | # Google models (OPTIONAL - defaults are used if not set) 6 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 7 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 8 | 9 | # Database configuration (NEXT STEPS) 10 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 11 | PG_VECTOR_COLLECTION_NAME=pdf_documents 12 | 13 | # PDF configuration (NEXT STEPS) 14 | PDF_PATH=./document.pdf -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "ESNext", 5 | "moduleResolution": "node", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true, 13 | "allowSyntheticDefaultImports": true, 14 | "experimentalDecorators": true, 15 | "emitDecoratorMetadata": true, 16 | "declaration": true, 17 | "declarationMap": true, 18 | "sourceMap": true, 19 | "types": ["node"], 20 | "lib": ["ES2022", "DOM"] 21 | }, 22 | "include": [ 23 | "src/**/*" 24 | ], 25 | "exclude": [ 26 | "node_modules", 27 | "dist", 28 | "**/*.test.ts", 29 | "**/*.spec.ts" 30 | ], 31 | "ts-node": { 32 | "esm": true 33 | } 34 | } 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Glaucia Lemos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | # Main service: PostgreSQL with pgVector extension 3 | postgres: 4 | image: pgvector/pgvector:pg17 5 | container_name: postgres_rag_ts 6 | environment: 7 | POSTGRES_USER: postgres 8 | POSTGRES_PASSWORD: postgres 9 | POSTGRES_DB: rag 10 | ports: 11 | - "5432:5432" 12 | volumes: 13 | # Data persistence 14 | - postgres_data:/var/lib/postgresql/data 15 | healthcheck: 16 | # Checks if the database is ready 17 | test: ["CMD-SHELL", "pg_isready -U postgres -d rag"] 18 | interval: 10s 19 | timeout: 5s 20 | retries: 5 21 | restart: unless-stopped 22 | 23 | # Auxiliary service: Initializes pgVector extension 24 | bootstrap_vector_ext: 25 | image: pgvector/pgvector:pg17 26 | depends_on: 27 | postgres: 28 | condition: service_healthy 29 | entrypoint: ["/bin/sh", "-c"] 30 | command: > 31 | PGPASSWORD=postgres 32 | psql "postgresql://postgres@postgres:5432/rag" -v ON_ERROR_STOP=1 33 | -c "CREATE EXTENSION IF NOT EXISTS vector;" 34 | restart: "no" 35 | 36 | volumes: 37 | postgres_data: -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rag-search-ingestion-langchainjs-gemini", 3 | "version": "1.0.0", 4 | "description": "A PDF search ingestion RAG application with Docker + LangChain.js + Gemini", 5 | "type": "module", 6 | "main": "index.js", 7 | "scripts": { 8 | "build": "tsc", 9 | "start": "npm run build && node dist/chat.js", 10 | "ingest": "npm run build && node dist/ingest.js", 11 | "dev:chat": "tsx src/chat.ts", 12 | "dev:ingest": "tsx src/ingest.ts" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "git+https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini.git" 17 | }, 18 | "keywords": [ 19 | "RAG", 20 | "LangChain.js", 21 | "Gemini", 22 | "PDF Search", 23 | "Docker" 24 | ], 25 | "author": "Glaucia Lemos", 26 | "license": "MIT", 27 | "bugs": { 28 | "url": 
"https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini/issues" 29 | }, 30 | "homepage": "https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini#readme", 31 | "devDependencies": { 32 | "@types/node": "^24.3.1", 33 | "@types/pdf-parse": "^1.1.5", 34 | "@types/pg": "^8.15.5", 35 | "tsx": "^4.20.5", 36 | "typescript": "^5.9.2" 37 | }, 38 | "dependencies": { 39 | "@google/generative-ai": "^0.24.1", 40 | "@langchain/community": "^0.3.55", 41 | "@langchain/core": "^0.3.75", 42 | "@langchain/textsplitters": "^0.1.0", 43 | "@types/uuid": "^10.0.0", 44 | "dotenv": "^16.6.1", 45 | "pdf-parse": "^1.1.1", 46 | "pg": "^8.16.3", 47 | "uuid": "^13.0.0" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/ingest.ts: -------------------------------------------------------------------------------- 1 | import { config } from 'dotenv'; 2 | import { Document } from '@langchain/core/documents'; 3 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 4 | import { GoogleEmbeddings } from './google-client'; 5 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 6 | import { PDFLoader as LangChainPDFLoader } from '@langchain/community/document_loaders/fs/pdf'; 7 | 8 | config(); 9 | 10 | class PDFLoader { 11 | 12 | constructor(private filePath: string) {} 13 | 14 | async load(): Promise { 15 | try { 16 | console.log(`Reading PDF file: ${this.filePath}`); 17 | 18 | // Use LangChain PDF loader instead of pdf-parse 19 | const langChainLoader = new LangChainPDFLoader(this.filePath); 20 | const documents = await langChainLoader.load(); 21 | 22 | console.log(`PDF loaded successfully! Found ${documents.length} pages`); 23 | 24 | return documents; 25 | } catch (error) { 26 | console.error('Error loading PDF:', error); 27 | throw error; 28 | } 29 | } 30 | 31 | async ingestToVectorStore(): Promise { 32 | try { 33 | console.log('Starting PDF ingestion process...'); 34 | 35 | // Step 1: Load PDF 36 | console.log(`Loading PDF from: ${this.filePath}`); 37 | const rawDocuments = await this.load(); 38 | console.log(`PDF loaded successfully! 
Found ${rawDocuments.length} sections`); 39 | 40 | // Step 2: Split documents into chunks 41 | console.log('Splitting documents into chunks...'); 42 | const textSplitter = new RecursiveCharacterTextSplitter({ 43 | chunkSize: 400, 44 | chunkOverlap: 0, 45 | separators: ["\n\n", "\n", " ", ""], 46 | }); 47 | 48 | const splitDocuments = await textSplitter.splitDocuments(rawDocuments); 49 | console.log(`Documents split into ${splitDocuments.length} chunks`); 50 | 51 | // Step 3: Initialize embeddings 52 | console.log('Initializing Google embeddings...'); 53 | const embeddings = new GoogleEmbeddings(); 54 | 55 | // Step 4: Initialize vector store 56 | console.log('Connecting to PostgreSQL vector store...'); 57 | const vectorStore = await PGVectorStore.initialize(embeddings, { 58 | postgresConnectionOptions: { 59 | connectionString: process.env.DATABASE_URL, 60 | }, 61 | tableName: process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents', 62 | columns: { 63 | idColumnName: 'id', 64 | vectorColumnName: 'vector', 65 | contentColumnName: 'content', 66 | metadataColumnName: 'metadata', 67 | }, 68 | }); 69 | 70 | // Step 5: Add documents to vector store 71 | console.log('Adding documents to vector store...'); 72 | await vectorStore.addDocuments(splitDocuments); 73 | 74 | console.log('PDF ingestion completed successfully!'); 75 | console.log(`Total chunks processed: ${splitDocuments.length}`); 76 | 77 | // Close the connection 78 | await vectorStore.end(); 79 | 80 | } catch (error) { 81 | console.error('Error during PDF ingestion:', error); 82 | process.exit(1); 83 | } 84 | } 85 | } 86 | 87 | // Main execution function 88 | async function main() { 89 | const pdfPath = process.env.PDF_PATH || './document.pdf'; 90 | const loader = new PDFLoader(pdfPath); 91 | await loader.ingestToVectorStore(); 92 | } 93 | 94 | // Run ingestion 95 | main(); -------------------------------------------------------------------------------- /src/google-client.ts: -------------------------------------------------------------------------------- 1 | import { config } from 'dotenv'; 2 | import { GoogleGenerativeAI } from '@google/generative-ai'; 3 | import { Embeddings } from '@langchain/core/embeddings'; 4 | 5 | config(); 6 | 7 | export interface ChatMessage { 8 | role: 'system' | 'user' | 'assistant'; 9 | content: string; 10 | } 11 | 12 | export class GoogleClient { 13 | private googleApiKey: string; 14 | private embeddingModel: string; 15 | private chatModel: string; 16 | private genAI: GoogleGenerativeAI; 17 | 18 | constructor() { 19 | this.googleApiKey = process.env.GOOGLE_API_KEY || ''; 20 | this.embeddingModel = process.env.GOOGLE_EMBEDDING_MODEL || 'models/embedding-001'; 21 | this.chatModel = process.env.GOOGLE_CHAT_MODEL || 'gemini-2.0-flash'; 22 | 23 | if (!this.googleApiKey) { 24 | throw new Error('Google API key is not set in environment variables.'); 25 | } 26 | 27 | // Initialize GoogleGenerativeAI instance 28 | this.genAI = new GoogleGenerativeAI(this.googleApiKey); 29 | } 30 | 31 | async getEmbeddings(texts: string[]): Promise<number[][]> { 32 | const embeddings: number[][] = []; 33 | 34 | for(const text of texts) { 35 | try { 36 | const model = this.genAI.getGenerativeModel({ model: this.embeddingModel }); 37 | const result = await model.embedContent(text); 38 | 39 | if (result.embedding && result.embedding.values) { 40 | embeddings.push(result.embedding.values); 41 | } else { 42 | console.log(`No embedding returned for text: ${text}`); 43 | const dummySize = 768; 44 | embeddings.push(new Array(dummySize).fill(0)); 45 | } 46 | } catch (error) { 47 | console.log(`Error generating 
embedding for text: ${text}`, error); 48 | const dummySize = 768; 49 | embeddings.push(new Array(dummySize).fill(0)); 50 | } 51 | } 52 | 53 | return embeddings; 54 | } 55 | 56 | async chatCompletions(messages: ChatMessage[], temperature: number = 0.1): Promise<string> { 57 | try { 58 | const model = this.genAI.getGenerativeModel({ 59 | model: this.chatModel, 60 | generationConfig: { 61 | temperature, 62 | maxOutputTokens: 1000, 63 | } 64 | }); 65 | 66 | // Convert messages to a single prompt string 67 | let prompt = ''; 68 | 69 | for (const message of messages) { 70 | const { role, content } = message; 71 | 72 | if (role === 'system') { 73 | prompt += `Instructions: ${content}\n\n`; 74 | } else if (role === 'user') { 75 | prompt += `${content}\n`; 76 | } else if (role === 'assistant') { 77 | prompt += `Assistant: ${content}\n`; 78 | } 79 | } 80 | 81 | // Generate response using the model 82 | const result = await model.generateContent(prompt); 83 | const response = result.response; 84 | 85 | return response.text(); 86 | } catch (error) { 87 | console.log(`Error generating chat completion: ${error}`); 88 | return 'Sorry, an error occurred while generating the response.'; 89 | } 90 | } 91 | } 92 | 93 | export class GoogleEmbeddings extends Embeddings { 94 | private client: GoogleClient; 95 | 96 | constructor() { 97 | super({}); 98 | this.client = new GoogleClient(); 99 | } 100 | 101 | async embedDocuments(texts: string[]): Promise<number[][]> { 102 | console.log(`Generating embeddings for ${texts.length} documents...`); 103 | 104 | const batchSize = 10; // Process 10 texts at a time to keep API usage manageable 105 | const allEmbeddings: number[][] = []; 106 | 107 | for(let i = 0; i < texts.length; i += batchSize) { 108 | const batchTexts = texts.slice(i, i + batchSize); 109 | const batchEmbeddings = await this.client.getEmbeddings(batchTexts); 110 | allEmbeddings.push(...batchEmbeddings); 111 | 112 | console.log(`Batch ${Math.floor(i / batchSize) + 1}: ${batchTexts.length} texts processed`); 113 | } 114 | 115 | return allEmbeddings; 116 | } 117 | 118 | // Method for embedding a single query 119 | async embedQuery(text: string): Promise<number[]> { 120 | const embeddings = await this.client.getEmbeddings([text]); 121 | return embeddings[0]; 122 | } 123 | } 124 | 125 | // Factory function to create a GoogleClient instance 126 | export function getGoogleClient(): GoogleClient { 127 | return new GoogleClient(); 128 | } -------------------------------------------------------------------------------- /src/search.ts: -------------------------------------------------------------------------------- 1 | import { config } from 'dotenv'; 2 | import { getGoogleClient, GoogleClient, GoogleEmbeddings, ChatMessage } from './google-client'; 3 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 4 | 5 | config(); 6 | 7 | const PROMPT_TEMPLATE = ` 8 | CONTEXTO FORNECIDO: 9 | {contexto} 10 | 11 | INSTRUÇÕES CRÍTICAS: 12 | - Responda EXCLUSIVAMENTE com base no CONTEXTO FORNECIDO acima. 13 | - Se a informação não estiver EXPLICITAMENTE no contexto, responda exatamente: 14 | "Não tenho informações necessárias para responder sua pergunta." 15 | - NUNCA use conhecimento externo ou invente informações. 16 | - NUNCA expresse opiniões pessoais ou interpretações além do texto fornecido. 17 | 18 | EXEMPLOS DE RESPOSTAS CORRETAS PARA PERGUNTAS SEM CONTEXTO: 19 | - "Qual é a capital da França?" -> "Não tenho informações necessárias para responder sua pergunta." 20 | - "Quantos funcionários a empresa tem?" 
-> "Não tenho informações necessárias para responder sua pergunta." 21 | - "Você recomenda investir nisso?" -> "Não tenho informações necessárias para responder sua pergunta." 22 | 23 | PERGUNTA DO USUÁRIO: 24 | {pergunta} 25 | 26 | RESPOSTA (baseada apenas no contexto fornecido): 27 | `; 28 | 29 | export interface SearchResult { 30 | content: string; 31 | metadata: any; 32 | score: number; 33 | } 34 | 35 | export class RAGSearch { 36 | private databaseUrl: string; 37 | private collectionName: string; 38 | private embeddings: GoogleEmbeddings; 39 | private googleClient: any; 40 | private vectorStore: PGVectorStore | null = null; 41 | 42 | constructor() { 43 | // Load environment variables 44 | this.databaseUrl = process.env.DATABASE_URL || ''; 45 | this.collectionName = process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents'; 46 | 47 | // Initialize main components 48 | this.embeddings = new GoogleEmbeddings(); 49 | this.googleClient = getGoogleClient(); 50 | this.vectorStore = null; 51 | 52 | this._initializeVectorStore(); 53 | } 54 | 55 | private async _initializeVectorStore(): Promise { 56 | try { 57 | // Connect to PostgreSQL vector store 58 | this.vectorStore = await PGVectorStore.initialize(this.embeddings, { 59 | postgresConnectionOptions: { 60 | connectionString: this.databaseUrl, 61 | }, 62 | tableName: this.collectionName, 63 | columns: { 64 | idColumnName: 'id', 65 | vectorColumnName: 'vector', 66 | contentColumnName: 'content', 67 | metadataColumnName: 'metadata', 68 | }, 69 | }); 70 | 71 | console.log('RAG System: Connection to vector database established ') 72 | } catch (error) { 73 | console.log(`Error initializing vector database: ${error}`); 74 | throw error; 75 | } 76 | } 77 | 78 | async searchDocuments(query: string, k: number = 10): Promise { 79 | if (!this.vectorStore) { 80 | throw new Error('Vector bank has not been initialized. Run ingestion first.'); 81 | } 82 | 83 | try { 84 | // Busca semântica silenciosa 85 | 86 | // PHASE 1: SIMILARITY SEARCH WITH SCORES 87 | // Use similaritySearchWithScore to get both documents and scores 88 | const results = await this.vectorStore.similaritySearchWithScore(query, k); 89 | 90 | // PHASE 2: FORMAT RESULTS 91 | const formattedResults: SearchResult[] = []; 92 | 93 | for(const [document, score] of results) { 94 | formattedResults.push({ 95 | content: document.pageContent, // Chunk text 96 | metadata: document.metadata, // Info about page, source, etc. 
97 | score: score // Similarity score (lower is more similar) 98 | }); 99 | } 100 | 101 | // ${formattedResults.length} chunks found (logged silently) 102 | 103 | return formattedResults; 104 | } catch (error) { 105 | console.log(`Error during semantic search: ${error}`); 106 | return []; // return empty array on error 107 | } 108 | } 109 | 110 | async generateAnswer(query: string): Promise<string> { 111 | try { 112 | // RAG pipeline starts silently 113 | 114 | // STEP 1: RETRIEVAL 115 | const documents = await this.searchDocuments(query, 10); 116 | 117 | if (!documents.length) { 118 | console.log('No relevant documents found in the database.'); 119 | return 'I don\'t have the information necessary to answer your question.'; 120 | } 121 | 122 | // STEP 2: CONTEXT ASSEMBLY 123 | const context = documents.map((doc) => { 124 | return doc.content; 125 | }) 126 | .join('\n\n'); 127 | 128 | // STEP 3: STRUCTURE THE PROMPT FOR THE LLM 129 | const fullPrompt = PROMPT_TEMPLATE 130 | .replace('{contexto}', context) 131 | .replace('{pergunta}', query); 132 | 133 | // STEP 4: GENERATION USING LLM 134 | const messages: ChatMessage[] = [ 135 | { role: 'user', content: fullPrompt } 136 | ]; 137 | 138 | const response = await this.googleClient.chatCompletions( 139 | messages, 140 | 0.1 141 | ); 142 | 143 | // RAG pipeline completed successfully 144 | 145 | return response.trim(); 146 | } catch (error) { 147 | console.log(`Error in RAG pipeline: ${error}`); 148 | return 'Internal error: Unable to process your query. Please check if ingestion has been performed.'; 149 | } 150 | } 151 | 152 | // Utility method for checking system status 153 | async getSystemStatus(): Promise<{ isReady: boolean; chunksCount: number}> { 154 | try { 155 | if (!this.vectorStore) { 156 | return { isReady: false, chunksCount: 0 }; 157 | } 158 | 159 | const testResults = await this.vectorStore.similaritySearch("test", 1); 160 | return { 161 | isReady: true, 162 | chunksCount: testResults.length > 0 ? -1 : 0 // -1 means "there are documents, but we don't know how many" 163 | }; 164 | } catch (error) { 165 | return { isReady: false, chunksCount: 0 }; 166 | } 167 | } 168 | } 169 | 170 | // Factory function to create a RAG instance 171 | export async function searchPrompt(question?: string): Promise<RAGSearch | null> { 172 | try { 173 | console.log('Initializing RAG Search system...'); 174 | const ragSearch = new RAGSearch(); 175 | 176 | await new Promise(resolve => setTimeout(resolve, 1000)); // give the async vector store initialization time to finish 177 | 178 | const status = await ragSearch.getSystemStatus(); 179 | if (!status.isReady) { 180 | console.log('System is not ready. 
Run ingestion first.'); 181 | return null; 182 | } 183 | 184 | console.log('RAG system initialized and ready for use.'); 185 | return ragSearch; 186 | } catch (error) { 187 | console.log(`Error initializing RAG Search system: ${error}`); 188 | return null; 189 | } 190 | } -------------------------------------------------------------------------------- /src/chat.ts: -------------------------------------------------------------------------------- 1 | import { createInterface } from "readline"; 2 | import { searchPrompt, RAGSearch } from "./search"; 3 | 4 | // Function to print the initial banner with system information 5 | function printBanner(): void { 6 | console.log('='.repeat(60)); 7 | console.log('RAG CHAT - PDF Question and Answer System'); 8 | console.log('Powered by Google Gemini + LangChain + pgVector'); 9 | console.log('⚡ TypeScript + Node.js Implementation'); 10 | console.log('='.repeat(60)); 11 | console.log("Special commands:"); 12 | console.log(" • 'exit', 'quit', 'sair', 'q' - Closes the program"); 13 | console.log(" • 'help' - Shows available commands"); 14 | console.log(" • 'clear' - Clears the screen"); 15 | console.log(" • 'status' - Checks system status"); 16 | console.log('='.repeat(60)); 17 | } 18 | 19 | // Function to print help instructions 20 | function printHelp(): void { 21 | console.log('\n AVAILABLE COMMANDS:'); 22 | console.log(' exit, quit, sair, q - Closes the program'); 23 | console.log(' help - Shows available commands'); 24 | console.log(' clear - Clears the screen'); 25 | console.log(' status - Checks system status'); 26 | console.log(' [any text] - Asks a question about the PDF'); 27 | console.log('\n TIPS FOR USE:'); 28 | console.log(' • Ask specific questions about the PDF content'); 29 | console.log(' • The system responds only based on the document'); 30 | console.log(' • Out-of-context questions return "I don\'t have information"'); 31 | console.log(); 32 | } 33 | 34 | // Function to clear the console screen 35 | function clearScreen(): void { 36 | console.clear(); 37 | } 38 | 39 | async function checkStatus(searchSystem: RAGSearch | null): Promise<void> { 40 | console.log('\n RAG SYSTEM STATUS:'); 41 | console.log('='.repeat(40)); 42 | 43 | if (!searchSystem) { 44 | console.log('System: NOT INITIALIZED'); 45 | console.log('\n TROUBLESHOOTING CHECKLIST:'); 46 | console.log(' 1. Is PostgreSQL running?'); 47 | console.log(' → Command: docker compose up -d'); 48 | console.log(' 2. Has ingestion been executed?'); 49 | console.log(' → Command: npm run ingest'); 50 | console.log(' 3. Is the API Key configured?'); 51 | console.log(' → File: .env (GOOGLE_API_KEY)'); 52 | console.log(' 4. Are dependencies installed?'); 53 | console.log(' → Command: npm install'); 54 | return; 55 | } 56 | 57 | try { 58 | const systemStatus = await searchSystem.getSystemStatus(); 59 | 60 | console.log('RAG System: OPERATIONAL'); 61 | console.log('PostgreSQL Connection: OK'); 62 | console.log('pgVector Extension: OK'); 63 | console.log('Google Gemini API: OK'); 64 | console.log(`Vector Database: ${systemStatus.isReady ? 
'READY' : 'NOT READY'}`); 65 | 66 | if (systemStatus.chunksCount > 0) { 67 | console.log(`Available chunks: ${systemStatus.chunksCount}`); 68 | } 69 | 70 | console.log('\n System ready to answer questions!'); 71 | } catch (error) { 72 | console.log('Status: PARTIALLY OPERATIONAL'); 73 | console.log(`Error checking system status: ${error}`); 74 | } 75 | 76 | console.log('='.repeat(40)); 77 | } 78 | 79 | // Main function to initialize RAG system and handle user input 80 | async function main(): Promise<void> { 81 | console.log('STEP 6: Initializing the RAG Chat CLI Interface'); 82 | 83 | printBanner(); 84 | 85 | console.log('\n PHASE 1: INITIALIZING RAG SYSTEM'); 86 | const searchSystem = await searchPrompt(); 87 | 88 | if (!searchSystem) { 89 | console.log('\n CRITICAL ERROR: RAG system could not be initialized!'); 90 | console.log('\n POSSIBLE CAUSES AND SOLUTIONS:'); 91 | console.log(' 1. PostgreSQL is not running'); 92 | console.log(' → Solution: docker compose up -d'); 93 | console.log(' 2. Ingestion process has not been executed'); 94 | console.log(' → Solution: npm run ingest'); 95 | console.log(' 3. GOOGLE_API_KEY is not configured or invalid'); 96 | console.log(' → Solution: Configure in the .env file'); 97 | console.log(' 4. Node.js dependencies are not installed'); 98 | console.log(' → Solution: npm install'); 99 | console.log(' 5. pgVector extension has not been created'); 100 | console.log(' → Solution: Check Docker logs'); 101 | 102 | process.exit(1); 103 | } 104 | 105 | console.log('PHASE 1: RAG system initialized successfully!\n'); 106 | 107 | // PHASE 2: SETUP COMMAND LINE INTERFACE 108 | const rl = createInterface({ 109 | input: process.stdin, 110 | output: process.stdout, 111 | prompt: '\n Ask a question: ' 112 | }); 113 | 114 | // Helper function to capture user input asynchronously 115 | const askQuestion = (prompt: string): Promise<string> => { 116 | return new Promise((resolve) => { 117 | rl.question(prompt, resolve); 118 | }); 119 | }; 120 | 121 | console.log('System ready! Type your question or "help" to see commands.'); 122 | 123 | // PHASE 3: MAIN CHAT LOOP 124 | while(true) { 125 | try { 126 | // Capture user input 127 | const userInput = (await askQuestion('\n Ask a question: ')).trim(); 128 | 129 | // PROCESSING COMMAND: Analyze whether it is a special command or a question 130 | const command = userInput.toLowerCase(); 131 | 132 | // Exit commands 133 | if (['exit', 'quit', 'sair', 'q'].includes(command)) { 134 | console.log('\n Thank you for using RAG Chat. Goodbye!\n'); 135 | console.log('System shutting down...'); 136 | break; 137 | } 138 | 139 | // Help command 140 | if (['ajuda', 'help', 'h', '?'].includes(command)) { 141 | printHelp(); 142 | continue; 143 | } 144 | 145 | // Clear screen command 146 | if (['limpar', 'clear', 'cls'].includes(command)) { 147 | clearScreen(); 148 | printBanner(); 149 | continue; 150 | } 151 | 152 | // Status command 153 | if (['status', 'info', 's'].includes(command)) { 154 | await checkStatus(searchSystem); 155 | continue; 156 | } 157 | 158 | // Validate empty input 159 | if (!userInput) { 160 | console.log('Empty input. 
Type a question or "help" to see commands.'); 161 | continue; 162 | } 163 | 164 | // PROCESSING QUESTION: Forward the question to the RAG system 165 | console.log('\n Processing your question...'); 166 | console.log('Searching PDF knowledge...'); 167 | 168 | const startTime = Date.now(); 169 | 170 | // Call the complete RAG pipeline 171 | const answer = await searchSystem.generateAnswer(userInput); 172 | 173 | const endTime = Date.now(); 174 | const responseTime = ((endTime - startTime) / 1000).toFixed(2); 175 | 176 | // FORMATTED DISPLAY OF THE RESPONSE 177 | console.log('\n' + '='.repeat(80)); 178 | console.log(`QUESTION: ${userInput}`); 179 | console.log('='.repeat(80)); 180 | console.log(`🤖 RESPONSE:`); 181 | console.log(answer); 182 | console.log('='.repeat(80)); 183 | console.log(`⚡ Response time: ${responseTime}s`); 184 | } catch (error) { 185 | // ERROR HANDLING 186 | if (error instanceof Error && error.message.includes('SIGINT')) { 187 | // Ctrl+C was pressed 188 | console.log('\n\n Interruption detected (Ctrl+C)'); 189 | console.log('👋 Chat closed by user. See you next time!'); 190 | break; 191 | } else { 192 | // Other errors 193 | console.log(`\n Unexpected error during processing:`); 194 | console.log(` ${error}`); 195 | console.log('\n You can:'); 196 | console.log(' • Try again with another question'); 197 | console.log(' • Type "status" to check the system'); 198 | console.log(' • Type "exit" to quit'); 199 | } 200 | } 201 | } 202 | 203 | rl.close(); 204 | } 205 | 206 | // EVENT HANDLERS: Operating system signal management 207 | 208 | // Handler for Ctrl+C (SIGINT) 209 | process.on('SIGINT', () => { 210 | console.log('\n\n Interrupt signal received (Ctrl+C)'); 211 | console.log('Cleaning up resources...'); 212 | console.log('RAG Chat closed. 
See you later!'); 213 | process.exit(0); 214 | }); 215 | 216 | // Handler for uncaught errors 217 | process.on('uncaughtException', (error) => { 218 | console.error('\n Uncaught FATAL ERROR:', error); 219 | console.error('Restart the application: npm run start'); 220 | process.exit(1); 221 | }); 222 | 223 | // Handler for rejected promises 224 | process.on('unhandledRejection', (reason, promise) => { 225 | console.error('\n Unhandled rejected promise:', reason); 226 | console.error('Promise:', promise); 227 | }); 228 | 229 | // ENTRY POINT: Run the main function 230 | main().catch((error) => { 231 | console.error('\n FATAL ERROR in main application:', error); 232 | console.error('Try restarting: npm run start'); 233 | process.exit(1); 234 | }); -------------------------------------------------------------------------------- /README-en.md: -------------------------------------------------------------------------------- 1 | # 🤖 RAG Search Ingestion - LangChain.js + Docker + Gemini 2 | 3 | ![Node.js](https://img.shields.io/badge/Node.js-22+-339933?style=for-the-badge&logo=node.js&logoColor=white) 4 | ![TypeScript](https://img.shields.io/badge/TypeScript-5.9+-3178C6?style=for-the-badge&logo=typescript&logoColor=white) 5 | ![LangChain](https://img.shields.io/badge/LangChain.js-0.3+-00A86B?style=for-the-badge&logo=chainlink&logoColor=white) 6 | ![Google Gemini](https://img.shields.io/badge/Google%20Gemini-API-4285F4?style=for-the-badge&logo=google&logoColor=white) 7 | ![PostgreSQL](https://img.shields.io/badge/PostgreSQL-15+-336791?style=for-the-badge&logo=postgresql&logoColor=white) 8 | ![pgVector](https://img.shields.io/badge/pgVector-Extension-336791?style=for-the-badge&logo=postgresql&logoColor=white) 9 | ![Docker](https://img.shields.io/badge/Docker-Compose-2496ED?style=for-the-badge&logo=docker&logoColor=white) 10 | ![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge) 11 | 12 | A complete **Retrieval-Augmented Generation (RAG)** application for intelligent PDF document search, built with TypeScript, Node.js, and modern AI technologies. 13 | 14 | ## 📋 Table of Contents 15 | 16 | - [Overview](#-overview) 17 | - [Technologies Used](#-technologies-used) 18 | - [Architecture](#-architecture) 19 | - [Prerequisites](#-prerequisites) 20 | - [Setup](#-setup) 21 | - [How to Run](#-how-to-run) 22 | - [How to Use](#-how-to-use) 23 | - [Example Questions](#-example-questions) 24 | - [Project Structure](#-project-structure) 25 | - [Features](#-features) 26 | - [Troubleshooting](#-troubleshooting) 27 | - [Complete Tutorial](#-complete-tutorial) 28 | 29 | ## 🎯 Overview 30 | 31 | This project implements a complete RAG system that allows natural language questions about PDF document content. The system processes documents, creates vector embeddings, stores them in a PostgreSQL database with pgVector, and answers questions using Google Gemini. 32 | 33 | ### How It Works 34 | 35 | 1. **Ingestion**: The system loads and processes PDF documents, splitting them into chunks 36 | 2. **Vectorization**: Each chunk is converted into embeddings using Google Gemini 37 | 3. **Storage**: Embeddings are stored in PostgreSQL with pgVector extension 38 | 4. **Search**: When you ask a question, the system finds the most relevant chunks 39 | 5. 
**Generation**: Google Gemini generates an answer based on the found context 40 | 41 | ## 🛠 Technologies Used 42 | 43 | ### Backend & Processing 44 | - **Node.js 22+** - JavaScript runtime 45 | - **TypeScript** - Typed programming language 46 | - **LangChain.js** - Framework for AI applications 47 | - **TSX** - TypeScript executor for development 48 | 49 | ### Database & Vectors 50 | - **PostgreSQL 15** - Relational database 51 | - **pgVector** - Extension for vector search 52 | - **Docker & Docker Compose** - Containerization 53 | 54 | ### AI & Machine Learning 55 | - **Google Gemini API** - Language model for embeddings and chat 56 | - **models/embedding-001** - Model for creating embeddings 57 | - **gemini-2.0-flash** - Model for response generation 58 | 59 | ### Document Processing 60 | - **pdf-parse** - PDF text extraction 61 | - **RecursiveCharacterTextSplitter** - Intelligent text splitting 62 | 63 | ## 🏗 Architecture 64 | 65 | ``` 66 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 67 | │ PDF Document │ │ Text Splitter │ │ Embeddings │ 68 | │ │───▶│ │───▶│ (Gemini) │ 69 | │ document.pdf │ │ LangChain.js │ │ │ 70 | └─────────────────┘ └──────────────────┘ └─────────────────┘ 71 | │ 72 | ▼ 73 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 74 | │ User Question │ │ Similarity │ │ PostgreSQL │ 75 | │ │───▶│ Search │◀───│ + pgVector │ 76 | │ "Which company │ │ │ │ │ 77 | │ has the │ └──────────────────┘ └─────────────────┘ 78 | │ highest │ │ 79 | │ revenue?" │ ▼ 80 | └─────────────────┘ ▼ 81 | ▲ ┌──────────────────┐ 82 | │ │ Context │ 83 | │ │ Assembly │ 84 | │ │ │ 85 | │ └──────────────────┘ 86 | │ │ 87 | │ ▼ 88 | │ ┌──────────────────┐ 89 | │ │ Google Gemini │ 90 | └──────────────│ Response │ 91 | │ Generation │ 92 | └──────────────────┘ 93 | ``` 94 | 95 | ## 📋 Prerequisites 96 | 97 | - **Node.js 22+** installed 98 | - **Docker & Docker Compose** installed 99 | - **Google AI Studio API Key** (free) 100 | - **Git** to clone the repository 101 | 102 | ## ⚙️ Setup 103 | 104 | ### 1. Clone the Repository 105 | 106 | ```bash 107 | git clone https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini.git 108 | cd rag-search-ingestion-langchainjs-gemini 109 | ``` 110 | 111 | ### 2. Install Dependencies 112 | 113 | ```bash 114 | npm install 115 | ``` 116 | 117 | ### 3. Configure Environment Variables 118 | 119 | Create a `.env` file in the project root: 120 | 121 | ```env 122 | GOOGLE_API_KEY=your_google_api_key_here 123 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 124 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 125 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 126 | PG_VECTOR_COLLECTION_NAME=pdf_documents 127 | PDF_PATH=./document.pdf 128 | ``` 129 | 130 | ### 4. Get your Google API Key 131 | 132 | 1. Visit [Google AI Studio](https://aistudio.google.com/) 133 | 2. Create a new API Key 134 | 3. 
Copy and paste it into the `.env` file 135 | 136 | ## 🚀 How to Run 137 | 138 | ### Step 1: Start the Database 139 | 140 | ```bash 141 | docker-compose up -d 142 | ``` 143 | 144 | ### Step 2: Process the PDF (Ingestion) 145 | 146 | ```bash 147 | npm run dev:ingest 148 | ``` 149 | 150 | ### Step 3: Start the Interactive Chat 151 | 152 | ```bash 153 | npm run dev:chat 154 | ``` 155 | 156 | ## 💬 How to Use 157 | 158 | After running the chat, you'll see the interface: 159 | 160 | ``` 161 | ============================================================ 162 | 🤖 RAG CHAT - PDF Question and Answer System 163 | Powered by Google Gemini + LangChain + pgVector 164 | ⚡ TypeScript + Node.js Implementation 165 | ============================================================ 166 | 167 | System ready! Type your question or "help" to see commands. 168 | 169 | 💬 Ask a question: _ 170 | ``` 171 | 172 | ### Special Commands 173 | 174 | - `help` - Shows help and available commands 175 | - `status` - Checks system status 176 | - `clear` - Clears the screen 177 | - `exit` - Exits the chat 178 | 179 | ## 🎯 Example Questions 180 | 181 | ### Revenue Questions 182 | ``` 183 | Which company had the highest revenue? 184 | What is Aliança Energia's revenue? 185 | List the top 5 companies by revenue 186 | Which companies earned more than 1 billion? 187 | ``` 188 | 189 | ### Company Questions 190 | ``` 191 | How many companies are listed in the document? 192 | Which company was founded most recently? 193 | List companies founded in the 1990s 194 | Which company has the lowest revenue? 195 | ``` 196 | 197 | ### Analytical Questions 198 | ``` 199 | Which sector has the most companies? 200 | Compare revenue between different sectors 201 | What is the average revenue of companies? 202 | How many companies were founded in each decade? 203 | ``` 204 | 205 | ### Specific Questions 206 | ``` 207 | Are there any technology companies in the list? 208 | Which companies have "Sustainable" in their name? 209 | List energy sector companies 210 | Which automotive company has the highest revenue? 
211 | ``` 212 | 213 | ## 📁 Project Structure 214 | 215 | ``` 216 | rag-search-ingestion-langchainjs-gemini/ 217 | ├── src/ 218 | │ ├── chat.ts # Interactive chat interface 219 | │ ├── search.ts # RAG pipeline and semantic search 220 | │ ├── ingest.ts # PDF processing and ingestion 221 | │ └── google-client.ts # Google Gemini API client 222 | ├── docker-compose.yml # PostgreSQL + pgVector configuration 223 | ├── document.pdf # Sample document 224 | ├── package.json # Dependencies and scripts 225 | ├── tsconfig.json # TypeScript configuration 226 | ├── .env # Environment variables (create) 227 | └── README.md # This documentation 228 | ``` 229 | 230 | ### File Descriptions 231 | 232 | - **`chat.ts`** - Interactive chat interface with readline 233 | - **`search.ts`** - Implements complete RAG pipeline (4 stages) 234 | - **`ingest.ts`** - PDF processing and ingestion 235 | - **`google-client.ts`** - Google Gemini API client 236 | 237 | ## ✨ Features 238 | 239 | ### 🔍 Intelligent Semantic Search 240 | - Finds relevant information even with synonyms 241 | - Contextual search using vector embeddings 242 | - Automatic relevance ranking 243 | 244 | ### 🤖 Natural Responses 245 | - Responses in natural language 246 | - Based exclusively on PDF content 247 | - Context preserved during conversation 248 | 249 | ### ⚡ Optimized Performance 250 | - Embedding cache in PostgreSQL 251 | - Ultra-fast vector search with pgVector 252 | - Asynchronous processing 253 | 254 | ### 🛡️ Error Handling 255 | - Robust input validation 256 | - Fallbacks for API issues 257 | - User-friendly error messages 258 | 259 | ## 🔧 Troubleshooting 260 | 261 | ### Problem: "Error connecting to database" 262 | ```bash 263 | # Check if PostgreSQL is running 264 | docker ps 265 | 266 | # Restart containers 267 | docker-compose down 268 | docker-compose up -d 269 | ``` 270 | 271 | ### Problem: "Google API Key invalid" 272 | 1. Check if the API Key is correct in `.env` 273 | 2. Confirm the API is active in Google AI Studio 274 | 3. Check for extra spaces or characters 275 | 276 | ### Problem: "No documents found" 277 | ```bash 278 | # Run ingestion again 279 | npm run dev:ingest 280 | 281 | # Check documents in database 282 | docker exec postgres_rag_ts psql -U postgres -d rag -c "SELECT COUNT(*) FROM pdf_documents;" 283 | ``` 284 | 285 | ### Problem: "429 Too Many Requests" 286 | - Wait a few minutes (quota limit) 287 | - Check your plan in Google AI Studio 288 | - Consider using a new API Key if available 289 | 290 | ## 📊 Available Scripts 291 | 292 | ```bash 293 | npm run build # Compiles TypeScript to JavaScript 294 | npm run start # Runs compiled version 295 | npm run dev:chat # Interactive chat (development) 296 | npm run dev:ingest # PDF ingestion (development) 297 | ``` 298 | 299 | ## 📚 Complete Tutorial 300 | 301 | A detailed tutorial is available in the [tutorial/article-en.md](./tutorial/article-en.md) file. It covers everything from initial setup to complete execution of the RAG system, with step-by-step explanations and screenshots. 302 | 303 | ## 🤝 Contributing 304 | 305 | Contributions are welcome! Feel free to: 306 | 307 | 1. Fork the project 308 | 2. Create a feature branch 309 | 3. Commit your changes 310 | 4. Create a Pull Request 311 | 312 | ## 📝 License 313 | 314 | This project is under the MIT license. See the `LICENSE` file for more details. 
315 | 316 | ## 👨‍💻 Author 317 | 318 | - **Twitter** - [@glaucia86](https://twitter.com/glaucia86) 319 | - **LinkedIn** - [Glaucia Lemos](https://www.linkedin.com/in/glaucialemos/) 320 | - **YouTube** - [Glaucia Lemos](https://www.youtube.com/@GlauciaLemos) 321 | 322 | --- 323 | 324 | ⭐ **If this project was helpful, leave a star on GitHub!** -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤖 RAG Search Ingestion - LangChain.js + Docker + Gemini 2 | 3 | ![Node.js](https://img.shields.io/badge/Node.js-22+-339933?style=for-the-badge&logo=node.js&logoColor=white) 4 | ![TypeScript](https://img.shields.io/badge/TypeScript-5.9+-3178C6?style=for-the-badge&logo=typescript&logoColor=white) 5 | ![LangChain](https://img.shields.io/badge/LangChain.js-0.3+-00A86B?style=for-the-badge&logo=chainlink&logoColor=white) 6 | ![Google Gemini](https://img.shields.io/badge/Google%20Gemini-API-4285F4?style=for-the-badge&logo=google&logoColor=white) 7 | ![PostgreSQL](https://img.shields.io/badge/PostgreSQL-15+-336791?style=for-the-badge&logo=postgresql&logoColor=white) 8 | ![pgVector](https://img.shields.io/badge/pgVector-Extension-336791?style=for-the-badge&logo=postgresql&logoColor=white) 9 | ![Docker](https://img.shields.io/badge/Docker-Compose-2496ED?style=for-the-badge&logo=docker&logoColor=white) 10 | ![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge) 11 | ![Copilot Powered](https://img.shields.io/badge/Copilot-Powered%20by-blue?logo=github) 12 | 13 | Uma aplicação completa de **Retrieval-Augmented Generation (RAG)** para busca inteligente em documentos PDF, construída com TypeScript, Node.js e tecnologias modernas de IA. 14 | 15 | ## 📋 Índice 16 | 17 | - [Visão Geral](#-visão-geral) 18 | - [Tecnologias Utilizadas](#-tecnologias-utilizadas) 19 | - [Arquitetura](#-arquitetura) 20 | - [Pré-requisitos](#-pré-requisitos) 21 | - [Configuração](#-configuração) 22 | - [Como Executar](#-como-executar) 23 | - [Como Usar](#-como-usar) 24 | - [Exemplos de Perguntas](#-exemplos-de-perguntas) 25 | - [Estrutura do Projeto](#-estrutura-do-projeto) 26 | - [Funcionalidades](#-funcionalidades) 27 | - [Troubleshooting](#-troubleshooting) 28 | - [Tutorial Completo](#-tutorial-completo) 29 | 30 | ## 🎯 Visão Geral 31 | 32 | Este projeto implementa um sistema RAG completo que permite fazer perguntas em linguagem natural sobre o conteúdo de documentos PDF. O sistema processa documentos, cria embeddings vetoriais, armazena em um banco de dados PostgreSQL com pgVector e responde perguntas usando Google Gemini. 33 | 34 | ### Como Funciona 35 | 36 | 1. **Ingestão**: O sistema carrega e processa documentos PDF, dividindo-os em chunks 37 | 2. **Vetorização**: Cada chunk é convertido em embeddings usando Google Gemini 38 | 3. **Armazenamento**: Os embeddings são armazenados no PostgreSQL com extensão pgVector 39 | 4. **Busca**: Quando você faz uma pergunta, o sistema encontra os chunks mais relevantes 40 | 5. 
**Geração**: O Google Gemini gera uma resposta baseada no contexto encontrado 41 | 42 | ## 🛠 Tecnologias Utilizadas 43 | 44 | ### Backend & Processamento 45 | - **Node.js 22+** - Runtime JavaScript 46 | - **TypeScript** - Linguagem de programação tipada 47 | - **LangChain.js** - Framework para aplicações de IA 48 | - **TSX** - Executor TypeScript para desenvolvimento 49 | 50 | ### Banco de Dados & Vetores 51 | - **PostgreSQL 15** - Banco de dados relacional 52 | - **pgVector** - Extensão para busca vetorial 53 | - **Docker & Docker Compose** - Containerização 54 | 55 | ### IA & Machine Learning 56 | - **Google Gemini API** - Modelo de linguagem para embeddings e chat 57 | - **models/embedding-001** - Modelo para criar embeddings 58 | - **gemini-2.0-flash** - Modelo para geração de respostas 59 | 60 | ### Processamento de Documentos 61 | - **pdf-parse** - Extração de texto de PDFs 62 | - **RecursiveCharacterTextSplitter** - Divisão inteligente de texto 63 | 64 | ## 🏗 Arquitetura 65 | 66 | ``` 67 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 68 | │ PDF Document │ │ Text Splitter │ │ Embeddings │ 69 | │ │───▶│ │───▶│ (Gemini) │ 70 | │ document.pdf │ │ LangChain.js │ │ │ 71 | └─────────────────┘ └──────────────────┘ └─────────────────┘ 72 | │ 73 | ▼ 74 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 75 | │ User Question │ │ Similarity │ │ PostgreSQL │ 76 | │ │───▶│ Search │◀───│ + pgVector │ 77 | │ "Qual empresa │ │ │ │ │ 78 | │ tem maior │ └──────────────────┘ └─────────────────┘ 79 | │ faturamento?" │ │ 80 | └─────────────────┘ ▼ 81 | ▲ ┌──────────────────┐ 82 | │ │ Context │ 83 | │ │ Assembly │ 84 | │ │ │ 85 | │ └──────────────────┘ 86 | │ │ 87 | │ ▼ 88 | │ ┌──────────────────┐ 89 | │ │ Google Gemini │ 90 | └──────────────│ Response │ 91 | │ Generation │ 92 | └──────────────────┘ 93 | ``` 94 | 95 | ## 📋 Pré-requisitos 96 | 97 | - **Node.js 22+** instalado 98 | - **Docker & Docker Compose** instalados 99 | - **Google AI Studio API Key** (gratuita) 100 | - **Git** para clonar o repositório 101 | 102 | ## ⚙️ Configuração 103 | 104 | ### 1. Clone o Repositório 105 | 106 | ```bash 107 | git clone https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini.git 108 | cd rag-search-ingestion-langchainjs-gemini 109 | ``` 110 | 111 | ### 2. Instale as Dependências 112 | 113 | ```bash 114 | npm install 115 | ``` 116 | 117 | ### 3. Configure as Variáveis de Ambiente 118 | 119 | Crie um arquivo `.env` na raiz do projeto: 120 | 121 | ```env 122 | # Google AI API Configuration 123 | GOOGLE_API_KEY=sua_api_key_aqui 124 | 125 | # Modelos Google (opcional) 126 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 127 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 128 | 129 | # Database Configuration 130 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 131 | PG_VECTOR_COLLECTION_NAME=pdf_documents 132 | 133 | # PDF Configuration 134 | PDF_PATH=./document.pdf 135 | ``` 136 | ### 4. Obtenha sua Google API Key 137 | 138 | 1. Acesse [Google AI Studio](https://aistudio.google.com/) 139 | 2. Crie uma nova API Key 140 | 3. 
Copie e cole no arquivo `.env` 141 | 142 | ## 🚀 Como Executar 143 | 144 | ### Passo 1: Iniciar o Banco de Dados 145 | 146 | ```bash 147 | docker-compose up -d 148 | ``` 149 | 150 | ### Passo 2: Processar o PDF (Ingestão) 151 | 152 | ```bash 153 | npm run dev:ingest 154 | ``` 155 | 156 | ### Passo 3: Iniciar o Chat Interativo 157 | 158 | ```bash 159 | npm run dev:chat 160 | ``` 161 | 162 | ## 💬 Como Usar 163 | 164 | Após executar o chat, você verá a interface: 165 | 166 | ``` 167 | ============================================================ 168 | 🤖 RAG CHAT - Sistema de Perguntas e Respostas em PDF 169 | Powered by Google Gemini + LangChain + pgVector 170 | ⚡ TypeScript + Node.js Implementation 171 | ============================================================ 172 | 173 | Sistema pronto! Digite sua pergunta ou "help" para ver comandos. 174 | 175 | 💬 Faça uma pergunta: _ 176 | ``` 177 | 178 | ### Comandos Especiais 179 | 180 | - `help` - Mostra ajuda e comandos disponíveis 181 | - `status` - Verifica o status do sistema 182 | - `clear` - Limpa a tela 183 | - `exit` - Sai do chat 184 | 185 | ## 🎯 Exemplos de Perguntas 186 | 187 | ### Perguntas sobre Faturamento 188 | ``` 189 | Qual empresa teve o maior faturamento? 190 | Qual o faturamento da empresa Aliança Energia? 191 | Liste as 5 empresas com maior receita 192 | Quais empresas faturaram mais de 1 bilhão? 193 | ``` 194 | 195 | ### Perguntas sobre Empresas 196 | ``` 197 | Quantas empresas estão listadas no documento? 198 | Qual empresa foi fundada mais recentemente? 199 | Liste empresas fundadas na década de 1990 200 | Qual empresa tem o menor faturamento? 201 | ``` 202 | 203 | ### Perguntas Analíticas 204 | ``` 205 | Qual setor tem mais empresas? 206 | Compare o faturamento entre diferentes setores 207 | Qual a média de faturamento das empresas? 208 | Quantas empresas foram fundadas em cada década? 209 | ``` 210 | 211 | ### Perguntas Específicas 212 | ``` 213 | Existe alguma empresa de tecnologia na lista? 214 | Quais empresas têm "Sustentável" no nome? 215 | Liste empresas do setor de energia 216 | Qual empresa do setor automotivo tem maior faturamento? 
217 | ``` 218 | 219 | ## 📁 Estrutura do Projeto 220 | 221 | ``` 222 | rag-search-ingestion-langchainjs-gemini/ 223 | ├── src/ 224 | │ ├── chat.ts # Interface de chat interativo 225 | │ ├── search.ts # Pipeline RAG e busca semântica 226 | │ ├── ingest.ts # Processamento e ingestão de PDFs 227 | │ └── google-client.ts # Cliente Google Gemini API 228 | ├── docker-compose.yml # Configuração PostgreSQL + pgVector 229 | ├── document.pdf # Documento de exemplo 230 | ├── package.json # Dependências e scripts 231 | ├── tsconfig.json # Configuração TypeScript 232 | ├── .env # Variáveis de ambiente (criar) 233 | └── README.md # Esta documentação 234 | ``` 235 | 236 | ### Descrição dos Arquivos 237 | 238 | - **`chat.ts`** - Interface principal do usuário com readline 239 | - **`search.ts`** - Implementa o pipeline RAG completo (4 etapas) 240 | - **`ingest.ts`** - Processa PDFs e cria embeddings vetoriais 241 | - **`google-client.ts`** - Integração com Google Gemini API 242 | 243 | ## ✨ Funcionalidades 244 | 245 | ### 🔍 Busca Semântica Inteligente 246 | 247 | - Encontra informações relevantes mesmo com sinônimos 248 | - Busca contextual usando embeddings vetoriais 249 | - Ranking de relevância automático 250 | 251 | ### 🤖 Respostas Naturais 252 | 253 | - Respostas em português natural 254 | - Baseadas exclusivamente no conteúdo do PDF 255 | - Contexto preservado durante a conversa 256 | 257 | ### ⚡ Performance Otimizada 258 | 259 | - Cache de embeddings no PostgreSQL 260 | - Busca vetorial ultrarrápida com pgVector 261 | - Processamento assíncrono 262 | 263 | ### 🛡️ Tratamento de Erros 264 | 265 | - Validação de entrada robusta 266 | - Fallbacks para problemas de API 267 | - Mensagens de erro amigáveis 268 | 269 | ## 🔧 Troubleshooting 270 | 271 | ### Problema: "Error connecting to database" 272 | ```bash 273 | # Verificar se PostgreSQL está rodando 274 | docker ps 275 | 276 | # Reiniciar containers 277 | docker-compose down 278 | docker-compose up -d 279 | ``` 280 | 281 | ### Problema: "Google API Key invalid" 282 | 1. Verifique se a API Key está correta no `.env` 283 | 2. Confirme que a API está ativa no Google AI Studio 284 | 3. Verifique se não há espaços ou caracteres extras 285 | 286 | ### Problema: "No documents found" 287 | 288 | ```bash 289 | # Executar ingestão novamente 290 | npm run dev:ingest 291 | 292 | # Verificar documentos no banco 293 | docker exec postgres_rag_ts psql -U postgres -d rag -c "SELECT COUNT(*) FROM pdf_documents;" 294 | ``` 295 | 296 | ### Problema: "429 Too Many Requests" 297 | 298 | - Aguarde alguns minutos (limite de quota) 299 | - Verifique seu plano no Google AI Studio 300 | - Considere usar uma nova API Key se disponível 301 | 302 | ## 📊 Scripts Disponíveis 303 | 304 | ```bash 305 | npm run build # Compila TypeScript para JavaScript 306 | npm run start # Executa versão compilada 307 | npm run dev:chat # Chat interativo (desenvolvimento) 308 | npm run dev:ingest # Ingestão de PDF (desenvolvimento) 309 | ``` 310 | 311 | ## 📚 Tutorial Completo 312 | 313 | Um tutorial detalhado está disponível no arquivo [tutorial/article.md](./tutorial/article.md). Ele cobre desde a configuração inicial até a execução completa do sistema RAG, com explicações passo a passo e capturas de tela. 314 | 315 | ## 🤝 Contribuição 316 | 317 | Contribuições são bem-vindas! Sinta-se à vontade para: 318 | 319 | 1. Fazer fork do projeto 320 | 2. Criar uma feature branch 321 | 3. Fazer commit das mudanças 322 | 4. 
Criar um Pull Request 323 | 324 | ## 📝 Licença 325 | 326 | Este projeto está sob a licença MIT. Veja o arquivo `LICENSE` para mais detalhes. 327 | 328 | ## 👨‍💻 Autor 329 | 330 | - **Twitter** - [@glaucia86](https://twitter.com/glaucia86) 331 | - **LinkedIn** - [Glaucia Lemos](https://www.linkedin.com/in/glaucialemos/) 332 | - **YouTube** - [Glaucia Lemos](https://www.youtube.com/@GlauciaLemos) 333 | 334 | --- 335 | 336 | ⭐ **Se este projeto foi útil, deixe uma estrela no GitHub!** 337 | -------------------------------------------------------------------------------- /tutorial/article-en.md: -------------------------------------------------------------------------------- 1 | # Complete RAG System: Zero to Hero with TypeScript, Docker, Google Gemini and LangChain.js 2 | 3 | ![alt text](./resource/rag-docker-ts-langchain.jpg) 4 | 5 | The implementation of Retrieval-Augmented Generation (RAG) systems represents one of the most promising approaches to solve the fundamental limitations of modern Large Language Models. This article presents a complete journey in building a robust and scalable RAG system, using **[TypeScript](https://www.typescriptlang.org/)** as the development foundation, **[Docker](https://www.docker.com/)** for infrastructure orchestration, **[Google Gemini](https://ai.google.dev/gemini-api/docs/quickstart?hl=pt-br)** for artificial intelligence, and **[LangChain.js](https://js.langchain.com/docs/introduction/)** as the integration framework. 6 | 7 | Our solution allows users to ask natural language questions about PDF documents, combining advanced semantic search with precise contextual answer generation. The system demonstrates how to integrate cutting-edge technologies to create practical and scalable AI applications, covering everything from document extraction and processing to the generation of contextually relevant responses. 8 | 9 | The main technologies that form the backbone of this implementation include Node.js version 22 or higher for modern JavaScript runtime, TypeScript 5.9 or higher for robust static typing, LangChain.js 0.3 or higher as AI orchestration framework, Google Gemini API for embeddings and text generation, PostgreSQL 15 or higher with the pgVector extension for vectorial storage and search, and Docker for containerization and simplified deployment. 10 | 11 | > Note: as many already know, I'm taking the **[MBA in Software Engineering in A.I at FullCycle](https://ia.fullcycle.com.br/mba-ia/?utm_source=google_search&utm_campaign=search_mba-arquitetura&utm_medium=curso_especifico&utm_content=search_mba-arquitetura&gad_source=1&gad_campaignid=21917349974&gclid=Cj0KCQjww4TGBhCKARIsAFLXndQejvz0K1XTOHQ3CSglzOlQfVH64T2CS1qZnwkiyChx0HoXzaK4KY0aAosOEALw_wcB)**, and this article is based on one of the practical projects from the course. I'm not doing promotion, just sharing the knowledge learned so that others can benefit too. But if you want to know more about the MBA, click on the previous link. 12 | 13 | ## Understanding RAG and its fundamental importance 14 | 15 | ### The Challenge of Traditional LLMs 16 | 17 | Large Language Models like GPT, Claude, and Gemini have revolutionized natural language processing, but face limitations that prevent their direct application in business and specialized scenarios. The knowledge of these models remains static, being limited to training data up to a specific date, creating a temporal gap that can be critical in domains where updated information is essential. 
18 | 19 | Additionally, these models tend to produce hallucinations, inventing information when they don't have sufficient knowledge about a topic. This characteristic can be particularly problematic in applications that require factual precision. LLMs also lack specific context about companies' internal data or specialized documents, limiting their usefulness in scenarios where specialized knowledge is necessary. 20 | 21 | The impossibility of post-training updates represents another significant obstacle. Once trained, a model cannot learn new facts or incorporate updated information without a complete retraining process, which is costly and complex. 22 | 23 | ## RAG as an elegant architectural solution 24 | 25 | Retrieval-Augmented Generation emerges as an architecture that elegantly solves these limitations through the combination of two fundamental components. 26 | 27 | - **The Retrieval component:** functions as an intelligent search system that finds relevant information in an external knowledge base. 28 | 29 | - **The Generation component:** uses an LLM to generate responses based exclusively on the retrieved context, ensuring that responses are grounded in verifiable information. 30 | 31 | The processing flow follows a logical sequence where a user query is converted into a vector embedding, which is then used for similarity search in the vector database. The most relevant documents are retrieved and concatenated into a context, which is provided to the LLM along with the original question for final response generation. 32 | 33 | ## Transformative technical advantages 34 | 35 | The RAG architecture offers factuality through responses based on verifiable sources, eliminating the need to rely exclusively on the model's internal knowledge. Updates are guaranteed as the knowledge base can be updated without needing to retrain the model, allowing incorporation of new documents and information in real-time. 36 | 37 | Transparency is a fundamental characteristic, as it allows tracking the sources of information used in response generation. Cost-effectiveness is significant, as it avoids the need for model fine-tuning, which requires massive computational resources and specialized technical expertise. 38 | 39 | ## System architecture: comprehensive technical vision 40 | 41 | ### Detailed high-level architecture 42 | 43 | The RAG system architecture can be visualized as a processing pipeline that transforms PDF documents into a searchable knowledge base and uses this base to answer natural language questions. The process begins with a PDF document that goes through text extraction, followed by intelligent segmentation using LangChain.js. The resulting segments are converted into vector embeddings through the Gemini model. 44 | 45 | > Note: although the article focuses on PDF files, in a RAG application, we could use any data source, such as: relational databases, NoSQL, APIs, Word documents, Excel spreadsheets, among others. 46 | 47 | These embeddings are stored in PostgreSQL with the **[pgVector](https://www.postgresql.org/about/news/pgvector-070-released-2852/)** extension, creating a searchable knowledge base. When a user asks a question, it is converted into an embedding and used for similarity search in the vector database. The most relevant documents are retrieved and assembled into context, which is then sent to Google Gemini along with the question for final response generation. 48 | 49 | ### After all, what are embeddings? 
50 | 51 | Embeddings are numerical representations of data, such as text or images, in a high-dimensional vector space. They capture the semantic meaning of data, allowing machines to understand and process information more effectively. In the context of RAG, embeddings are used to transform queries and documents into vectors that can be compared to find similarities. 52 | 53 | - Example: 54 | 55 | ```text 56 | "cat" -> [0.1, 0.3, 0.5, ...] 57 | "dog" -> [0.2, 0.4, 0.6, ...] 58 | ``` 59 | 60 | I recommend the official Gemini documentation that explains embeddings in more detail: **[Embeddings](https://ai.google.dev/gemini-api/docs/embeddings?hl=pt-br)** 61 | 62 | ## Technological components in depth 63 | 64 | To keep the application simple and easy to run, I used interfaces that utilize Node.js with TypeScript for runtime and robust static typing. The Readline Interface provides an interactive CLI for testing and demonstrations, allowing natural interaction with the system. 65 | 66 | For document processing, we use the following libraries: 67 | 68 | - **[LangChain.js](https://js.langchain.com/docs/introduction/):** serves as the main framework for LLM applications, offering high-level abstractions for common tasks. 69 | 70 | - **[RecursiveCharacterTextSplitter](https://js.langchain.com/docs/concepts/text_splitters/):** implements intelligent chunking algorithm that preserves semantic context. 71 | 72 | - **[PDF-Parse](https://www.npmjs.com/package/pdf-parse):** performs clean text extraction from PDF documents. 73 | 74 | Embeddings and AI are managed through the Google Gemini API, using the embedding-001 model for generating 768-dimension embeddings and **[gemini-2.0-flash](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash?hl=pt-br)** for optimized response generation. 75 | 76 | The vector database combines _PostgreSQL 15_ or higher as a robust relational database with _pgVector_ as an extension for efficient vector search. _HNSW Indexing_ implements approximate search algorithm that offers performance for searches in milliseconds even with large data volumes. 77 | 78 | The infrastructure uses _Docker Compose_ for container orchestration, simplifying deployment and dependency management. Environment Variables provide flexible and secure configuration. 79 | 80 | ### What is HNSW Indexing? 81 | 82 | _HNSW Indexing_ stands for _Hierarchical Navigable Small World Graph Indexing._ 83 | It's a technique widely used in approximate nearest neighbor search _(Approximate Nearest Neighbor Search – ANN)_ in vector databases, such as when you need to retrieve embeddings of text, images, or audio quickly. 84 | 85 | #### How does it work? 86 | 87 | - It organizes vectors in a hierarchical graph structure. 88 | 89 | - In upper levels, you have more general connections between vectors, which allow "long jumps" through the search space. 90 | 91 | - As you go down levels, the graphs become denser and more connected, allowing more precise and local searches. 92 | 93 | - This creates a balance between speed (quick jumps between regions) and precision (fine-tuning at lower levels). 94 | 95 | #### Why is it important? 96 | 97 | - **High performance:** can search neighbors in millions of vectors with low latency. 98 | 99 | - **Scalability:** it's efficient in both memory and time, even on large databases. 100 | 101 | - **Common use:** it's the default index in various vector database libraries, such as FAISS (Facebook AI Similarity Search), Milvus, Weaviate, and Pinecone. 
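To make vector proximity concrete before the practical example, here is a small TypeScript sketch of cosine similarity, the metric behind pgVector's `vector_cosine_ops`. The function and the toy vectors are illustrative only and are not part of the project's source code:

```typescript
// Cosine similarity: close to 1 = same direction (related meaning),
// close to 0 = unrelated, close to -1 = opposite.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Toy 3-dimensional "embeddings" (real embedding-001 vectors have 768 dimensions).
console.log(cosineSimilarity([0.1, 0.3, 0.5], [0.2, 0.4, 0.6])); // ≈ 0.99: related
```

An HNSW index accelerates exactly this kind of comparison: instead of scoring the query against every stored vector, it navigates the graph toward the most similar region.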
102 | 103 | ### Practical example 104 | 105 | Imagine you have 10 million document embeddings. If you were to compare each query with all of them, it would be unfeasible. 106 | 107 | With HNSW, you can find the semantically closest documents in milliseconds, without going through all vectors. 108 | 109 | I won't go into details about HNSW Indexing, but if you want to take a look at a practical implementation using TypeScript, here's the link to the project repository I created: **[HNSW + Gemini + LangChain.js - Clean Architecture](https://github.com/glaucia86/hnsw-gemini-langchainjs)**. In another article, I can detail more about HNSW Indexing and break this implementation into parts to make it easier to understand. 110 | 111 | ## Detailed RAG Pipeline 112 | 113 | The ingestion pipeline follows the sequence: 114 | 115 | > PDF → Text Extraction → Chunking → Embeddings → Vector Storage. 116 | 117 | Each step is optimized to preserve maximum semantic information while preparing data for efficient search. 118 | 119 | The query pipeline executes: 120 | 121 | > User Query → Query Embedding → Similarity Search → Context Assembly → LLM Generation → Response. 122 | 123 | This process ensures that each response is grounded in specific evidence from the processed documents. 124 | 125 | ## Development Environment Configuration 126 | 127 | ### Essential Technical Prerequisites 128 | 129 | The development environment requires the following minimum versions: 130 | 131 | - **Node.js version 22.0.0 or higher** - For support of the latest JavaScript features and optimized performance 132 | - **NPM version 10.0.0 or higher** - Necessary for modern dependency management 133 | - **Docker version 24.0.0 or higher** - Ensures compatibility with advanced containerization features 134 | - **Git version 2.40.0 or higher** - Essential for version control 135 | 136 | To verify the installed versions, run the following commands in your terminal: 137 | 138 | ```bash 139 | node --version # v22.0.0+ 140 | npm --version # 10.0.0+ 141 | docker --version # 24.0.0+ 142 | git --version # 2.40.0+ 143 | ``` 144 | 145 | ## Complete Project Initialization 146 | 147 | The project structure starts with creating a main directory and subdirectory for source code: 148 | 149 | ```bash 150 | mkdir rag-system-typescript && cd rag-system-typescript 151 | mkdir src 152 | ``` 153 | 154 | Node.js initialization is done through the command: 155 | 156 | ```bash 157 | npm init -y 158 | ``` 159 | 160 | This command creates the `package.json` file with default configurations. 161 | 162 | Production dependencies include essential packages for system functionality: 163 | 164 | ```bash 165 | npm install @google/generative-ai @langchain/core @langchain/community @langchain/textsplitters dotenv pg uuid 166 | ``` 167 | 168 | These libraries provide Google AI integration, LangChain framework, environment variable manipulation, PostgreSQL connection, and unique identifier generation. 169 | 170 | Development dependencies ensure a robust development experience: 171 | 172 | ```bash 173 | npm install -D @types/node @types/pg @types/pdf-parse tsx typescript 174 | ``` 175 | 176 | These include TypeScript type definitions, TypeScript compiler, and tsx development executor. 177 | 178 | ## Advanced TypeScript Configuration 179 | 180 | The `tsconfig.json` file defines compilation configurations that optimize for modern development and performance. 181 | 182 |
tsconfig.json 183 |
184 | 185 | ```json 186 | { 187 | "compilerOptions": { 188 | "target": "ES2022", 189 | "module": "ESNext", 190 | "moduleResolution": "node", 191 | "outDir": "./dist", 192 | "rootDir": "./src", 193 | "strict": true, 194 | "esModuleInterop": true, 195 | "skipLibCheck": true, 196 | "forceConsistentCasingInFileNames": true, 197 | "resolveJsonModule": true, 198 | "allowSyntheticDefaultImports": true, 199 | "experimentalDecorators": true, 200 | "emitDecoratorMetadata": true, 201 | "declaration": true, 202 | "declarationMap": true, 203 | "sourceMap": true, 204 | "types": ["node"], 205 | "lib": ["ES2022", "DOM"] 206 | }, 207 | "include": [ 208 | "src/**/*" 209 | ], 210 | "exclude": [ 211 | "node_modules", 212 | "dist", 213 | "**/*.test.ts", 214 | "**/*.spec.ts" 215 | ], 216 | "ts-node": { 217 | "esm": true 218 | } 219 | } 220 | ``` 221 | 222 |
223 |
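With the configuration above in place, a quick optional sanity check (not one of the repository's scripts) is to run the compiler in check-only mode:

```bash
npx tsc --noEmit   # type-checks src/ against tsconfig.json without writing to dist/
```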
224 | 225 | ## Intelligent Automation Scripts 226 | 227 | The scripts in `package.json` automate common tasks: 228 | 229 | ```json 230 | "scripts": { 231 | "build": "tsc", 232 | "start": "npm run build && node dist/chat.js", 233 | "ingest": "npm run build && node dist/ingest.js", 234 | "dev:chat": "tsx src/chat.ts", 235 | "dev:ingest": "tsx src/ingest.ts" 236 | }, 237 | ``` 238 | 239 | ## Infrastructure: PostgreSQL + pgVector 240 | 241 | ### Theoretical Foundations of Vector Databases 242 | 243 | Mathematical embeddings represent a revolution in how computers process and understand natural language. Texts are converted into high-dimensionality vectors, where each dimension captures specific aspects of semantic meaning. For the _Gemini embedding-001_ model, each text is represented by 768 floating-point numbers. 244 | 245 | Proximity in vector space represents semantic similarity, allowing mathematical algorithms to find related texts through distance calculations. For example, the phrases _"company revenue"_ and _"corporate income"_ would produce close vectors in multidimensional space. 246 | 247 | _pgVector_ adds native vector capabilities to PostgreSQL, including vector data type for efficient storage, HNSW (Hierarchical Navigable Small World) indexes for fast search, and similarity operations like cosine distance, Euclidean distance, and inner product. 248 | 249 | ## Advanced Docker Configuration 250 | 251 | The `docker-compose.yml` file defines complete infrastructure for the RAG system. The PostgreSQL service uses the **pgvector/pgvector:pg17** image which includes PostgreSQL 17 with pre-installed pgVector extension. 252 | 253 |
docker-compose.yml 254 |
255 | 256 | ```yaml 257 | services: 258 | # Main service: PostgreSQL with pgVector extension 259 | postgres: 260 | image: pgvector/pgvector:pg17 261 | container_name: postgres_rag_ts 262 | environment: 263 | POSTGRES_USER: postgres 264 | POSTGRES_PASSWORD: postgres 265 | POSTGRES_DB: rag 266 | ports: 267 | - "5432:5432" 268 | volumes: 269 | # Data persistence 270 | - postgres_data:/var/lib/postgresql/data 271 | healthcheck: 272 | # Checks if the database is ready 273 | test: ["CMD-SHELL", "pg_isready -U postgres -d rag"] 274 | interval: 10s 275 | timeout: 5s 276 | retries: 5 277 | restart: unless-stopped 278 | 279 | # Auxiliary service: Initializes pgVector extension 280 | bootstrap_vector_ext: 281 | image: pgvector/pgvector:pg17 282 | depends_on: 283 | postgres: 284 | condition: service_healthy 285 | entrypoint: ["/bin/sh", "-c"] 286 | command: > 287 | PGPASSWORD=postgres 288 | psql "postgresql://postgres@postgres:5432/rag" -v ON_ERROR_STOP=1 289 | -c "CREATE EXTENSION IF NOT EXISTS vector;" 290 | restart: "no" 291 | 292 | volumes: 293 | postgres_data: 294 | ``` 295 | 296 |
297 |
298 | 299 | The `bootstrap_vector_ext` service ensures that the pgVector extension is created automatically after PostgreSQL is operational. The healthcheck monitors database availability before initializing dependencies. 300 | 301 | ## Infrastructure Initialization and Verification 302 | 303 | Infrastructure initialization is done through the command: 304 | 305 | ```bash 306 | docker-compose up -d 307 | ``` 308 | 309 | This command starts containers in daemon mode. Status verification is performed with: 310 | 311 | ```bash 312 | docker ps 313 | ``` 314 | 315 | This command lists active containers. Logs can be monitored with: 316 | 317 | ```bash 318 | docker logs postgres_rag_ts 319 | ``` 320 | 321 | This command allows identifying initialization problems. 322 | 323 | ## Google Gemini Integration: Advanced AI Client 324 | 325 | ### In-depth Theory of Embeddings 326 | 327 | Embeddings represent one of the most significant innovations in natural language processing, converting discrete text representations into continuous vectors of real numbers. These vectors capture complex semantic relationships, allowing mathematical operations on linguistic concepts. 328 | 329 | The 768-number dimensionality for the embedding-001 model offers sufficient space to represent subtle semantic nuances while maintaining computational efficiency. Close vectors in multidimensional space correspond to semantically similar texts, allowing mathematical similarity search. 330 | 331 | Vector operations allow conceptual manipulation, where differences and sums of vectors can reveal analogical relationships. The classic example _"king" - "man" + "woman" ≈ "queen"_ demonstrates how embeddings capture abstract relational structures. 332 | 333 | ### Robust Google Client Implementation 334 | 335 | The Google client implementation encapsulates all communication with Gemini APIs, offering clean interface and robust error handling. 336 | 337 |
src/google-client.ts 338 |
339 | 340 | ```typescript 341 | import { config } from 'dotenv'; 342 | import { GoogleGenerativeAI } from '@google/generative-ai'; 343 | import { Embeddings } from '@langchain/core/embeddings'; 344 | 345 | config(); 346 | 347 | export interface ChatMessage { 348 | role: 'system' | 'user' | 'assistant'; 349 | content: string; 350 | } 351 | 352 | export class GoogleClient { 353 | private googleApiKey: string; 354 | private embeddingModel: string; 355 | private chatModel: string; 356 | private genAI: GoogleGenerativeAI; 357 | 358 | constructor() { 359 | this.googleApiKey = process.env.GOOGLE_API_KEY || ''; 360 | this.embeddingModel = process.env.GOOGLE_EMBEDDING_MODEL || 'models/embedding-001'; // documented default 361 | this.chatModel = process.env.GOOGLE_CHAT_MODEL || 'gemini-2.0-flash'; // documented default 362 | 363 | if (!this.googleApiKey) { 364 | throw new Error('Google API key is not set in environment variables.'); 365 | } 366 | 367 | this.genAI = new GoogleGenerativeAI(this.googleApiKey); 368 | } 369 | 370 | async getEmbeddings(texts: string[]): Promise<number[][]> { 371 | const embeddings: number[][] = []; 372 | 373 | for(const text of texts) { 374 | try { 375 | const model = this.genAI.getGenerativeModel({ model: this.embeddingModel }); 376 | const result = await model.embedContent(text); 377 | 378 | if (result.embedding && result.embedding.values) { 379 | embeddings.push(result.embedding.values); 380 | } else { 381 | console.log(`No embedding returned for text: ${text}`); 382 | const dummySize = 768; 383 | embeddings.push(new Array(dummySize).fill(0)); 384 | } 385 | } catch (error) { 386 | console.log(`Error generating embedding: ${error}`); 387 | const dummySize = 768; 388 | embeddings.push(new Array(dummySize).fill(0)); 389 | } 390 | } 391 | 392 | return embeddings; 393 | } 394 | 395 | async chatCompletions(messages: ChatMessage[], temperature: number = 0.1): Promise<string> { 396 | try { 397 | const model = this.genAI.getGenerativeModel({ 398 | model: this.chatModel, 399 | generationConfig: { 400 | temperature, 401 | maxOutputTokens: 1000, 402 | } 403 | }); 404 | 405 | let prompt = ''; 406 | for (const message of messages) { 407 | const { role, content } = message; 408 | 409 | if (role === 'system') { 410 | prompt += `Instructions: ${content}\n\n`; 411 | } else if (role === 'user') { 412 | prompt += `${content}\n`; 413 | } else if (role === 'assistant') { 414 | prompt += `Assistant: ${content}\n`; 415 | } 416 | } 417 | 418 | const result = await model.generateContent(prompt); 419 | return result.response.text(); 420 | } catch (error) { 421 | console.log(`Error generating chat completion: ${error}`); 422 | return 'Sorry, an error occurred while generating the response.'; 423 | } 424 | } 425 | } 426 | ``` 427 |
429 |
429 | 430 | The `GoogleClient` class manages configuration and communication with Gemini APIs. The `getEmbeddings` method processes the received texts one at a time, implementing graceful error handling and a zero-vector fallback for failure cases. `chatCompletions` converts structured messages into prompts optimized for Gemini. 431 | 432 | The `GoogleEmbeddings` class extends LangChain.js abstractions for seamless integration with existing frameworks, batching documents in groups of ten before delegating to `GoogleClient`. 433 | 434 |
src/google-client.ts (continued)
436 | 437 | ```typescript 438 | export class GoogleEmbeddings extends Embeddings { 439 | private client: GoogleClient; 440 | 441 | constructor() { 442 | super({}); 443 | this.client = new GoogleClient(); 444 | } 445 | 446 | async embedDocuments(texts: string[]): Promise<number[][]> { 447 | console.log(`Generating embeddings for ${texts.length} documents...`); 448 | 449 | const batchSize = 10; // Processing 10 texts at a time for better optimization 450 | const allEmbeddings: number[][] = []; 451 | 452 | for(let i = 0; i < texts.length; i += batchSize) { 453 | const batchTexts = texts.slice(i, i + batchSize); 454 | const batchEmbeddings = await this.client.getEmbeddings(batchTexts); 455 | allEmbeddings.push(...batchEmbeddings); 456 | 457 | console.log(`Batch ${Math.floor(i / batchSize) + 1}: ${batchTexts.length} processed texts`); 458 | } 459 | 460 | return allEmbeddings; 461 | } 462 | 463 | // Method for embedding a single query 464 | async embedQuery(text: string): Promise<number[]> { 465 | const embeddings = await this.client.getEmbeddings([text]); 466 | return embeddings[0]; 467 | } 468 | } 469 | 470 | // Factory function to create GoogleClient instances 471 | export function getGoogleClient(): GoogleClient { 472 | return new GoogleClient(); 473 | } 474 | ``` 475 |
477 |
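To illustrate how these pieces fit together, the hypothetical snippet below (not part of the repository) embeds two semantically related phrases and inspects the resulting vectors; it assumes the `.env` configuration described in the next section is already in place:

```typescript
import { GoogleEmbeddings } from './google-client';

async function embeddingSmokeTest(): Promise<void> {
  const embeddings = new GoogleEmbeddings();

  // Two phrases with related meanings should produce nearby vectors.
  const [revenue, income] = await embeddings.embedDocuments([
    'company revenue',
    'corporate income',
  ]);

  console.log(revenue.length); // 768 dimensions (embedding-001)
  console.log(income.length);  // 768 dimensions
}

embeddingSmokeTest();
```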
478 | 479 | ## Secure Environment Configuration 480 | 481 | The `.env` file centralizes sensitive configuration, separating credentials from source code for security and deployment flexibility. 482 | 483 | ```text 484 | GOOGLE_API_KEY=your_google_api_key_here 485 | GOOGLE_EMBEDDING_MODEL=models/embedding-001 486 | GOOGLE_CHAT_MODEL=gemini-2.0-flash 487 | DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag 488 | PG_VECTOR_COLLECTION_NAME=pdf_documents 489 | PDF_PATH=./document.pdf 490 | ``` 491 | 492 | > Note: to create a Google Gemini API Key, follow the steps described in the official documentation: **[AI Studio - Google](https://aistudio.google.com/apikey)** and click on: `Create API Key`. 493 | 494 | ## Ingestion System: PDF to Intelligent Vectors 495 | 496 | ### Advanced Chunking Theory 497 | 498 | Chunking represents one of the most critical aspects in RAG systems, determining the quality and relevance of responses. The fundamental challenge is that LLMs have limited context windows, while documents can be extensive, creating the need for intelligent segmentation. 499 | 500 | The chunking strategy must balance context size with information specificity. Chunks that are too large may contain irrelevant information that dilutes relevance. Chunks that are too small may lack sufficient context for complete understanding. 501 | 502 | The `RecursiveCharacterTextSplitter` (from LangChain.js) is very useful in textual documents, as it preserves the natural structure of paragraphs and sentences. In this case, parameters like `chunk_size` around 1,000 characters and `chunk_overlap` of 150–200 work as a good starting point, maintaining balance between context and specificity. 503 | 504 | However, since this project works with _tabular PDF_, this strategy is not the most effective. For tables, we prefer to break the document line by line, ensuring that each record is an independent chunk. Additionally, we include the table header in each fragment to maintain semantic clarity. This way, overlap is unnecessary (kept at 0) and separators are adapted to prioritize line breaks. 505 | 506 | This approach ensures that each tabular entry is preserved integrally and improves precision when retrieving information via RAG. 507 | 508 | ## Detailed `RecursiveCharacterTextSplitter` Algorithm 509 | 510 | The algorithm follows an intelligent fallback strategy that tries to break by natural separators before resorting to artificial breaks. First, it tries to break by paragraphs using double line breaks. If resulting chunks still exceed maximum size, then it breaks by simple lines. For still large chunks, it breaks by spaces between words. As a last resort, it breaks character by character. 511 | 512 | This approach ensures that related information stays together whenever possible, preserving semantic coherence necessary for effective retrieval. 513 | 514 | ## Complete Ingestion Implementation 515 | 516 | The ingestion implementation combines PDF extraction, intelligent segmentation, embedding generation, and vector storage in an integrated pipeline. 517 | 518 |
src/ingest.ts 519 |
520 | 521 | ```typescript 522 | import { config } from 'dotenv'; 523 | import { Document } from '@langchain/core/documents'; 524 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 525 | import { GoogleEmbeddings } from './google-client'; 526 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 527 | import { PDFLoader as LangChainPDFLoader } from '@langchain/community/document_loaders/fs/pdf'; 528 | 529 | config(); 530 | 531 | class PDFLoader { 532 | constructor(private filePath: string) {} 533 | 534 | async load(): Promise<Document[]> { 535 | try { 536 | console.log(`Reading PDF file: ${this.filePath}`); 537 | 538 | const langChainLoader = new LangChainPDFLoader(this.filePath); 539 | const documents = await langChainLoader.load(); 540 | 541 | console.log(`PDF loaded successfully! Found ${documents.length} pages`); 542 | return documents; 543 | } catch (error) { 544 | console.error('Error loading PDF:', error); 545 | throw error; 546 | } 547 | } 548 | 549 | async ingestToVectorStore(): Promise<void> { 550 | try { 551 | console.log('Starting PDF ingestion process...'); 552 | 553 | const rawDocuments = await this.load(); 554 | console.log(`PDF loaded: ${rawDocuments.length} sections`); 555 | 556 | console.log('Splitting documents into chunks...'); 557 | const textSplitter = new RecursiveCharacterTextSplitter({ 558 | chunkSize: 400, 559 | chunkOverlap: 0, 560 | }); 561 | 562 | const splitDocuments = await textSplitter.splitDocuments(rawDocuments); 563 | console.log(`Documents split into ${splitDocuments.length} chunks`); 564 | 565 | console.log('Initializing Google embeddings...'); 566 | const embeddings = new GoogleEmbeddings(); 567 | 568 | console.log('Connecting to PostgreSQL vector store...'); 569 | const vectorStore = await PGVectorStore.initialize(embeddings, { 570 | postgresConnectionOptions: { 571 | connectionString: process.env.DATABASE_URL, 572 | }, 573 | tableName: process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents', 574 | columns: { 575 | idColumnName: 'id', 576 | vectorColumnName: 'vector', 577 | contentColumnName: 'content', 578 | metadataColumnName: 'metadata', 579 | }, 580 | }); 581 | 582 | console.log('Adding documents to vector store...'); 583 | await vectorStore.addDocuments(splitDocuments); 584 | 585 | console.log('PDF ingestion completed successfully!'); 586 | console.log(`Total chunks processed: ${splitDocuments.length}`); 587 | 588 | await vectorStore.end(); 589 | 590 | } catch (error) { 591 | console.error('Error during PDF ingestion:', error); 592 | process.exit(1); 593 | } 594 | } 595 | } 596 | 597 | async function main() { 598 | const pdfPath = process.env.PDF_PATH || './document.pdf'; // honors PDF_PATH from .env 599 | const loader = new PDFLoader(pdfPath); 600 | await loader.ingestToVectorStore(); 601 | } 602 | 603 | // Run ingestion 604 | main(); 605 | ``` 606 | 607 |
608 |
609 | 610 | The `PDFLoader` class encapsulates the entire ingestion process, from file loading to storage in the vector database. The `load` method uses LangChain.js PDFLoader for robust text extraction. `ingestToVectorStore` coordinates the complete processing pipeline. 611 | 612 | ## Automatic PostgreSQL Schema 613 | 614 | The `PGVectorStore` automatically creates an optimized schema for vector storage and search. The pdf_documents table includes: 615 | 616 | - **id -** UUID primary key for unique identification 617 | - **content -** Original text of the chunk extracted from PDF 618 | - **vector -** 768-dimension embeddings generated by Gemini 619 | - **metadata -** Structural information like page, source, and context 620 | 621 | ```sql 622 | CREATE TABLE pdf_documents ( 623 | id UUID PRIMARY KEY, 624 | content TEXT, 625 | vector VECTOR(768), 626 | metadata JSONB 627 | ); 628 | 629 | CREATE INDEX ON pdf_documents USING hnsw (vector vector_cosine_ops); 630 | ``` 631 | 632 | The `HNSW index` optimizes vector search, offering logarithmic complexity versus traditional linear search. 633 | 634 | ## RAG Search System: Intelligent Retrieval + Generation 635 | 636 | ### Advanced Semantic Search Theory 637 | 638 | The semantic search pipeline represents a fundamental transformation in how computational systems find relevant information. Unlike traditional keyword search, semantic search uses vector representations to capture conceptual meaning. 639 | 640 | The process begins with converting the user's question into a vector embedding using the same model used during ingestion. This query embedding is then compared with all stored embeddings using mathematical similarity metrics. The HNSW algorithm accelerates this comparison, reducing complexity from O(n) to O(log n). 641 | 642 | Results are ranked by _similarity score_, where lower values indicate greater similarity in cosine space. _Context assembly_ concatenates the most relevant chunks, creating rich context for response generation. 643 | 654 | ## Anti-Hallucination Prompt Engineering 655 | 656 | The prompt template implements sophisticated strategies to _prevent hallucinations_ and ensure response factuality. Explicit instructions emphasize exclusive use of provided context. Fallback response provides default answer for cases where information is not available. Low temperature of 0.1 reduces creativity and increases determinism. Negative examples demonstrate cases where the correct answer is "I don't know".
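As an illustration of these strategies, a system prompt in this spirit could look like the sketch below; the exact wording and the `{context}` placeholder are assumptions, not the repository's actual template:

```typescript
// Illustrative anti-hallucination system prompt (hypothetical wording).
const ANTI_HALLUCINATION_PROMPT = `
Answer ONLY using the CONTEXT below.
If the answer is not in the CONTEXT, reply exactly:
"I don't have the necessary information to answer your question."
Never invent facts, numbers, or sources.

CONTEXT:
{context}
`;
```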
657 | 658 | This approach ensures that the system always recognizes limitations of available knowledge, preferring to admit ignorance rather than invent information. 659 | 660 | ## CLI Interface: Exceptional User Experience 661 | 662 | ### User-Centered Design 663 | 664 | The CLI interface was designed considering user experience principles applied to AI systems. Immediate feedback through progress indicators keeps users informed about ongoing operations. Special commands like `help, status, clear, and exit` offer intuitive control. Graceful error handling presents informative messages that guide users in problem resolution. Non-blocking asynchronous interface maintains responsiveness even during computationally intensive operations. 665 | 666 | ### Interactive Interface Implementation 667 | 668 | The implementation combines native Node.js readline with advanced command logic to create a fluid and intuitive experience. 669 | 670 |
src/chat.ts 671 |
672 | 673 | ```typescript 674 | import { createInterface } from "readline"; 675 | import { searchPrompt, RAGSearch } from "./search"; 676 | 677 | // Function to print initial banner with system information 678 | function printBanner(): void { 679 | console.log('='.repeat(60)); 680 | console.log('RAG CHAT - PDF Question and Answer System'); 681 | console.log('Powered by Google Gemini + LangChain + pgVector'); 682 | console.log('⚡ TypeScript + Node.js Implementation'); 683 | console.log('='.repeat(60)); 684 | console.log("Special commands:"); 685 | console.log(" • 'exit', 'quit', 'sair', 'q' - Closes the program"); 686 | console.log(" • 'help' - Shows available commands"); 687 | console.log(" • 'clear' - Clears the screen"); 688 | console.log(" • 'status' - Checks system status"); 689 | console.log('='.repeat(60)); 690 | } 691 | 692 | // Function to print help instructions 693 | function printHelp(): void { 694 | console.log('\n AVAILABLE COMMANDS:'); 695 | console.log(' exit, quit, sair, q - Closes the program'); 696 | console.log(' help - Shows available commands'); 697 | console.log(' clear - Clears the screen'); 698 | console.log(' status - Checks system status'); 699 | console.log(' [any text] - Asks a question about the PDF'); 700 | console.log('\n USAGE TIPS:'); 701 | console.log(' • Ask specific questions about the PDF content'); 702 | console.log(' • The system responds only based on the document'); 703 | console.log(' • Out-of-context questions return "I don\'t have information"'); 704 | console.log(); 705 | } 706 | 707 | // Function to clear the console screen 708 | function clearScreen(): void { 709 | console.clear(); 710 | } 711 | 712 | async function checkStatus(searchSystem: RAGSearch | null): Promise<void> { 713 | console.log('\n RAG SYSTEM STATUS:'); 714 | console.log('='.repeat(40)); 715 | 716 | if (!searchSystem) { 717 | console.log('System: NOT INITIALIZED'); 718 | console.log('\n TROUBLESHOOTING CHECKLIST:'); 719 | console.log(' 1. Is PostgreSQL running?'); 720 | console.log(' → Command: docker compose up -d'); 721 | console.log(' 2. Has ingestion been executed?'); 722 | console.log(' → Command: npm run ingest'); 723 | console.log(' 3. Is the API Key configured?'); 724 | console.log(' → File: .env (GOOGLE_API_KEY)'); 725 | console.log(' 4. Are dependencies installed?'); 726 | console.log(' → Command: npm install'); 727 | return; 728 | } 729 | 730 | try { 731 | const systemStatus = await searchSystem.getSystemStatus(); 732 | 733 | console.log('RAG System: OPERATIONAL'); 734 | console.log('PostgreSQL Connection: OK'); 735 | console.log('pgVector Extension: OK'); 736 | console.log('Google Gemini API: OK'); 737 | console.log(`Vector Database: ${systemStatus.isReady ?
'READY' : 'NOT READY'}`); 738 | 739 | if (systemStatus.chunksCount > 0) { 740 | console.log(`Available chunks: ${systemStatus.chunksCount}`); 741 | } 742 | 743 | console.log('\n System ready to answer questions!'); 744 | } catch (error) { 745 | console.log('Status: PARTIALLY OPERATIONAL'); 746 | console.log(`Error checking system status: ${error}`); 747 | } 748 | 749 | console.log('='.repeat(40)); 750 | } 751 | 752 | // Main function to initialize RAG system and handle user input 753 | async function main(): Promise<void> { 754 | console.log('STEP 6: Initializing the RAG Chat CLI Interface'); 755 | 756 | printBanner(); 757 | 758 | console.log('\n PHASE 1: INITIALIZING RAG SYSTEM'); 759 | const searchSystem = await searchPrompt(); 760 | 761 | if (!searchSystem) { 762 | console.log('\n CRITICAL ERROR: RAG system could not be initialized!'); 763 | console.log('\n POSSIBLE CAUSES AND SOLUTIONS:'); 764 | console.log(' 1. PostgreSQL is not running'); 765 | console.log(' → Solution: docker compose up -d'); 766 | console.log(' 2. Ingestion process has not been executed'); 767 | console.log(' → Solution: npm run ingest'); 768 | console.log(' 3. GOOGLE_API_KEY is not configured or invalid'); 769 | console.log(' → Solution: Configure in the .env file'); 770 | console.log(' 4. Node.js dependencies are not installed'); 771 | console.log(' → Solution: npm install'); 772 | console.log(' 5. pgVector extension has not been created'); 773 | console.log(' → Solution: Check Docker logs'); 774 | 775 | process.exit(1); 776 | } 777 | 778 | console.log('PHASE 1: RAG system initialized successfully!\n'); 779 | 780 | // PHASE 2: SETUP COMMAND LINE INTERFACE 781 | const rl = createInterface({ 782 | input: process.stdin, 783 | output: process.stdout, 784 | prompt: '\n Ask a question: ' 785 | }); 786 | 787 | // Helper function to capture user input asynchronously 788 | const askQuestion = (prompt: string): Promise<string> => { 789 | return new Promise((resolve) => { 790 | rl.question(prompt, resolve); 791 | }); 792 | }; 793 | 794 | console.log('System ready! Type your question or "help" to see commands.'); 795 | 796 | // PHASE 3: MAIN CHAT LOOP 797 | while(true) { 798 | try { 799 | // Capture user input 800 | const userInput = (await askQuestion('\n Ask a question: ')).trim(); 801 | 802 | // COMMAND PROCESSING: Analyze whether it is a special command or a question 803 | const command = userInput.toLowerCase(); 804 | 805 | // Exit commands 806 | if (['exit', 'quit', 'sair', 'q'].includes(command)) { 807 | console.log('\n Thank you for using RAG Chat. Goodbye!\n'); 808 | console.log('System shutting down...'); 809 | break; 810 | } 811 | 812 | // Help command 813 | if (['ajuda', 'help', 'h', '?'].includes(command)) { 814 | printHelp(); 815 | continue; 816 | } 817 | 818 | // Clear screen command 819 | if (['limpar', 'clear', 'cls'].includes(command)) { 820 | clearScreen(); 821 | printBanner(); 822 | continue; 823 | } 824 | 825 | // Status command 826 | if (['status', 'info', 's'].includes(command)) { 827 | await checkStatus(searchSystem); 828 | continue; 829 | } 830 | 831 | // Validate empty input 832 | if (!userInput) { 833 | console.log('Empty input. 
Type a question or "help" to see commands.'); 834 | continue; 835 | } 836 | 837 | // QUESTION PROCESSING: Forward the question to the RAG system 838 | console.log('\n Processing your question...'); 839 | console.log('Searching PDF knowledge...'); 840 | 841 | const startTime = Date.now(); 842 | 843 | // Call the complete RAG pipeline 844 | const answer = await searchSystem.generateAnswer(userInput); 845 | 846 | const endTime = Date.now(); 847 | const responseTime = ((endTime - startTime) / 1000).toFixed(2); 848 | 849 | // FORMATTED RESPONSE DISPLAY 850 | console.log('\n' + '='.repeat(80)); 851 | console.log(`QUESTION: ${userInput}`); 852 | console.log('='.repeat(80)); 853 | console.log(`🤖 RESPONSE:`); 854 | console.log(answer); 855 | console.log('='.repeat(80)); 856 | console.log(`⚡ Response time: ${responseTime}s`); 857 | } catch (error) { 858 | // ERROR HANDLING 859 | if (error instanceof Error && error.message.includes('SIGINT')) { 860 | // Ctrl+C was pressed 861 | console.log('\n\n Interruption detected (Ctrl+C)'); 862 | console.log('👋 Chat closed by user. See you next time!'); 863 | break; 864 | } else { 865 | // Other errors 866 | console.log(`\n Unexpected error during processing:`); 867 | console.log(` ${error}`); 868 | console.log('\n You can:'); 869 | console.log(' • Try again with another question'); 870 | console.log(' • Type "status" to check the system'); 871 | console.log(' • Type "exit" to quit'); 872 | } 873 | } 874 | } 875 | 876 | rl.close(); 877 | } 878 | 879 | // EVENT HANDLERS: Operating system signal management 880 | 881 | // Handler for Ctrl+C (SIGINT) 882 | process.on('SIGINT', () => { 883 | console.log('\n\n Interrupt signal received (Ctrl+C)'); 884 | console.log('Cleaning up resources...'); 885 | console.log('RAG Chat closed. See you later!'); 886 | process.exit(0); 887 | }); 888 | 889 | // Handler for uncaught errors 890 | process.on('uncaughtException', (error) => { 891 | console.error('\n Uncaught FATAL ERROR:', error); 892 | console.error('Restart the application: npm run start'); 893 | process.exit(1); 894 | }); 895 | 896 | // Handler for rejected promises 897 | process.on('unhandledRejection', (reason, promise) => { 898 | console.error('\n Unhandled rejected promise:', reason); 899 | console.error('Promise:', promise); 900 | }); 901 | 902 | // ENTRY POINT: Run the main function 903 | main().catch((error) => { 904 | console.error('\n FATAL ERROR in main application:', error); 905 | console.error('Try restarting: npm run start'); 906 | process.exit(1); 907 | }); 908 | ``` 909 | 910 |
911 |
912 | 913 | The `RAGSearch` class encapsulates complete search and generation functionality. `searchDocuments` executes vector search and returns formatted results with scores. `generateAnswer` orchestrates the complete RAG pipeline. 914 | 915 | The `printBanner` function presents essential information about the system and available commands. `checkStatus` offers detailed component diagnostics, facilitating troubleshooting. The main loop processes commands and questions with robust error handling. 916 | 917 | ## Comprehensive Execution and Validation 918 | 919 | ### Optimized Execution Sequence 920 | 921 | Execution follows a logical sequence that ensures correct initialization of all components. First, initialize infrastructure: 922 | 923 | ```bash 924 | docker-compose up -d 925 | ``` 926 | 927 | This command brings up PostgreSQL with pgVector. Verify container status: 928 | 929 | ```bash 930 | docker ps 931 | ``` 932 | 933 | This command confirms correct operation. Execute ingestion to process PDF documents: 934 | 935 | ```bash 936 | npm run dev:ingest 937 | ``` 938 | 939 | Finally, start interactive chat for system interaction: 940 | 941 | ```bash 942 | npm run dev:chat 943 | ``` 944 | 945 | ## Comprehensive Test Scenarios 946 | 947 | The system supports various test scenarios that validate complete functionality. Questions within the PDF context should return responses based exclusively on processed content. Questions outside the context should result in the default response "I don't have the necessary information to answer your question." Special commands like status, help, and clear should work correctly. 948 | 949 | ## Systematic Troubleshooting 950 | 951 | Common problems have well-defined solutions that can be identified through specific error messages: 952 | 953 | - **Error: "Google API key is not set"**: This error indicates the need to configure the GOOGLE_API_KEY environment variable in the .env file. Verify that the file contains the valid API key obtained from Google AI Studio. 954 | 955 | - **Error: "Vector store not initialized"**: This message suggests that PostgreSQL is not operational or the ingestion process was not executed. Confirm that Docker containers are running and execute document ingestion. 956 | 957 | - **Error: "No documents found"**: This problem indicates that the ingestion process needs to be executed to populate the vector database with processed PDF chunks. 958 | 959 | - **Error: "Connection refused"**: This failure points to PostgreSQL being offline, resolvable by checking Docker container status and reinitializing infrastructure if necessary. 960 | 961 | ## Advanced Production Considerations 962 | 963 | ### Optimized Performance and Scalability 964 | 965 | The implemented optimizations ensure adequate performance for production use. Batch processing during ingestion implements rate limiting for external APIs, avoiding throttling. Connection pooling in PostgreSQL allows multiple simultaneous connections. HNSW indexing offers sub-second search even with millions of vectors. Asynchronous operations maintain application responsiveness. 966 | 967 | Performance metrics demonstrate system efficiency. Ingestion processes a 50-page PDF in approximately 30 seconds. Search returns results in 2-3 seconds per question. Throughput supports more than 100 questions per minute on modest hardware. 968 | 969 | ## Robust Security and Reliability 970 | 971 | Security implementations follow best practices for production applications. 
Environment variables isolate secrets from source code. Input validation and sanitization prevent injection attacks. Robust error handling prevents leakage of sensitive information. Graceful shutdown handling ensures proper resource cleanup. 972 | 973 | Recommended monitoring includes structured logs using libraries like `Winston` or `Pino`. Performance metrics can be collected with `Prometheus`. Automatic health checks monitor component availability. Rate limiting per user prevents resource abuse. This remains a tip for future improvements. 974 | 975 | ## Future Improvements Roadmap 976 | 977 | The technical roadmap identifies evolution opportunities. Migration from CLI to REST API will facilitate integration with web applications. `React` or `Next.js` interface will offer modern visual experience. Multi-tenancy support will allow multiple users and documents. `Redis` cache for frequent responses will reduce latency. `OpenTelemetry` integration will provide complete observability. 978 | 979 | ## References and Resources for Deep Dive 980 | 981 | ### Project Documentation and Repository 982 | 983 | The complete code for this RAG system is available in the official repository **[rag-search-ingestion-langchainjs-gemini](https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini)**, where you will find functional implementation, detailed installation instructions, usage examples, and complete documentation of all developed components. The repository includes Docker configuration files ready for production, automation scripts for development, and specific test cases that demonstrate practical application of the concepts presented in this article. 984 | 985 | ### RAG Theoretical Foundations 986 | 987 | For in-depth understanding of theoretical foundations, the original paper "**[Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks" by Lewis et al](https://dl.acm.org/doi/abs/10.5555/3495724.3496517)**. at the NeurIPS 2020 conference establishes the fundamental principles of RAG architecture. The research "**[Dense Passage Retrieval for Open-Domain Question Answering" by Karpukhin et al](https://arxiv.org/abs/2004.04906)**. explores advanced dense retrieval techniques that underpin modern semantic search systems. The work "**[In-Context Retrieval-Augmented Language Models](https://arxiv.org/abs/2302.00083)**" presents recent evolutions in dynamic context integration in language models. 988 | 989 | ### Technologies and Frameworks 990 | 991 | The official LangChain.js documentation at **[https://js.langchain.com/](https://js.langchain.com/)** offers complete guides on AI pipeline implementation, including specific tutorials on integration with different embedding providers and language models. The Google AI Developer Documentation at **[https://ai.google.dev/docs](https://ai.google.dev/docs)** provides detailed technical specifications about Gemini APIs, including rate limits, prompt engineering best practices, and performance optimizations. 992 | 993 | For PostgreSQL and pgVector, the official documentation at **[https://github.com/pgvector/pgvector](https://github.com/pgvector/pgvector)** contains technical specifications about HNSW index implementation, performance configurations, and scaling strategies for large volumes of vector data. The PostgreSQL Documentation at **[https://www.postgresql.org/docs/](https://www.postgresql.org/docs/)** offers fundamentals on database administration, query optimization, and advanced configurations for high-performance applications. 
994 | 995 | ### Embedding Models and Vector Search 996 | 997 | Deep understanding of embeddings can be expanded through the research "**[Attention Is All You Need](https://arxiv.org/abs/1706.03762)**" which introduces the Transformer architecture fundamental to modern embedding models. The paper "**[Efficient Estimation of Word Representations in Vector Space" by Mikolov et al](https://arxiv.org/abs/1301.3781)**. establishes mathematical foundations of semantic vector representations. For vector search algorithms, "**[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320)**" details implementation and optimizations of the HNSW algorithm used by pgVector. 998 | 999 | ### Prompt Engineering and Hallucination Control 1000 | 1001 | The research "**[Constitutional AI: Harmlessness from AI Feedback](https://arxiv.org/abs/2212.08073)**" explores advanced techniques for behavior control in language models. "**[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https://arxiv.org/abs/2201.11903)**" demonstrates prompt structuring strategies for complex reasoning. "**[Instruction Following with Large Language Models](https://arxiv.org/abs/2506.13734)**" offers insights on effective instruction design for RAG systems. 1002 | 1003 | ### Practical Resources and Tutorials 1004 | 1005 | LangChain Cookbook at **[https://github.com/langchain-ai/langchain/tree/master/cookbook](https://github.com/langchain-ai/langchain/tree/master/cookbook)** contains practical examples of implementing different RAG patterns. Pinecone Learning Center at **[https://www.pinecone.io/learn/](https://www.pinecone.io/learn/)** offers tutorials on vector databases and semantic search applications. Weaviate Documentation at **[https://weaviate.io/developers/weaviate/](https://weaviate.io/developers/weaviate/)** presents alternatives for vector storage and their technical specificities. 1006 | 1007 | ## Author and Contributions 1008 | 1009 | This project was developed by Glaucia Lemos, A.I Developer Specialist, who shares knowledge through multiple platforms. Her social media profiles include Twitter at **[https://twitter.com/glaucia86](https://twitter.com/glaucia86)** for technical updates and development insights, LinkedIn at **[https://www.linkedin.com/in/glaucialemos/](https://www.linkedin.com/in/glaucialemos/)** for professional networking and technical articles, and YouTube at **[https://www.youtube.com/@GlauciaLemos](https://www.youtube.com/@GlauciaLemos)** for video tutorials and technical talks about modern development. 1010 | -------------------------------------------------------------------------------- /tutorial/article.md: -------------------------------------------------------------------------------- 1 | # Sistema RAG Completo: Zero to Hero com TypeScript, Docker, Google Gemini e LangChain.js 2 | 3 | ![alt text](./resource/rag-docker-ts-langchain.jpg) 4 | 5 | A implementação de sistemas de Retrieval-Augmented Generation (RAG) representa uma das abordagens mais promissoras para resolver as limitações fundamentais dos Large Language Models modernos. 
Este artigo apresenta uma jornada completa na construção de um sistema RAG robusto e escalável, utilizando **[TypeScript](https://www.typescriptlang.org/)** como base de desenvolvimento, **[Docker](https://www.docker.com/)** para orquestração de infraestrutura, **[Google Gemini](https://ai.google.dev/gemini-api/docs/quickstart?hl=pt-br)** para inteligência artificial e **[LangChain.js](https://js.langchain.com/docs/introduction/)** como framework de integração. 6 | 7 | Nossa solução permite que usuários façam perguntas em linguagem natural sobre documentos PDF, combinando busca semântica avançada com geração de respostas contextuais precisas. O sistema demonstra como integrar tecnologias de ponta para criar aplicações de IA práticas e escaláveis, abordando desde a extração e processamento de documentos até a geração de respostas contextualmente relevantes. 8 | 9 | As tecnologias principais que formam o backbone desta implementação incluem Node.js versão 22 ou superior para runtime JavaScript moderno, TypeScript 5.9 ou superior para tipagem estática robusta, LangChain.js 0.3 ou superior como framework de orquestração de IA, Google Gemini API para embeddings e geração de texto, PostgreSQL 15 ou superior com a extensão pgVector para armazenamento e busca vetorial, e Docker para containerização e implantação simplificada. 10 | 11 | > observação: como muitos já sabem, estou fazendo o **[MBA em Engenharia de Software em A.I na FullCycle](https://ia.fullcycle.com.br/mba-ia/?utm_source=google_search&utm_campaign=search_mba-arquitetura&utm_medium=curso_especifico&utm_content=search_mba-arquitetura&gad_source=1&gad_campaignid=21917349974&gclid=Cj0KCQjww4TGBhCKARIsAFLXndQejvz0K1XTOHQ3CSglzOlQfVH64T2CS1qZnwkiyChx0HoXzaK4KY0aAosOEALw_wcB)**, e este artigo é baseado em um dos projetos práticos do curso. Não estou fazendo jabá, apenas compartilhando o conhecimento aprendido para que outros possam se beneficiar também. Mas, caso queira saber mais sobre o MBA, clique no link anterior. 12 | 13 | ## Compreendendo RAG e sua importância fundamental 14 | 15 | ### O Desafio dos LLMs Tradicionais 16 | 17 | Large Language Models como GPT, Claude e Gemini revolucionaram o processamento de linguagem natural, mas enfrentam limitações que impedem sua aplicação direta em cenários empresariais e especializados. O conhecimento destes modelos permanece estático, sendo limitado aos dados de treinamento até uma data específica, criando uma lacuna temporal que pode ser crítica em domínios onde informações atualizadas são essenciais. 18 | 19 | Além disso, estes modelos tendem a produzir alucinações, inventando informações quando não possuem conhecimento suficiente sobre um tópico. Esta característica pode ser particularmente problemática em aplicações que exigem precisão factual. Os LLMs também carecem de contexto específico sobre dados internos de empresas ou documentos especializados, limitando sua utilidade em cenários onde conhecimento especializado é necessário. 20 | 21 | A impossibilidade de atualização pós-treinamento representa outro obstáculo significativo. Uma vez treinado, um modelo não pode aprender novos fatos ou incorporar informações atualizadas sem um processo completo de retreinamento, que é custoso e complexo. 22 | 23 | ## RAG como solução arquitetural elegante 24 | 25 | Retrieval-Augmented Generation emerge como uma arquitetura que resolve elegantemente essas limitações através da combinação de dois componentes fundamentais. 
26 | 27 | - **O componente de Retrieval (Recuperação):** funciona como um sistema de busca inteligente que encontra informações relevantes em uma base de conhecimento externa. 28 | 29 | - **O componente de Generation (Geração):** utiliza um LLM para gerar respostas baseadas exclusivamente no contexto recuperado, garantindo que as respostas sejam fundamentadas em informações verificáveis. 30 | 31 | O fluxo de processamento segue uma sequência lógica onde uma consulta do usuário é convertida em embedding vetorial, que é então usado para busca por similaridade no banco vetorial. Os documentos mais relevantes são recuperados e concatenados em um contexto, que é fornecido ao LLM junto com a pergunta original para geração da resposta final. 32 | 33 | ## Vantagens técnicas transformadoras 34 | 35 | A arquitetura RAG oferece factualidade através de respostas baseadas em fontes verificáveis, eliminando a necessidade de confiar exclusivamente no conhecimento interno do modelo. A atualização é garantida pois a base de conhecimento pode ser atualizada sem necessidade de retreinar o modelo, permitindo incorporação de novos documentos e informações em tempo real. 36 | 37 | A transparência é uma característica fundamental, pois permite rastrear as fontes das informações utilizadas na geração das respostas. A custo-efetividade é significativa, pois evita a necessidade de fine-tuning de modelos, que requer recursos computacionais massivos e expertise técnica especializada. 38 | 39 | ## Arquitetura do sistema: visão técnica abrangente 40 | 41 | ### Arquitetura de alto nível detalhada 42 | 43 | A arquitetura do sistema RAG pode ser visualizada como um pipeline de processamento que transforma documentos PDF em uma base de conhecimento pesquisável e utiliza essa base para responder perguntas em linguagem natural. O processo começa com um documento PDF que passa por extração de texto, seguida por segmentação inteligente usando LangChain.js. Os segmentos resultantes são convertidos em embeddings vetoriais através do modelo Gemini. 44 | 45 | > observação: embora o artigo enfoque em arquivos PDF, numa aplicação RAG, poderíamos utilizar qualquer fonte de dados, como: bancos de dados relacionais, NoSQL, APIs, documentos Word, planilhas Excel, entre outros. 46 | 47 | Estes embeddings são armazenados em PostgreSQL com a extensão **[pgVector](https://www.postgresql.org/about/news/pgvector-070-released-2852/)**, criando uma base de conhecimento pesquisável. Quando um usuário faz uma pergunta, ela é convertida em embedding e usada para busca por similaridade no banco vetorial. Os documentos mais relevantes são recuperados e montados em contexto, que é então enviado para o Google Gemini junto com a pergunta para geração da resposta final. 48 | 49 | ### Afinal, o que são embeddings? 50 | 51 | Embeddings são representações numéricas de dados, como texto ou imagens, em um espaço vetorial de alta dimensão. Eles capturam o significado semântico dos dados, permitindo que máquinas compreendam e processem informações de maneira mais eficaz. No contexto de RAG, embeddings são usados para transformar consultas e documentos em vetores que podem ser comparados para encontrar similaridades. 52 | 53 | - Exemplo: 54 | 55 | ```text 56 | "gato" -> [0.1, 0.3, 0.5, ...] 57 | "cachorro" -> [0.2, 0.4, 0.6, ...]
58 | ``` 59 | 60 | Deixo a recomendação da documentação oficial do Gemini que explica com mais detalhes sobre embeddings: **[Embeddings](https://ai.google.dev/gemini-api/docs/embeddings?hl=pt-br)** 61 | 62 | ## Componentes tecnológicos em profundidade 63 | 64 | Para deixar a aplicação simples e fácil de executar, utilizei interfaces que utilizam Node.js com TypeScript para runtime e tipagem estática robusta. A Readline Interface fornece uma CLI interativa para testes e demonstrações, permitindo interação natural com o sistema. 65 | 66 | Para processamento de documentos, usamos as seguintes bibliotecas: 67 | 68 | - **[LangChain.js](https://js.langchain.com/docs/introduction/):** serve como framework principal para aplicações LLM, oferecendo abstrações de alto nível para tarefas comuns. 69 | 70 | - **[RecursiveCharacterTextSplitter](https://js.langchain.com/docs/concepts/text_splitters/):** implementa algoritmo inteligente de chunking que preserva contexto semântico. 71 | 72 | - **[PDF-Parse](https://www.npmjs.com/package/pdf-parse):** realiza extração limpa de texto de documentos PDF. 73 | 74 | Os embeddings e IA são gerenciados através da Google Gemini API, utilizando o modelo embedding-001 para geração de embeddings de 768 dimensões e **[gemini-2.0-flash](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash?hl=pt-br)** para geração de respostas otimizadas. 75 | 76 | O banco de dados vetorial combina _PostgreSQL 15_ ou superior como banco relacional robusto com _pgVector_ como extensão para busca vetorial eficiente. _HNSW Indexing_ implementa algoritmo de busca aproximada que oferece performance para buscas em milissegundos mesmo em grandes volumes de dados. 77 | 78 | A infraestrutura utiliza _Docker Compose_ para orquestração de containers, simplificando deployment e gerenciamento de dependências. Environment Variables proporcionam configuração flexível e segura. 79 | 80 | ### O que é HNSW Indexing? 81 | 82 | _HNSW Indexing_ significa _Hierarchical Navigable Small World Graph Indexing._ 83 | É uma técnica muito usada em busca aproximada por vizinhos mais próximos _(Approximate Nearest Neighbor Search – ANN)_ em bases vetoriais, como quando você precisa recuperar embeddings de texto, imagens ou áudio de forma rápida. 84 | 85 | #### Como funciona? 86 | 87 | - Ele organiza os vetores em uma estrutura de grafo hierárquico. 88 | 89 | - Nos níveis superiores, você tem conexões mais gerais entre vetores, que permitem "pulos longos" pelo espaço de busca. 90 | 91 | - Conforme vai descendo nos níveis, os grafos ficam mais densos e conectados, permitindo buscas mais precisas e locais. 92 | 93 | - Isso cria um equilíbrio entre velocidade (saltos rápidos entre regiões) e precisão (ajuste fino nos níveis inferiores). 94 | 95 | #### Por que é importante? 96 | 97 | - **Alta performance:** consegue buscar vizinhos em milhões de vetores com baixa latência. 98 | 99 | - **Escalabilidade:** é eficiente tanto em memória quanto em tempo, mesmo em bases grandes. 100 | 101 | - **Uso comum:** é o índice padrão em várias bibliotecas de bases vetoriais, como FAISS (Facebook AI Similarity Search), Milvus, Weaviate e Pinecone. 102 | 103 | ### Exemplo prático 104 | 105 | Imagine que você tem 10 milhões de embeddings de documentos. Se fosse comparar cada consulta com todos, seria inviável. 106 | 107 | Com HNSW, você consegue encontrar os documentos semanticamente mais próximos em milissegundos, sem percorrer todos os vetores.
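Para visualizar o que o HNSW evita, o esboço em TypeScript abaixo (meramente ilustrativo, não faz parte do projeto) mostra a busca exaustiva que compara a consulta com todos os vetores da base:

```typescript
// Similaridade de cosseno entre dois vetores de embeddings.
function similaridadeCosseno(a: number[], b: number[]): number {
  let produto = 0;
  let normaA = 0;
  let normaB = 0;
  for (let i = 0; i < a.length; i++) {
    produto += a[i] * b[i];
    normaA += a[i] * a[i];
    normaB += b[i] * b[i];
  }
  return produto / (Math.sqrt(normaA) * Math.sqrt(normaB));
}

// Busca exaustiva (força bruta): custo O(n) por consulta; é exatamente o percurso
// que o HNSW substitui por uma navegação logarítmica pelo grafo.
function buscaForcaBruta(consulta: number[], base: number[][]): number {
  let melhorIndice = -1;
  let melhorSimilaridade = -Infinity;
  for (let i = 0; i < base.length; i++) {
    const sim = similaridadeCosseno(consulta, base[i]);
    if (sim > melhorSimilaridade) {
      melhorSimilaridade = sim;
      melhorIndice = i;
    }
  }
  return melhorIndice;
}
```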
108 | 109 | Não estarei entrando em detalhes sobre o HNSW Indexing, mas caso queira dar uma olhada numa implementação prática usando TypeScript, deixo o link do repositório do projeto que criei: **[HNSW + Gemini + LangChain.js - Clean Architecture](https://github.com/glaucia86/hnsw-gemini-langchainjs)**. Num outro artigo, posso detalhar mais sobre o HNSW Indexing e quebrar essa implementação em partes para que fique mais fácil de entender. 110 | 111 | ## Pipeline RAG Detalhado 112 | 113 | O pipeline de ingestão segue a sequência: 114 | 115 | > PDF → Text Extraction → Chunking → Embeddings → Vector Storage. 116 | 117 | Cada etapa é otimizada para preservar máxima informação semântica enquanto prepara os dados para busca eficiente. 118 | 119 | O pipeline de consulta executa: 120 | 121 | > User Query → Query Embedding → Similarity Search → Context Assembly → LLM Generation → Response. 122 | 123 | Este processo garante que cada resposta seja fundamentada em evidências específicas dos documentos processados. 124 | 125 | ## Configuração do Ambiente de Desenvolvimento 126 | 127 | ### Pré-requisitos Técnicos Essenciais 128 | 129 | O ambiente de desenvolvimento requer as seguintes versões mínimas: 130 | 131 | - **Node.js versão 22.0.0 ou superior** - Para suporte às funcionalidades mais recentes do JavaScript e performance otimizada 132 | - **NPM versão 10.0.0 ou superior** - Necessário para gerenciamento de dependências moderno 133 | - **Docker versão 24.0.0 ou superior** - Garante compatibilidade com recursos de containerização avançados 134 | - **Git versão 2.40.0 ou superior** - Essencial para controle de versão 135 | 136 | Para verificar as versões instaladas, execute os seguintes comandos em seu terminal: 137 | 138 | ```bash 139 | node --version # v22.0.0+ 140 | npm --version # 10.0.0+ 141 | docker --version # 24.0.0+ 142 | git --version # 2.40.0+ 143 | ``` 144 | 145 | ## Inicialização Completa do Projeto 146 | 147 | A estrutura do projeto começa com a criação de um diretório principal e subdiretório para código fonte: 148 | 149 | ```bash 150 | mkdir rag-system-typescript && cd rag-system-typescript 151 | mkdir src 152 | ``` 153 | 154 | A inicialização do Node.js é feita através do comando: 155 | 156 | ```bash 157 | npm init -y 158 | ``` 159 | 160 | Este comando cria o arquivo `package.json` com configurações padrão. 161 | 162 | As dependências de produção incluem pacotes essenciais para funcionalidade do sistema: 163 | 164 | ```bash 165 | npm install @google/generative-ai @langchain/core @langchain/community @langchain/textsplitters dotenv pg uuid 166 | ``` 167 | 168 | Estas bibliotecas fornecem integração com Google AI, framework LangChain, manipulação de variáveis de ambiente, conexão PostgreSQL e geração de identificadores únicos. 169 | 170 | As dependências de desenvolvimento garantem experiência de desenvolvimento robusta: 171 | 172 | ```bash 173 | npm install -D @types/node @types/pg @types/pdf-parse tsx typescript 174 | ``` 175 | 176 | Estas incluem definições de tipos TypeScript, compilador TypeScript e executor de desenvolvimento tsx. 177 | 178 | ## Configuração TypeScript Avançada 179 | 180 | O arquivo `tsconfig.json` define configurações de compilação que otimizam para desenvolvimento moderno e performance. 181 | 182 |
## Advanced TypeScript Configuration

The `tsconfig.json` file defines compiler settings optimized for modern development and performance.

**tsconfig.json**
```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "node",
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "allowSyntheticDefaultImports": true,
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true,
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "types": ["node"],
    "lib": ["ES2022", "DOM"]
  },
  "include": [
    "src/**/*"
  ],
  "exclude": [
    "node_modules",
    "dist",
    "**/*.test.ts",
    "**/*.spec.ts"
  ],
  "ts-node": {
    "esm": true
  }
}
```
## Intelligent Automation Scripts

The scripts in `package.json` automate common tasks:

```json
"scripts": {
  "build": "tsc",
  "start": "npm run build && node dist/chat.js",
  "ingest": "npm run build && node dist/ingest.js",
  "dev:chat": "tsx src/chat.ts",
  "dev:ingest": "tsx src/ingest.ts"
},
```

## Infrastructure: PostgreSQL + pgVector

### Theoretical Foundations of Vector Databases

Mathematical embeddings represent a revolution in how computers process and understand natural language. Texts are converted into high-dimensional vectors, where each dimension captures specific aspects of semantic meaning. For the _Gemini embedding-001_ model, each text is represented by 768 floating-point numbers.

Proximity in the vector space represents semantic similarity, allowing mathematical algorithms to find related texts through distance calculations. For example, the phrases _"company revenue"_ and _"corporate income"_ would produce nearby vectors in the multidimensional space.

_pgVector_ adds native vector capabilities to PostgreSQL, including a vector data type for efficient storage, HNSW (Hierarchical Navigable Small World) indexes for fast search, and similarity operations such as cosine distance, Euclidean distance, and inner product.
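To make those similarity operations concrete, here is a hedged sketch of our own (not project code) that queries pgVector directly through the `pg` driver; it assumes the `pdf_documents` table created later in this article.

```typescript
import { Client } from 'pg';

// Illustrative sketch: pgVector exposes cosine distance (<=>), Euclidean
// distance (<->) and negative inner product (<#>) as SQL operators.
// Table and column names assume the schema shown later in this article.
async function nearestByCosine(queryVector: number[], k: number) {
  const client = new Client({ connectionString: process.env.DATABASE_URL });
  await client.connect();

  const result = await client.query(
    `SELECT content, vector <=> $1 AS cosine_distance
       FROM pdf_documents
      ORDER BY vector <=> $1
      LIMIT $2`,
    [`[${queryVector.join(',')}]`, k] // pgVector accepts the '[x,y,...]' literal form
  );

  await client.end();
  return result.rows;
}
```

In the application itself, LangChain's `PGVectorStore`, used later in this article, issues similar distance queries under the hood.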
## Advanced Docker Configuration

The `docker-compose.yml` file defines the complete infrastructure for the RAG system. The PostgreSQL service uses the **pgvector/pgvector:pg17** image, which ships PostgreSQL 17 with the pgVector extension pre-installed.

**docker-compose.yml**
```yaml
services:
  # Main service: PostgreSQL with pgVector extension
  postgres:
    image: pgvector/pgvector:pg17
    container_name: postgres_rag_ts
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: rag
    ports:
      - "5432:5432"
    volumes:
      # Data persistence
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      # Checks if the database is ready
      test: ["CMD-SHELL", "pg_isready -U postgres -d rag"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped

  # Auxiliary service: Initializes pgVector extension
  bootstrap_vector_ext:
    image: pgvector/pgvector:pg17
    depends_on:
      postgres:
        condition: service_healthy
    entrypoint: ["/bin/sh", "-c"]
    command: >
      PGPASSWORD=postgres
      psql "postgresql://postgres@postgres:5432/rag" -v ON_ERROR_STOP=1
      -c "CREATE EXTENSION IF NOT EXISTS vector;"
    restart: "no"

volumes:
  postgres_data:
```
The `bootstrap_vector_ext` service guarantees that the pgVector extension is created automatically once PostgreSQL is operational. The healthcheck monitors database availability before dependent services start.

## Starting and Verifying the Infrastructure

The infrastructure is started with:

```bash
docker-compose up -d
```

This command starts the containers in daemon mode. Check their status with:

```bash
docker ps
```

This command lists the active containers. The logs can be monitored with:

```bash
docker logs postgres_rag_ts
```

This command helps identify startup problems.

## Google Gemini Integration: An Advanced AI Client

### A Deeper Theory of Embeddings

Embeddings represent one of the most significant innovations in natural language processing, converting discrete text representations into continuous vectors of real numbers. These vectors capture complex semantic relationships, enabling mathematical operations over linguistic concepts.

The 768-number dimensionality of the embedding-001 model offers enough room to represent subtle semantic nuances while remaining computationally efficient. Vectors that are close in the multidimensional space correspond to semantically similar texts, enabling search by mathematical similarity.

Vector operations allow conceptual manipulation, where differences and sums of vectors can reveal analogical relationships. The classic example _"king" - "man" + "woman" ≈ "queen"_ demonstrates how embeddings capture abstract relational structures.
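As a toy illustration of that analogy (our own, with invented 3-dimensional vectors rather than real 768-dimensional embeddings), the arithmetic looks like this:

```typescript
// Toy example of embedding arithmetic; the vectors below are invented
// for illustration only — real embeddings have 768 dimensions.
const king  = [0.9, 0.8, 0.1];
const man   = [0.5, 0.1, 0.1];
const woman = [0.5, 0.1, 0.9];

// "king" - "man" + "woman": element-wise vector arithmetic
const result = king.map((value, i) => value - man[i] + woman[i]);

console.log(result); // ≈ the region of the space where "queen" would live
```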
### A Robust Google Client Implementation

The Google client implementation encapsulates all communication with the Gemini APIs, offering a clean interface and robust error handling.

**src/google-client.ts**
```typescript
import { config } from 'dotenv';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { Embeddings } from '@langchain/core/embeddings';

config();

export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}

export class GoogleClient {
  private googleApiKey: string;
  private embeddingModel: string;
  private chatModel: string;
  private genAI: GoogleGenerativeAI;

  constructor() {
    this.googleApiKey = process.env.GOOGLE_API_KEY || '';
    // Fall back to the documented defaults when the env vars are unset
    this.embeddingModel = process.env.GOOGLE_EMBEDDING_MODEL || 'embedding-001';
    this.chatModel = process.env.GOOGLE_CHAT_MODEL || 'gemini-2.0-flash';

    if (!this.googleApiKey) {
      throw new Error('Google API key is not set in environment variables.');
    }

    this.genAI = new GoogleGenerativeAI(this.googleApiKey);
  }

  async getEmbeddings(texts: string[]): Promise<number[][]> {
    const embeddings: number[][] = [];

    for (const text of texts) {
      try {
        // Uses the model configured via GOOGLE_EMBEDDING_MODEL
        const model = this.genAI.getGenerativeModel({ model: this.embeddingModel });
        const result = await model.embedContent(text);

        if (result.embedding && result.embedding.values) {
          embeddings.push(result.embedding.values);
        } else {
          console.log(`No embedding returned for text: ${text}`);
          const dummySize = 768;
          embeddings.push(new Array(dummySize).fill(0));
        }
      } catch (error) {
        console.log(`Error generating embedding: ${error}`);
        const dummySize = 768;
        embeddings.push(new Array(dummySize).fill(0));
      }
    }

    return embeddings;
  }

  async chatCompletions(messages: ChatMessage[], temperature: number = 0.1): Promise<string> {
    try {
      const model = this.genAI.getGenerativeModel({
        model: this.chatModel,
        generationConfig: {
          temperature,
          maxOutputTokens: 1000,
        }
      });

      let prompt = '';
      for (const message of messages) {
        const { role, content } = message;

        if (role === 'system') {
          prompt += `Instructions: ${content}\n\n`;
        } else if (role === 'user') {
          prompt += `${content}\n`;
        } else if (role === 'assistant') {
          prompt += `Assistant: ${content}\n`;
        }
      }

      const result = await model.generateContent(prompt);
      return result.response.text();
    } catch (error) {
      console.log(`Error generating chat completion: ${error}`);
      return 'Sorry, an error occurred while generating the response.';
    }
  }
}
```
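Before continuing, a quick usage sketch of this client (our own example, not from the repository):

```typescript
import { GoogleClient, ChatMessage } from './google-client';

// Minimal usage sketch of the client defined above.
const client = new GoogleClient();

const messages: ChatMessage[] = [
  { role: 'system', content: 'Answer only from the provided context.' },
  { role: 'user', content: 'What does the document say about revenue?' },
];

// The low default temperature (0.1) keeps answers deterministic.
const answer = await client.chatCompletions(messages);
console.log(answer);
```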
The `GoogleClient` class manages configuration and communication with the Gemini APIs. The `getEmbeddings` method processes texts one by one, implementing graceful error handling and a fallback for failure cases. `chatCompletions` converts structured messages into prompts optimized for Gemini.

The `GoogleEmbeddings` class extends the LangChain.js abstractions for seamless integration with existing frameworks. Since `src/ingest.ts` imports it from `./google-client`, it lives in the same file, right after `GoogleClient`.
**src/google-client.ts** (continued)
```typescript
export class GoogleEmbeddings extends Embeddings {
  private client: GoogleClient;

  constructor() {
    super({});
    this.client = new GoogleClient();
  }

  async embedDocuments(texts: string[]): Promise<number[][]> {
    console.log(`Generating embeddings for ${texts.length} documents...`);

    const batchSize = 10; // Process 10 texts at a time to keep requests manageable
    const allEmbeddings: number[][] = [];

    for (let i = 0; i < texts.length; i += batchSize) {
      const batchTexts = texts.slice(i, i + batchSize);
      const batchEmbeddings = await this.client.getEmbeddings(batchTexts);
      allEmbeddings.push(...batchEmbeddings);

      console.log(`Batch ${Math.floor(i / batchSize) + 1}: ${batchTexts.length} texts processed`);
    }

    return allEmbeddings;
  }

  // Method for embedding a single query
  async embedQuery(text: string): Promise<number[]> {
    const embeddings = await this.client.getEmbeddings([text]);
    return embeddings[0];
  }
}

// Factory function to create GoogleClient instances
export function getGoogleClient(): GoogleClient {
  return new GoogleClient();
}
```
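A short sketch (again ours) of how these embedding methods behave:

```typescript
import { GoogleEmbeddings } from './google-client';

// Usage sketch: one vector per document, one vector per query.
const embeddings = new GoogleEmbeddings();

const docVectors = await embeddings.embedDocuments([
  'Company revenue grew 20% in 2024.',
  'The office moved to a new address.',
]);
console.log(docVectors.length, docVectors[0].length); // 2 documents, 768 dimensions each

const queryVector = await embeddings.embedQuery('How much did revenue grow?');
console.log(queryVector.length); // 768
```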
## Secure Environment Configuration

The `.env` file centralizes sensitive configuration, separating credentials from source code for security and deployment flexibility.

```text
GOOGLE_API_KEY=your_google_api_key_here
GOOGLE_EMBEDDING_MODEL=models/embedding-001
GOOGLE_CHAT_MODEL=gemini-2.0-flash
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/rag
PG_VECTOR_COLLECTION_NAME=pdf_documents
PDF_PATH=./document.pdf
```

> Note: to create a Google Gemini API key, follow the steps described in the official documentation, **[AI Studio - Google](https://aistudio.google.com/apikey)**, and click `Create API Key`.

## The Ingestion System: From PDF to Intelligent Vectors

### Advanced Chunking Theory

Chunking is one of the most critical aspects of RAG systems, determining the quality and relevance of the answers. The fundamental challenge is that LLMs have limited context windows while documents can be long, creating the need for intelligent segmentation.

The chunking strategy must balance context size against information specificity. Chunks that are too large may contain irrelevant information that dilutes relevance. Chunks that are too small may lack enough context for full comprehension.

LangChain.js's `RecursiveCharacterTextSplitter` is very useful for textual documents, since it preserves the natural structure of paragraphs and sentences. In that case, parameters such as a `chunk_size` of around 1,000 characters and a `chunk_overlap` of 150-200 are a good starting point, balancing context and specificity.

However, since this project works with a _tabular PDF_, that strategy is not the most effective. For tables, we prefer to split the document line by line, guaranteeing that each record is an independent chunk. In addition, we include the table header in each fragment to preserve semantic clarity. With that, overlap becomes unnecessary (kept at 0) and the separators are adapted to prioritize line breaks (a sketch of this row-wise splitting appears after the next section).

This approach guarantees that each tabular entry is preserved in full and improves precision when retrieving information via RAG.

## The `RecursiveCharacterTextSplitter` Algorithm in Detail

The algorithm follows an intelligent fallback strategy that tries to split on natural separators before resorting to artificial breaks. First it tries to split on paragraphs using double line breaks. If the resulting chunks still exceed the maximum size, it splits on single lines. For chunks that are still too large, it splits on spaces between words. As a last resort, it splits character by character.

This approach guarantees that related information stays together whenever possible, preserving the semantic coherence needed for effective retrieval.
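And here is the row-wise variant promised above: a minimal sketch of our own, not the project's code (the actual `ingest.ts`, shown next, relies on `RecursiveCharacterTextSplitter` with `chunkOverlap: 0`).

```typescript
// Sketch of row-wise chunking for tabular text: every chunk is one record,
// prefixed with the table header so each fragment stays self-describing.
function splitTableByRow(tableText: string): string[] {
  const [header, ...rows] = tableText.split('\n').filter((line) => line.trim() !== '');
  return rows.map((row) => `${header}\n${row}`); // no overlap needed: rows are independent
}

// Example
const chunks = splitTableByRow('product;revenue\nA;100\nB;250');
console.log(chunks); // [ 'product;revenue\nA;100', 'product;revenue\nB;250' ]
```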
## The Complete Ingestion Implementation

The ingestion implementation combines PDF extraction, intelligent segmentation, embedding generation, and vector storage in an integrated pipeline.

**src/ingest.ts**
```typescript
import { config } from 'dotenv';
import { Document } from '@langchain/core/documents';
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
import { GoogleEmbeddings } from './google-client';
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import { PDFLoader as LangChainPDFLoader } from '@langchain/community/document_loaders/fs/pdf';

config();

class PDFLoader {
  constructor(private filePath: string) {}

  async load(): Promise<Document[]> {
    try {
      console.log(`Reading PDF file: ${this.filePath}`);

      const langChainLoader = new LangChainPDFLoader(this.filePath);
      const documents = await langChainLoader.load();

      console.log(`PDF loaded successfully! Found ${documents.length} pages`);
      return documents;
    } catch (error) {
      console.error('Error loading PDF:', error);
      throw error;
    }
  }

  async ingestToVectorStore(): Promise<void> {
    try {
      console.log('Starting PDF ingestion process...');

      const rawDocuments = await this.load();
      console.log(`PDF loaded: ${rawDocuments.length} sections`);

      console.log('Splitting documents into chunks...');
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 400,
        chunkOverlap: 0, // no overlap: each tabular record is an independent chunk
      });

      const splitDocuments = await textSplitter.splitDocuments(rawDocuments);
      console.log(`Documents split into ${splitDocuments.length} chunks`);

      console.log('Initializing Google embeddings...');
      const embeddings = new GoogleEmbeddings();

      console.log('Connecting to PostgreSQL vector store...');
      const vectorStore = await PGVectorStore.initialize(embeddings, {
        postgresConnectionOptions: {
          connectionString: process.env.DATABASE_URL,
        },
        tableName: process.env.PG_VECTOR_COLLECTION_NAME || 'pdf_documents',
        columns: {
          idColumnName: 'id',
          vectorColumnName: 'vector',
          contentColumnName: 'content',
          metadataColumnName: 'metadata',
        },
      });

      console.log('Adding documents to vector store...');
      await vectorStore.addDocuments(splitDocuments);

      console.log('PDF ingestion completed successfully!');
      console.log(`Total chunks processed: ${splitDocuments.length}`);

      await vectorStore.end();

    } catch (error) {
      console.error('Error during PDF ingestion:', error);
      process.exit(1);
    }
  }
}

async function main() {
  // PDF_PATH comes from .env, falling back to the bundled sample document
  const pdfPath = process.env.PDF_PATH || './document.pdf';
  const loader = new PDFLoader(pdfPath);
  await loader.ingestToVectorStore();
}

// Run ingestion
main();
```
The `PDFLoader` class encapsulates the entire ingestion process, from file loading to storage in the vector database. The `load` method uses the LangChain.js PDFLoader for robust text extraction. `ingestToVectorStore` coordinates the complete processing pipeline.

## Automatic PostgreSQL Schema

`PGVectorStore` automatically creates a schema optimized for vector storage and search. The pdf_documents table includes:

- **id -** UUID primary key for unique identification
- **content -** the original chunk text extracted from the PDF
- **vector -** the 768-dimensional embeddings generated by Gemini
- **metadata -** structural information such as page, source, and context

```sql
CREATE TABLE pdf_documents (
  id UUID PRIMARY KEY,
  content TEXT,
  vector VECTOR(768),
  metadata JSONB
);

CREATE INDEX ON pdf_documents USING hnsw (vector vector_cosine_ops);
```

The `HNSW index` optimizes vector search, offering logarithmic complexity versus traditional linear scans.

## The RAG Search System: Intelligent Retrieval + Generation

### Advanced Semantic Search Theory

The semantic search pipeline represents a fundamental transformation in how computational systems find relevant information. Unlike traditional keyword search, semantic search uses vector representations to capture conceptual meaning.

The process starts by converting the user's question into a vector embedding, using the same model used during ingestion. This query embedding is then compared against all stored embeddings using mathematical similarity metrics. The HNSW algorithm accelerates this comparison, reducing complexity from O(n) to O(log n).

Results are ranked by _similarity score_, where lower values indicate greater similarity in cosine space. _Context assembly_ concatenates the most relevant chunks, building a rich context for answer generation.
## Anti-Hallucination Prompt Engineering

The prompt template implements sophisticated strategies to _prevent hallucinations_ and guarantee factual answers. Explicit instructions emphasize exclusive use of the provided context. A fallback response supplies a standard answer for cases where the information is not available. A low temperature of 0.1 reduces creativity and increases determinism. Negative examples demonstrate cases where the correct answer is "I don't know."

This approach guarantees that the system always recognizes the limits of the available knowledge, preferring to admit ignorance rather than invent information.
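A sketch of such a template (ours; the exact wording in the repository's `src/search.ts` may differ), combining the four strategies above:

```typescript
// Anti-hallucination prompt sketch: context-only instructions, a fallback
// answer, and a negative example. The wording is illustrative.
const FALLBACK = "I don't have the information needed to answer your question.";

function buildPrompt(context: string, question: string): string {
  return [
    'Answer ONLY with facts found in the CONTEXT below.',
    `If the CONTEXT does not contain the answer, reply exactly: "${FALLBACK}"`,
    '',
    'Negative example:',
    'Question: What is the capital of France?',
    `Answer: ${FALLBACK} (the context is about company data, not geography)`,
    '',
    `CONTEXT:\n${context}`,
    '',
    `QUESTION: ${question}`,
  ].join('\n');
}
// Pair this with temperature 0.1 in chatCompletions for deterministic output.
```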
## The CLI Interface: An Exceptional User Experience

### User-Centered Design

The CLI interface was designed around user experience principles applied to AI systems. Immediate feedback through progress indicators keeps users informed about operations in progress. Special commands such as `help`, `status`, `clear`, and `exit` offer intuitive control. Graceful error handling presents informative messages that guide users toward resolving problems. The non-blocking asynchronous interface stays responsive even during computationally intensive operations.

### Implementing the Interactive Interface

The implementation combines Node.js's native readline with advanced command logic to create a fluid, intuitive experience.

**src/chat.ts**
```typescript
import { createInterface } from "readline";
import { searchPrompt, RAGSearch } from "./search";

// Function to print the initial banner with system information
function printBanner(): void {
  console.log('='.repeat(60));
  console.log('RAG CHAT - PDF Question and Answer System');
  console.log('Powered by Google Gemini + LangChain + pgVector');
  console.log('⚡ TypeScript + Node.js Implementation');
  console.log('='.repeat(60));
  console.log("Special commands:");
  console.log("  • 'exit', 'quit', 'q' - Closes the program");
  console.log("  • 'help' - Shows available commands");
  console.log("  • 'clear' - Clears the screen");
  console.log("  • 'status' - Checks system status");
  console.log('='.repeat(60));
}

// Function to print help instructions
function printHelp(): void {
  console.log('\n AVAILABLE COMMANDS:');
  console.log('  exit, quit, q - Closes the program');
  console.log('  help - Shows available commands');
  console.log('  clear - Clears the screen');
  console.log('  status - Checks system status');
  console.log('  [any text] - Asks a question about the PDF');
  console.log('\n TIPS FOR USE:');
  console.log('  • Ask specific questions about the PDF content');
  console.log('  • The system responds only based on the document');
  console.log('  • Out-of-context questions return "I don\'t have information"');
  console.log();
}

// Function to clear the console screen
function clearScreen(): void {
  console.clear();
}

async function checkStatus(searchSystem: RAGSearch | null): Promise<void> {
  console.log('\n RAG SYSTEM STATUS:');
  console.log('='.repeat(40));

  if (!searchSystem) {
    console.log('System: NOT INITIALIZED');
    console.log('\n TROUBLESHOOTING CHECKLIST:');
    console.log('  1. Is PostgreSQL running?');
    console.log('     → Command: docker compose up -d');
    console.log('  2. Has ingestion been executed?');
    console.log('     → Command: npm run ingest');
    console.log('  3. Is the API Key configured?');
    console.log('     → File: .env (GOOGLE_API_KEY)');
    console.log('  4. Are dependencies installed?');
    console.log('     → Command: npm install');
    return;
  }

  try {
    const systemStatus = await searchSystem.getSystemStatus();

    console.log('RAG System: OPERATIONAL');
    console.log('PostgreSQL Connection: OK');
    console.log('pgVector Extension: OK');
    console.log('Google Gemini API: OK');
    console.log(`Vector Database: ${systemStatus.isReady ? 'READY' : 'NOT READY'}`);

    if (systemStatus.chunksCount > 0) {
      console.log(`Available chunks: ${systemStatus.chunksCount}`);
    }

    console.log('\n System ready to answer questions!');
  } catch (error) {
    console.log('Status: PARTIALLY OPERATIONAL');
    console.log(`Error checking system status: ${error}`);
  }

  console.log('='.repeat(40));
}

// Main function to initialize the RAG system and handle user input
async function main(): Promise<void> {
  console.log('STEP 6: Initializing the RAG Chat CLI Interface');

  printBanner();

  console.log('\n PHASE 1: INITIALIZING RAG SYSTEM');
  const searchSystem = await searchPrompt();

  if (!searchSystem) {
    console.log('\n CRITICAL ERROR: RAG system could not be initialized!');
    console.log('\n POSSIBLE CAUSES AND SOLUTIONS:');
    console.log('  1. PostgreSQL is not running');
    console.log('     → Solution: docker compose up -d');
    console.log('  2. Ingestion process has not been executed');
    console.log('     → Solution: npm run ingest');
    console.log('  3. GOOGLE_API_KEY is not configured or invalid');
    console.log('     → Solution: Configure in the .env file');
    console.log('  4. Node.js dependencies are not installed');
    console.log('     → Solution: npm install');
    console.log('  5. pgVector extension has not been created');
    console.log('     → Solution: Check Docker logs');

    process.exit(1);
  }

  console.log('PHASE 1: RAG system initialized successfully!\n');

  // PHASE 2: SET UP THE COMMAND LINE INTERFACE
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
    prompt: '\n Ask a question: '
  });

  // Helper function to capture user input asynchronously
  const askQuestion = (prompt: string): Promise<string> => {
    return new Promise((resolve) => {
      rl.question(prompt, resolve);
    });
  };

  console.log('System ready! Type your question or "help" to see commands.');

  // PHASE 3: MAIN CHAT LOOP
  while (true) {
    try {
      // Capture user input
      const userInput = (await askQuestion('\n Ask a question: ')).trim();

      // COMMAND PROCESSING: Determine whether it is a special command or a question
      const command = userInput.toLowerCase();

      // Exit commands
      if (['exit', 'quit', 'sair', 'q'].includes(command)) {
        console.log('\n Thank you for using RAG Chat. Goodbye!\n');
        console.log('System shutting down...');
        break;
      }

      // Help command
      if (['ajuda', 'help', 'h', '?'].includes(command)) {
        printHelp();
        continue;
      }

      // Clear screen command
      if (['limpar', 'clear', 'cls'].includes(command)) {
        clearScreen();
        printBanner();
        continue;
      }

      // Status command
      if (['status', 'info', 's'].includes(command)) {
        await checkStatus(searchSystem);
        continue;
      }

      // Validate empty input
      if (!userInput) {
        console.log('Empty input. Type a question or "help" to see commands.');
        continue;
      }

      // QUESTION PROCESSING: Forward the question to the RAG system
      console.log('\n Processing your question...');
      console.log('Searching PDF knowledge...');

      const startTime = Date.now();

      // Call the complete RAG pipeline
      const answer = await searchSystem.generateAnswer(userInput);

      const endTime = Date.now();
      const responseTime = ((endTime - startTime) / 1000).toFixed(2);

      // FORMATTED DISPLAY OF THE RESPONSE
      console.log('\n' + '='.repeat(80));
      console.log(`QUESTION: ${userInput}`);
      console.log('='.repeat(80));
      console.log(`🤖 RESPONSE:`);
      console.log(answer);
      console.log('='.repeat(80));
      console.log(`⚡ Response time: ${responseTime}s`);
    } catch (error) {
      // ERROR HANDLING
      if (error instanceof Error && error.message.includes('SIGINT')) {
        // Ctrl+C was pressed
        console.log('\n\n Interruption detected (Ctrl+C)');
        console.log('👋 Chat closed by user. See you next time!');
        break;
      } else {
        // Other errors
        console.log(`\n Unexpected error during processing:`);
        console.log(`   ${error}`);
        console.log('\n You can:');
        console.log('  • Try again with another question');
        console.log('  • Type "status" to check the system');
        console.log('  • Type "exit" to quit');
      }
    }
  }

  rl.close();
}

// EVENT HANDLERS: Operating system signal management

// Handler for Ctrl+C (SIGINT)
process.on('SIGINT', () => {
  console.log('\n\n Interrupt signal received (Ctrl+C)');
  console.log('Cleaning up resources...');
  console.log('RAG Chat closed. See you later!');
  process.exit(0);
});

// Handler for uncaught errors
process.on('uncaughtException', (error) => {
  console.error('\n Uncaught FATAL ERROR:', error);
  console.error('Restart the application: npm run start');
  process.exit(1);
});

// Handler for rejected promises
process.on('unhandledRejection', (reason, promise) => {
  console.error('\n Unhandled rejected promise:', reason);
  console.error('Promise:', promise);
});

// ENTRY POINT: Run the main function
main().catch((error) => {
  console.error('\n FATAL ERROR in main application:', error);
  console.error('Try restarting: npm run start');
  process.exit(1);
});
```
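`src/chat.ts` depends on `src/search.ts`, which this article does not reproduce. Inferring only from the calls above, a minimal shape consistent with that usage might be (an assumption, not the repository's actual code):

```typescript
// Hypothetical shape of src/search.ts, inferred only from how chat.ts uses it.
export interface SystemStatus {
  isReady: boolean;
  chunksCount: number;
}

export interface RAGSearch {
  getSystemStatus(): Promise<SystemStatus>;
  generateAnswer(question: string): Promise<string>;
}

// Factory that wires up the vector store and returns null when any
// dependency (database, ingestion, API key) is unavailable.
export declare function searchPrompt(): Promise<RAGSearch | null>;
```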
The `RAGSearch` class encapsulates the complete search and generation functionality. `searchDocuments` runs the vector search and returns formatted results with scores. `generateAnswer` orchestrates the complete RAG pipeline.

The `printBanner` function presents essential information about the system and the available commands. `checkStatus` offers detailed component diagnostics, making troubleshooting easier. The main loop processes commands and questions with robust error handling.

## Comprehensive Execution and Validation

### Optimized Execution Sequence

Execution follows a logical sequence that guarantees correct initialization of all components. First, start the infrastructure:

```bash
docker-compose up -d
```

This command brings up PostgreSQL with pgVector. Check the container status:

```bash
docker ps
```

This command confirms correct operation. Run the ingestion to process the PDF documents:

```bash
npm run dev:ingest
```

Finally, start the interactive chat to interact with the system:

```bash
npm run dev:chat
```

## Comprehensive Test Scenarios

The system supports several test scenarios that validate the complete functionality. Questions within the PDF's context should return answers based exclusively on the processed content. Questions outside the context should produce the standard response "I don't have the information needed to answer your question." Special commands such as status, help, and clear should work correctly.

## Systematic Troubleshooting

Common problems have well-defined solutions that can be identified through specific error messages:

- **Error: "Google API key is not set"**: indicates that the GOOGLE_API_KEY environment variable must be configured in the .env file. Check that the file contains a valid API key obtained from Google AI Studio.

- **Error: "Vector store not initialized"**: suggests that PostgreSQL is not operational or the ingestion process has not been run. Confirm that the Docker containers are running and run the document ingestion.

- **Error: "No documents found"**: indicates that the ingestion process needs to run to populate the vector database with chunks from the processed PDF.

- **Error: "Connection refused"**: points to PostgreSQL being offline; resolve it by checking the status of the Docker containers and restarting the infrastructure if necessary.

## Advanced Production Considerations

### Optimized Performance and Scalability

The implemented optimizations guarantee adequate performance for production use. Batch processing during ingestion applies rate limiting for external APIs, avoiding throttling. Connection pooling in PostgreSQL allows multiple simultaneous connections. HNSW indexing delivers sub-second search even with millions of vectors. Asynchronous operations keep the application responsive.

Performance metrics demonstrate the system's efficiency. Ingestion processes a 50-page PDF in roughly 30 seconds. Search returns results in 2-3 seconds per question. Throughput supports more than 100 questions per minute on modest hardware.
## Robust Security and Reliability

The security implementation follows best practices for production applications. Environment variables isolate secrets from source code. Input validation and sanitization prevent injection attacks. Robust error handling prevents leakage of sensitive information. Graceful shutdown handling guarantees proper resource cleanup.

Recommended monitoring includes structured logs using libraries such as `Winston` or `Pino`. Performance metrics can be collected with `Prometheus`. Automatic health checks monitor component availability. Per-user rate limiting prevents resource abuse. Keep these in mind as future improvements.

## Roadmap of Future Improvements

The technical roadmap identifies opportunities for evolution. Migrating the CLI to a REST API will ease integration with web applications. A `React` or `Next.js` interface will offer a modern visual experience. Multi-tenancy support will allow multiple users and documents. A `Redis cache` for frequent answers will reduce latency. `OpenTelemetry` integration will provide complete observability.

## References and Resources for Further Study

### Project Documentation and Repository

The complete code for this RAG system is available in the official repository **[rag-search-ingestion-langchainjs-gemini](https://github.com/glaucia86/rag-search-ingestion-langchainjs-gemini)**, where you will find a working implementation, detailed installation instructions, usage examples, and complete documentation of every component developed. The repository includes production-ready Docker configuration files, automation scripts for development, and specific test cases that demonstrate the practical application of the concepts presented in this article.

### Theoretical Foundations of RAG

For a deeper understanding of the theoretical foundations, the original paper "**[Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://dl.acm.org/doi/abs/10.5555/3495724.3496517)**" by Lewis et al., presented at NeurIPS 2020, establishes the fundamental principles of the RAG architecture. The research "**[Dense Passage Retrieval for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906)**" by Karpukhin et al. explores the dense retrieval techniques that underpin modern semantic search systems. The paper "**[In-Context Retrieval-Augmented Language Models](https://arxiv.org/abs/2302.00083)**" presents recent advances in integrating dynamic context into language models.

### Technologies and Frameworks

The official LangChain.js documentation at **[https://js.langchain.com/](https://js.langchain.com/)** offers complete guides on implementing AI pipelines, including specific tutorials on integrating with different embedding and language model providers. The Google AI developer documentation at **[https://ai.google.dev/docs](https://ai.google.dev/docs)** provides detailed technical specifications for the Gemini APIs, including rate limits, prompt engineering best practices, and performance optimizations.
For PostgreSQL and pgVector, the official documentation at **[https://github.com/pgvector/pgvector](https://github.com/pgvector/pgvector)** contains technical specifications on HNSW index implementation, performance settings, and scaling strategies for large volumes of vector data.
The PostgreSQL documentation at **[https://www.postgresql.org/docs/](https://www.postgresql.org/docs/)** offers fundamentals on database administration, query optimization, and advanced settings for high-performance applications.

### Embedding Models and Vector Search

A deep understanding of embeddings can be expanded through the paper "**[Attention Is All You Need](https://arxiv.org/abs/1706.03762)**", which introduces the Transformer architecture fundamental to modern embedding models. The paper "**[Efficient Estimation of Word Representations in Vector Space](https://arxiv.org/abs/1301.3781)**" by Mikolov et al. establishes the mathematical foundations of semantic vector representations. For vector search algorithms, "**[Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320)**" details the implementation and optimizations of the HNSW algorithm used by pgVector.

### Prompt Engineering and Hallucination Control

The research "**[Constitutional AI: Harmlessness from AI Feedback](https://arxiv.org/abs/2212.08073)**" explores advanced techniques for controlling language model behavior. "**[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https://arxiv.org/abs/2201.11903)**" demonstrates prompt-structuring strategies for complex reasoning. "**[Instruction Following with Large Language Models](https://arxiv.org/abs/2506.13734)**" offers insights on designing effective instructions for RAG systems.

### Practical Resources and Tutorials

The LangChain Cookbook at **[https://github.com/langchain-ai/langchain/tree/master/cookbook](https://github.com/langchain-ai/langchain/tree/master/cookbook)** contains practical examples implementing different RAG patterns. The Pinecone Learning Center at **[https://www.pinecone.io/learn/](https://www.pinecone.io/learn/)** offers tutorials on vector databases and semantic search applications. The Weaviate documentation at **[https://weaviate.io/developers/weaviate/](https://weaviate.io/developers/weaviate/)** presents alternatives for vector storage and their technical specifics.

## Author and Contributions

This project was developed by Glaucia Lemos, AI Developer Specialist, who shares knowledge across multiple platforms. Her profiles include Twitter at **[https://twitter.com/glaucia86](https://twitter.com/glaucia86)** for technical updates and development insights, LinkedIn at **[https://www.linkedin.com/in/glaucialemos/](https://www.linkedin.com/in/glaucialemos/)** for professional networking and technical articles, and YouTube at **[https://www.youtube.com/@GlauciaLemos](https://www.youtube.com/@GlauciaLemos)** for video tutorials and technical talks on modern development.

--------------------------------------------------------------------------------