├── python ├── src │ └── reag │ │ ├── __init__.py │ │ ├── schema.py │ │ ├── prompt.py │ │ └── client.py ├── .env.example ├── .gitignore ├── pyproject.toml ├── README.md └── tests │ └── test_client.py ├── typescript ├── env.example ├── env.d.ts ├── .npmignore ├── jest.config.ts ├── tsconfig.json ├── src │ ├── schema.ts │ ├── prompt.ts │ └── client.ts ├── .gitignore ├── package.json ├── README.md └── tests │ └── client.test.ts ├── LICENSE.md ├── CONTRIBUTING.md └── README.md /python/src/reag/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | -------------------------------------------------------------------------------- /typescript/env.example: -------------------------------------------------------------------------------- 1 | # openai 2 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /typescript/env.d.ts: -------------------------------------------------------------------------------- 1 | declare global { 2 | namespace NodeJS { 3 | interface ProcessEnv { 4 | OPENAI_API_KEY: string; 5 | } 6 | } 7 | } 8 | 9 | export {}; 10 | -------------------------------------------------------------------------------- /typescript/.npmignore: -------------------------------------------------------------------------------- 1 | src/ 2 | tests/ 3 | .github/ 4 | .git/ 5 | .gitignore 6 | .npmignore 7 | tsconfig.json 8 | jest.config.js 9 | *.test.ts 10 | *.spec.ts 11 | coverage/ 12 | .env* -------------------------------------------------------------------------------- /python/src/reag/schema.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from typing import List 3 | 4 | 5 | class ResponseSchema(BaseModel): 6 | 
content: str 7 | reasoning: str 8 | is_irrelevant: bool 9 | 10 | 11 | class ResponseSchemaMessage(BaseModel): 12 | source: ResponseSchema 13 | -------------------------------------------------------------------------------- /typescript/jest.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "jest"; 2 | import "dotenv/config"; 3 | 4 | const config: Config = { 5 | preset: "ts-jest", 6 | testEnvironment: "node", 7 | testMatch: ["**/tests/**/*.test.ts"], 8 | verbose: true, 9 | clearMocks: true, 10 | resetMocks: true, 11 | }; 12 | 13 | export default config; 14 | -------------------------------------------------------------------------------- /typescript/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2018", 4 | "module": "commonjs", 5 | "declaration": true, 6 | "outDir": "./dist", 7 | "strict": true, 8 | "esModuleInterop": true, 9 | "skipLibCheck": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "rootDir": "./src" 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules", "dist", "tests"] 15 | } -------------------------------------------------------------------------------- /typescript/src/schema.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | 3 | export const RESPONSE_SCHEMA = z.object({ 4 | content: z 5 | .string() 6 | .describe("The source of the information, relevant passage"), 7 | reasoning: z 8 | .string() 9 | .describe("The reasoning behind why the source is relevant"), 10 | isIrrelevant: z // true means the source does NOT help answer the question 11 | .boolean() 12 | .describe("Whether the source is irrelevant to the question"), 13 | }); 14 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | 
*.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Virtual Environment 24 | venv/ 25 | env/ 26 | ENV/ 27 | .env 28 | .venv/ 29 | 30 | # IDE 31 | .idea/ 32 | .vscode/ 33 | *.swp 34 | *.swo 35 | .DS_Store 36 | 37 | # Testing 38 | .coverage 39 | htmlcov/ 40 | .pytest_cache/ 41 | .tox/ 42 | 43 | # Poetry 44 | poetry.lock 45 | -------------------------------------------------------------------------------- /typescript/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | package-lock.json 4 | yarn.lock 5 | 6 | # Build output 7 | dist/ 8 | build/ 9 | *.tsbuildinfo 10 | 11 | # Environment variables 12 | .env 13 | .env.local 14 | .env.*.local 15 | 16 | # IDE and editor files 17 | .idea/ 18 | .vscode/ 19 | *.swp 20 | *.swo 21 | .DS_Store 22 | 23 | # Test coverage 24 | coverage/ 25 | 26 | # Logs 27 | logs/ 28 | *.log 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | 33 | # Optional npm cache directory 34 | .npm 35 | 36 | # Optional eslint cache 37 | .eslintcache 38 | 39 | # Optional REPL history 40 | .node_repl_history 41 | 42 | # Output of 'npm pack' 43 | *.tgz 44 | -------------------------------------------------------------------------------- /python/src/reag/prompt.py: -------------------------------------------------------------------------------- 1 | REAG_SYSTEM_PROMPT = """ 2 | # Role and Objective 3 | You are an intelligent knowledge retrieval assistant. Your task is to analyze provided documents or URLs to extract the most relevant information for user queries. 4 | 5 | # Instructions 6 | 1. Analyze the user's query carefully to identify key concepts and requirements. 7 | 2. 
Search through the provided sources for relevant information and output the relevant parts in the 'content' field. 8 | 3. If you cannot find the necessary information in the documents, return 'isIrrelevant: true', otherwise return 'isIrrelevant: false'. 9 | 10 | # Constraints 11 | - Do not make assumptions beyond available data 12 | - Clearly indicate if relevant information is not found 13 | - Maintain objectivity in source selection 14 | """ 15 | -------------------------------------------------------------------------------- /typescript/src/prompt.ts: -------------------------------------------------------------------------------- 1 | export const REAG_SYSTEM_PROMPT = ` 2 | # Role and Objective 3 | You are an intelligent knowledge retrieval assistant. Your task is to analyze provided documents or URLs to extract the most relevant information for user queries. 4 | 5 | # Instructions 6 | 1. Analyze the user's query carefully to identify key concepts and requirements. 7 | 2. Search through the provided sources for relevant information and output the relevant parts in the 'content' field. 8 | 3. If you cannot find the necessary information in the documents, return 'isIrrelevant: true', otherwise return 'isIrrelevant: false'. 9 | 10 | # Constraints 11 | - Do not make assumptions beyond available data 12 | - Clearly indicate if relevant information is not found 13 | - Maintain objectivity in source selection 14 | `; 15 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Superagent Technologies Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /typescript/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@superagent-ai/reag", 3 | "version": "0.0.3", 4 | "description": "Reasoning Augmented Generation", 5 | "main": "dist/client.js", 6 | "types": "dist/client.d.ts", 7 | "files": [ 8 | "dist" 9 | ], 10 | "scripts": { 11 | "build": "tsc", 12 | "prepublishOnly": "npm run build", 13 | "clean": "rm -rf dist", 14 | "test": "jest" 15 | }, 16 | "repository": { 17 | "type": "git", 18 | "url": "git+https://github.com/superagent-ai/reag" 19 | }, 20 | "keywords": ["ai", "document-analysis", "typescript", "rag", "reasoning"], 21 | "author": "Ismail Pelaseyed", 22 | "license": "MIT", 23 | "bugs": { 24 | "url": "https://github.com/superagent-ai/reag/issues" 25 | }, 26 | "homepage": "https://github.com/superagent-ai/reag#readme", 27 | "devDependencies": { 28 | "@types/jest": "^29.5.14", 29 | "@types/node": "^18.16.3", 30 | "jest": "^29.7.0", 31 | "ts-jest": "^29.2.5", 32 | "ts-node": "^10.9.2", 33 | "typescript": "^4.9.5" 34 | }, 35 | "peerDependencies": { 36 | "@ai-sdk/openai": "^1.1.9", 37 | "ai": "^4.1.16", 38 | "dotenv": "^16.4.7", 39 | "zod": "^3.24.1" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to ReAG 2 | 3 | Thank you for your interest in contributing to ReAG! Your contributions help make this project better for everyone. Please take a moment to review these guidelines to ensure a smooth and collaborative experience. 4 | 5 | ## How to Contribute 6 | 7 | ### Reporting Issues 8 | - Use GitHub Issues to report bugs or request new features. 9 | - Search existing issues to avoid duplicates before filing a new one. 
10 | - Provide clear and concise details: 11 | - Steps to reproduce the issue 12 | - Expected behavior versus the observed behavior 13 | - Information about your environment (OS, version, etc.) 14 | 15 | ### Submitting Pull Requests 16 | - Fork the repository and create a feature branch off of the main branch. 17 | - Clearly describe your changes and link any related issues in your pull request. 18 | - Ensure your code follows the project’s coding standards and style guidelines. 19 | - Include tests if applicable to verify your changes. 20 | - Keep pull requests focused on a single purpose or issue. 21 | 22 | ## Code Style Guidelines 23 | - Follow the project’s existing code style and best practices. 24 | - Write clear, self-explanatory commit messages. 25 | - Document your code where necessary and update related documentation if needed. 26 | 27 | ## Communication 28 | - Engage with maintainers and fellow contributors via GitHub Discussions or chat channels. 29 | - For larger changes, please open an issue first for discussion before submitting a pull request. 30 | 31 | ## Licensing 32 | - By contributing, you agree that your contributions will be licensed under the project's license. 33 | 34 | Thank you again for helping to improve ReAG. We look forward to your contributions! 
-------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "reag" 3 | version = "0.0.6" 4 | description = "ReAG SDK - Reasoning Augmented Generation framework for Python" 5 | authors = ["Ismail Pelaseyed "] 6 | readme = "README.md" 7 | packages = [{include = "reag", from = "src"}] 8 | license = "MIT" 9 | homepage = "https://github.com/superagent-ai/reag" 10 | repository = "https://github.com/superagent-ai/reag" 11 | documentation = "https://github.com/superagent-ai/reag#readme" 12 | keywords = ["llm", "ai", "reasoning", "generation", "nlp"] 13 | classifiers = [ 14 | "Development Status :: 3 - Alpha", 15 | "Intended Audience :: Developers", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Software Development :: Libraries :: Python Modules", 24 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 25 | ] 26 | 27 | [tool.poetry.dependencies] 28 | python = ">=3.9,<3.13" 29 | pydantic = "^2.0.0" 30 | httpx = "^0.25.0" 31 | litellm = "^1.60.0" 32 | 33 | [tool.poetry.group.dev.dependencies] 34 | pytest = "^7.4.0" 35 | pytest-asyncio = "^0.21.0" 36 | black = "^23.7.0" 37 | isort = "^5.12.0" 38 | mypy = "^1.5.0" 39 | 40 | [build-system] 41 | requires = ["poetry-core"] 42 | build-backend = "poetry.core.masonry.api" 43 | 44 | [tool.black] 45 | line-length = 88 46 | 47 | [tool.isort] 48 | profile = "black" 49 | multi_line_output = 3 50 | 51 | [tool.pytest.ini_options] 52 | pythonpath = [ 53 | "src" 54 | ] 55 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # 🎓 ReAG - Reasoning Augmented Generation 2 | 3 | ## Introduction 4 | 5 | Traditional Retrieval-Augmented Generation (RAG) systems rely on a two-step process: first, semantic search retrieves documents based on surface-level similarities; then, a language model generates answers from those documents. While this method works, it often misses deeper contextual insights and can pull in irrelevant information. ReAG – Reasoning Augmented Generation – offers a robust alternative by feeding raw documents directly to the language model, allowing it to assess and integrate the full context. This unified approach leads to more accurate, nuanced, and context-aware responses. 6 | 7 | ## How ReAG Works 8 | 9 | ReAG transforms document querying with a streamlined process: 10 | 11 | - Raw Document Ingestion: Documents are processed in full, without prior chunking or indexing. 12 | - Holistic Evaluation: The language model reads entire texts to determine their relevance and extract key information. 13 | - Dynamic Synthesis: Relevant details are combined into comprehensive answers, mirroring human research methods. 14 | 15 | This method eliminates the pitfalls of over-simplified semantic matches and delivers insights that truly address the query's intent. 16 | 17 | ## Table of Contents 18 | 19 | - [Features](#features) 20 | - [Installation](#installation) 21 | - [Python](https://github.com/superagent-ai/reag/tree/main/python) 22 | - [Typescript](https://github.com/superagent-ai/reag/tree/main/typescript) 23 | - [Contributing](#contributing) 24 | - [License](#license) 25 | - [Additional Resources](#additional-resources) 26 | - [Contact](#contact) 27 | 28 | ## Features 29 | - **Multi-language Support:** Available for both Python and Typescript. 30 | - **Document Ingestion:** Ingest markdown formatted documents with associated metadata. 
31 | - **Intelligent Querying:** Retrieve sources and insights based on contextual queries. 32 | - **Language Model Agnostic:** Works with any preferred language model. 33 | - **Production Ready:** Robust, scalable, and designed for real-world applications. 34 | 35 | 36 | ## Contributing 37 | 38 | We welcome contributions from the community. Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for guidelines on reporting issues, suggesting improvements, and submitting pull requests. 39 | 40 | ## License 41 | 42 | This project is licensed under the [MIT License](LICENSE.md). 43 | 44 | ## Additional Resources 45 | - [ReAG Blog Post](https://www.superagent.sh/blog/reag-reasoning-augmented-generation) - A deep dive into ReAG. 46 | 47 | ## Contact 48 | 49 | For support or inquiries, please contact: 50 | - [Create Issue](https://github.com/superagent-ai/reag/issues) 51 | - X: [@superagent_ai](https://x.com/superagent_ai) 52 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # 🎓 ReAG Python SDK 2 | 3 | ## Installation 4 | 1. Ensure Python 3.9+ is installed. 5 | 2. 
Install using pip or poetry: 6 | ```bash 7 | pip install reag 8 | # or 9 | poetry add reag 10 | ``` 11 | 12 | ## Quick Start 13 | ```python 14 | from reag.client import ReagClient, Document 15 | 16 | async with ReagClient( 17 | model="ollama/deepseek-r1:7b", 18 | model_kwargs={"api_base": "http://localhost:11434"} 19 | ) as client: 20 | docs = [ 21 | Document( 22 | name="Superagent", 23 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 24 | metadata={ 25 | "url": "https://superagent.sh", 26 | "source": "web", 27 | }, 28 | ), 29 | ] 30 | response = await client.query("What is Superagent?", documents=docs) 31 | 32 | ``` 33 | 34 | ## API Reference 35 | 36 | ### Initialization 37 | Initialize the client by providing required configuration options: 38 | 39 | ```typescript 40 | client = new ReagClient( 41 | model: "gpt-4o-mini", // LiteLLM model name 42 | system: Optional[str] // Optional system prompt 43 | batchSize: Optional[Number] // Optional batch size 44 | schema: Optional[BaseModel] // Optional Pydantic schema 45 | ); 46 | ``` 47 | 48 | ### Document Structure 49 | Documents should follow this structure: 50 | ```python 51 | document = Document( 52 | name="Superagent", 53 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 54 | metadata={ 55 | "url": "https://superagent.sh", 56 | "source": "web", 57 | }, 58 | ) 59 | ``` 60 | 61 | ### Querying 62 | Query documents with optional filters: 63 | 64 | ```python 65 | docs = [ 66 | Document( 67 | name="Superagent", 68 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 69 | metadata={ 70 | "url": "https://superagent.sh", 71 | "source": "web", 72 | "id": "sa-1", 73 | }, 74 | ), 75 | Document( 76 | name="Superagent", 77 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 78 | metadata={ 79 | "url": "https://superagent.sh", 80 | "source": "web", 81 | 
"id": "sa-2", 82 | }, 83 | ), 84 | ] 85 | options = {"filter": [{"key": "id", "value": "sa-1", "operator": "equals"}]} 86 | response = await client.query( 87 | "What is Superagent?", documents=docs, options=options 88 | ) 89 | ``` 90 | 91 | Response structure: 92 | ```python 93 | content: str 94 | reasoning: str 95 | is_irrelevant: bool 96 | document: Document 97 | ``` 98 | 99 | Example filters: 100 | - Filter by metadata field: 101 | ```python 102 | options = {"filter": [{"key": "id", "value": "sa-1", "operator": "equals"}]} 103 | ``` 104 | - Filter by numeric values: 105 | ```python 106 | options = { 107 | "filter": [{"key": "version", "value": 2, "operator": "greaterThanOrEqual"}] 108 | } 109 | ``` 110 | 111 | ## Contributing 112 | 113 | We welcome contributions from the community. Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for guidelines on reporting issues, suggesting improvements, and submitting pull requests. 114 | 115 | ## License 116 | 117 | This project is licensed under the [MIT License](LICENSE). 118 | 119 | ## Additional Resources 120 | - [ReAG Blog Post](https://www.superagent.sh/blog/reag-reasoning-augmented-generation) - A deep dive into ReAG. 121 | 122 | ## Contact 123 | 124 | For support or inquiries, please contact: 125 | - [Create Issue](https://github.com/superagent-ai/reag/issues) 126 | - X: [@superagent_ai](https://x.com/superagent_ai) 127 | -------------------------------------------------------------------------------- /typescript/README.md: -------------------------------------------------------------------------------- 1 | # 🎓 ReAG TypeScript SDK 2 | 3 | ## Installation 4 | 1. Ensure Node.js (14+) is installed. 5 | 2. Install using npm: 6 | ```bash 7 | npm install @superagent-ai/reag 8 | npm install @ai-sdk/openai 9 | ``` 10 | 3. 
Or using Yarn: 11 | ```bash 12 | yarn add @superagent-ai/reag 13 | yarn add @ai-sdk/openai 14 | ``` 15 | 16 | ## Quick Start 17 | ```typescript 18 | import { ReagClient, ClientOptions } from '@superagent-ai/reag'; 19 | import { openai } from "@ai-sdk/openai"; 20 | 21 | // Initialize the SDK with required options 22 | const client = new ReagClient({ 23 | model: openai("o3-mini", { structuredOutputs: true }), 24 | // system: optional system prompt here or use the default 25 | }); 26 | 27 | // Example document with metadata 28 | const document = { 29 | name: "Getting Started", 30 | content: "ReAG SDK is a framework for Reasoning Augmented Generation...", 31 | metadata: { 32 | url: "https://docs.example.com/getting-started", 33 | source: "documentation", 34 | id: "doc-1" 35 | } 36 | }; 37 | 38 | // Query with document context and filters 39 | const response = await client.query( 40 | "Describe the main features of the SDK", 41 | [document], 42 | { 43 | filter: [ 44 | { 45 | key: "source", 46 | value: "documentation", 47 | operator: "equals" 48 | } 49 | ] 50 | } 51 | ); 52 | 53 | // Response includes: content, reasoning, isIrrelevant, and document reference 54 | console.log('Query Response:', response); 55 | ``` 56 | 57 | ## API Reference 58 | 59 | ### Initialization 60 | Initialize the client by providing required configuration options: 61 | 62 | ```typescript 63 | const client = new ReagClient({ 64 | model: openai("o3-mini", { structuredOutputs: true }), 65 | system?: string // Optional system prompt 66 | batchSize?: number // Optional batch size 67 | schema?: z.ZodSchema // Optional schema 68 | }); 69 | ``` 70 | 71 | ### Document Structure 72 | Documents should follow this structure: 73 | ```typescript 74 | interface Document { 75 | name: string; 76 | content: string; 77 | metadata: { 78 | [key: string]: any; // Custom metadata fields 79 | } 80 | } 81 | ``` 82 | 83 | ### Querying 84 | Query documents with optional filters: 85 | 86 | ```typescript 87 | const response 
= await client.query( 88 | query: string, 89 | documents: Document[], 90 | options?: { 91 | filter?: Array<{ 92 | key: string; 93 | value: string | number; 94 | operator: "equals" | "greaterThanOrEqual" // and other operators 95 | }> 96 | } 97 | ); 98 | ``` 99 | 100 | Response structure: 101 | ```typescript 102 | interface QueryResponse { 103 | content: string; // Generated response 104 | reasoning: string; // Reasoning behind the response 105 | isIrrelevant: boolean; // Relevance indicator 106 | document: Document; // Reference to source document 107 | } 108 | ``` 109 | 110 | Example filters: 111 | - Filter by metadata field: 112 | ```typescript 113 | { 114 | filter: [ 115 | { 116 | key: "source", 117 | value: "documentation", 118 | operator: "equals" 119 | } 120 | ] 121 | } 122 | ``` 123 | - Filter by numeric values: 124 | ```typescript 125 | { 126 | filter: [ 127 | { 128 | key: "version", 129 | value: 2, 130 | operator: "greaterThanOrEqual" 131 | } 132 | ] 133 | } 134 | ``` 135 | 136 | ## Contributing 137 | 138 | We welcome contributions from the community. Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for guidelines on reporting issues, suggesting improvements, and submitting pull requests. 139 | 140 | ## License 141 | 142 | This project is licensed under the [MIT License](../LICENSE.md). 143 | 144 | ## Additional Resources 145 | - [ReAG Blog Post](https://www.superagent.sh/blog/reag-reasoning-augmented-generation) - A deep dive into ReAG. 
146 | 147 | ## Contact 148 | 149 | For support or inquiries, please contact: 150 | - [Create Issue](https://github.com/superagent-ai/reag/issues) 151 | - X: [@superagent_ai](https://x.com/superagent_ai) 152 | -------------------------------------------------------------------------------- /python/tests/test_client.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from reag.client import ReagClient, Document, QueryResult 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_query_with_documents(): 7 | async with ReagClient() as client: 8 | docs = [ 9 | Document( 10 | name="Superagent", 11 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 12 | metadata={ 13 | "url": "https://superagent.sh", 14 | "source": "web", 15 | }, 16 | ), 17 | ] 18 | response = await client.query("What is Superagent?", documents=docs) 19 | assert response is not None 20 | assert len(response) == 1 21 | result = response[0] 22 | assert isinstance(result, QueryResult) 23 | assert result.content is not None 24 | assert result.document is not None 25 | assert result.reasoning is not None 26 | assert isinstance(result.is_irrelevant, bool) 27 | 28 | 29 | @pytest.mark.asyncio 30 | async def test_query_with_metadata_filter(): 31 | async with ReagClient() as client: 32 | docs = [ 33 | Document( 34 | name="Superagent", 35 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 36 | metadata={ 37 | "url": "https://superagent.sh", 38 | "source": "web", 39 | "id": "sa-1", 40 | }, 41 | ), 42 | Document( 43 | name="Superagent", 44 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 45 | metadata={ 46 | "url": "https://superagent.sh", 47 | "source": "web", 48 | "id": "sa-2", 49 | }, 50 | ), 51 | ] 52 | options = {"filter": [{"key": "id", "value": "sa-1", "operator": "equals"}]} 53 | response = await client.query( 54 | "What is 
Superagent?", documents=docs, options=options 55 | ) 56 | assert response is not None 57 | assert len(response) == 1 58 | result = response[0] 59 | assert isinstance(result, QueryResult) 60 | assert result.document.metadata["id"] == "sa-1" 61 | 62 | 63 | @pytest.mark.asyncio 64 | async def test_query_with_integer_filter(): 65 | async with ReagClient() as client: 66 | docs = [ 67 | Document( 68 | name="Superagent", 69 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 70 | metadata={ 71 | "version": 1, 72 | "source": "web", 73 | "id": "sa-1", 74 | }, 75 | ), 76 | Document( 77 | name="Superagent", 78 | content="Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 79 | metadata={ 80 | "version": 2, 81 | "source": "web", 82 | "id": "sa-2", 83 | }, 84 | ), 85 | ] 86 | options = { 87 | "filter": [{"key": "version", "value": 2, "operator": "greaterThanOrEqual"}] 88 | } 89 | response = await client.query( 90 | "What is Superagent?", documents=docs, options=options 91 | ) 92 | assert response is not None 93 | assert len(response) == 1 94 | result = response[0] 95 | assert isinstance(result, QueryResult) 96 | assert result.document.metadata["version"] == 2 97 | 98 | 99 | @pytest.mark.asyncio 100 | async def test_query_returns_empty_for_irrelevant_docs(): 101 | async with ReagClient() as client: 102 | docs = [ 103 | Document( 104 | name="Irrelevant Doc", 105 | content="This document contains completely unrelated content about cooking recipes.", 106 | metadata={"type": "recipe", "cuisine": "italian"}, 107 | ) 108 | ] 109 | response = await client.query("What is Superagent?", documents=docs) 110 | print(response) 111 | assert response is not None 112 | assert len(response) == 0 # Should be empty since doc is irrelevant 113 | -------------------------------------------------------------------------------- /typescript/tests/client.test.ts: 
-------------------------------------------------------------------------------- 1 | import { describe, it, expect, beforeEach } from "@jest/globals"; 2 | import { openai } from "@ai-sdk/openai"; 3 | 4 | import { ReagClient, ClientOptions } from "../src/client"; 5 | 6 | describe("typescript client", () => { 7 | let client: ReagClient; 8 | 9 | beforeEach(() => { 10 | // Create a fresh mock for each test with all required properties 11 | const options: ClientOptions = { 12 | model: openai("o3-mini", { structuredOutputs: true }), 13 | system: "test system", 14 | }; 15 | 16 | client = new ReagClient(options); 17 | }); 18 | 19 | describe("query", () => { 20 | it("should successfully return model response", async () => { 21 | const testPrompt = "What is Superagent?"; 22 | const result = await client.query(testPrompt, [ 23 | { 24 | name: "Superagent", 25 | content: 26 | "Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 27 | metadata: { 28 | url: "https://superagent.sh", 29 | source: "web", 30 | }, 31 | }, 32 | ]); 33 | console.log(result); 34 | 35 | expect(result).toBeInstanceOf(Array); 36 | expect(result[0]).toHaveProperty("content"); 37 | expect(result[0]).toHaveProperty("reasoning"); 38 | expect(result[0]).toHaveProperty("isIrrelevant"); 39 | }, 30_000); 40 | }); 41 | 42 | describe("filtered query", () => { 43 | it("should successfully return model response with string filters", async () => { 44 | const testPrompt = "What is Superagent?"; 45 | const result = await client.query( 46 | testPrompt, 47 | [ 48 | { 49 | name: "Superagent", 50 | content: 51 | "Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 52 | metadata: { 53 | url: "https://superagent.sh", 54 | source: "web", 55 | id: "sa-1", 56 | }, 57 | }, 58 | { 59 | name: "Superagent", 60 | content: 61 | "Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 62 | metadata: { 63 | url: "https://superagent.sh", 64 | 
source: "web", 65 | id: "sa-2", 66 | }, 67 | }, 68 | ], 69 | { 70 | filter: [ 71 | { 72 | key: "id", 73 | value: "sa-1", 74 | operator: "equals", 75 | }, 76 | ], 77 | } 78 | ); 79 | 80 | expect(result).toBeInstanceOf(Array); 81 | expect(result.length).toBe(1); 82 | expect(result[0]).toHaveProperty("content"); 83 | expect(result[0]).toHaveProperty("reasoning"); 84 | expect(result[0]).toHaveProperty("isIrrelevant"); 85 | expect(result[0].document.metadata?.id).toBe("sa-1"); 86 | }, 30_000); 87 | 88 | it("should successfully return model response with integer filters", async () => { 89 | const testPrompt = "What is Superagent?"; 90 | const result = await client.query( 91 | testPrompt, 92 | [ 93 | { 94 | name: "Superagent", 95 | content: 96 | "Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 97 | metadata: { 98 | version: 1, 99 | source: "web", 100 | id: "sa-1", 101 | }, 102 | }, 103 | { 104 | name: "Superagent", 105 | content: 106 | "Superagent is a workspace for AI-agents that learn, perform work, and collaborate.", 107 | metadata: { 108 | version: 2, 109 | source: "web", 110 | id: "sa-2", 111 | }, 112 | }, 113 | ], 114 | { 115 | filter: [ 116 | { 117 | key: "version", 118 | value: 2, 119 | operator: "greaterThanOrEqual", 120 | }, 121 | ], 122 | } 123 | ); 124 | 125 | expect(result).toBeInstanceOf(Array); 126 | expect(result.length).toBe(1); 127 | expect(result[0]).toHaveProperty("content"); 128 | expect(result[0]).toHaveProperty("reasoning"); 129 | expect(result[0]).toHaveProperty("isIrrelevant"); 130 | expect(result[0].document.metadata?.version).toBe(2); 131 | }, 30_000); 132 | }); 133 | }); 134 | -------------------------------------------------------------------------------- /typescript/src/client.ts: -------------------------------------------------------------------------------- 1 | import { 2 | generateText, 3 | LanguageModel, 4 | GenerateObjectResult, 5 | generateObject, 6 | } from "ai"; 7 | import { z } from "zod"; 8 | 9 
| import { REAG_SYSTEM_PROMPT } from "./prompt"; 10 | import { RESPONSE_SCHEMA } from "./schema"; 11 | 12 | export interface ClientOptions { 13 | /** 14 | * The language model instance to use for queries. 15 | * This should be an instance of a model that implements the Vercel AI SDK's LanguageModel interface. 16 | * See: https://sdk.vercel.ai/docs/foundations/providers-and-models 17 | */ 18 | model: LanguageModel; 19 | /** 20 | * The system prompt that provides context and instructions to the model. 21 | * This string sets the behavior and capabilities of the model for all queries. 22 | */ 23 | system: string; 24 | /** 25 | * The size of the batch to process documents in. 26 | * This is used to limit the number of documents processed at once. 27 | */ 28 | batchSize?: number; 29 | /** 30 | * The schema to use for the response. 31 | */ 32 | schema?: z.ZodSchema; 33 | } 34 | 35 | export interface Document { 36 | name: string; 37 | content: string; 38 | metadata?: Record; 39 | } 40 | 41 | export interface QueryResult { 42 | results: { 43 | relevant: T[]; 44 | irrelevant?: boolean; 45 | }; 46 | document: Document; 47 | } 48 | 49 | export interface MetadataFilter { 50 | key: string; 51 | value: string | number; 52 | operator?: 53 | | "equals" 54 | | "contains" 55 | | "startsWith" 56 | | "endsWith" 57 | | "notEquals" 58 | | "greaterThan" 59 | | "lessThan" 60 | | "greaterThanOrEqual" 61 | | "lessThanOrEqual" 62 | | "regex"; 63 | } 64 | 65 | const DEFAULT_BATCH_SIZE = 20; 66 | 67 | /** 68 | * The Client class that wraps a language model and exposes query methods. 69 | */ 70 | export class ReagClient { 71 | private readonly model: LanguageModel; 72 | private readonly system: string; 73 | private readonly batchSize: number; 74 | private readonly schema: z.ZodSchema; 75 | 76 | /** 77 | * Constructs a new Client instance. 78 | * @param options Configuration options for the Client. 
79 | */ 80 | constructor(options: ClientOptions) { 81 | this.model = options.model; 82 | this.system = options.system || REAG_SYSTEM_PROMPT; 83 | this.batchSize = options.batchSize || DEFAULT_BATCH_SIZE; 84 | this.schema = options.schema || RESPONSE_SCHEMA; 85 | } 86 | 87 | /** 88 | * Filters documents based on metadata criteria 89 | */ 90 | private filterDocumentsByMetadata( 91 | documents: Document[], 92 | filter?: MetadataFilter[] 93 | ): Document[] { 94 | if (!filter?.length) return documents; 95 | 96 | return documents.filter((doc) => { 97 | return filter.every((filter) => { 98 | const metadataValue = doc.metadata?.[filter.key]; 99 | if (!metadataValue) return false; 100 | 101 | // Handle string operations only if both values are strings 102 | if ( 103 | typeof metadataValue === "string" && 104 | typeof filter.value === "string" 105 | ) { 106 | switch (filter.operator) { 107 | case "contains": 108 | return metadataValue.includes(filter.value); 109 | case "startsWith": 110 | return metadataValue.startsWith(filter.value); 111 | case "endsWith": 112 | return metadataValue.endsWith(filter.value); 113 | case "regex": 114 | return new RegExp(filter.value).test(metadataValue); 115 | } 116 | } 117 | 118 | // Handle numeric comparisons and equality checks 119 | switch (filter.operator) { 120 | case "equals": 121 | return metadataValue === filter.value; 122 | case "notEquals": 123 | return metadataValue !== filter.value; 124 | case "greaterThan": 125 | return metadataValue > filter.value; 126 | case "lessThan": 127 | return metadataValue < filter.value; 128 | case "greaterThanOrEqual": 129 | return metadataValue >= filter.value; 130 | case "lessThanOrEqual": 131 | return metadataValue <= filter.value; 132 | default: 133 | return metadataValue === filter.value; 134 | } 135 | }); 136 | }); 137 | } 138 | 139 | /** 140 | * Executes a query on the assigned language model with document batching 141 | */ 142 | async query( 143 | prompt: string, 144 | documents: Document[], 145 
| options?: { 146 | filter?: MetadataFilter[]; 147 | } 148 | ): Promise>[]> { 149 | try { 150 | const filteredDocuments = this.filterDocumentsByMetadata( 151 | documents, 152 | options?.filter 153 | ); 154 | 155 | const formatDoc = (doc: Document) => 156 | `Name: ${doc.name}\nMetadata: ${JSON.stringify( 157 | doc.metadata 158 | )}\nContent: ${doc.content}`; 159 | 160 | const batches = Array.from( 161 | { length: Math.ceil(filteredDocuments.length / this.batchSize) }, 162 | (_, i) => 163 | filteredDocuments.slice(i * this.batchSize, (i + 1) * this.batchSize) 164 | ); 165 | 166 | const batchResults = await Promise.all( 167 | batches.map(async (batch) => { 168 | // Process each document in the batch individually 169 | const batchResponses = await Promise.all( 170 | batch.map(async (document) => { 171 | const system = `${ 172 | this.system 173 | }\n\n# Available source\n\n${formatDoc(document)}`; 174 | const response = await generateObject({ 175 | model: this.model, 176 | system, 177 | prompt, 178 | schema: this.schema, 179 | }); 180 | 181 | return { 182 | response, 183 | document, 184 | }; 185 | }) 186 | ); 187 | return batchResponses; 188 | }) 189 | ); 190 | 191 | const results = batchResults.flat().map(({ response, document }) => ({ 192 | ...response.object, 193 | document, 194 | })); 195 | 196 | return results; 197 | } catch (error) { 198 | throw new Error(`Query failed: ${error}`); 199 | } 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /python/src/reag/client.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import asyncio 3 | import json 4 | import re 5 | from typing import List, Optional, TypeVar, Dict, Union 6 | from pydantic import BaseModel 7 | from litellm import acompletion 8 | 9 | from reag.prompt import REAG_SYSTEM_PROMPT 10 | from reag.schema import ResponseSchemaMessage 11 | 12 | 13 | class Document(BaseModel): 14 | name: str 15 | content: str 
16 | metadata: Optional[Dict[str, Union[str, int]]] = None 17 | 18 | 19 | class MetadataFilter(BaseModel): 20 | key: str 21 | value: Union[str, int] 22 | operator: Optional[str] = None 23 | 24 | 25 | T = TypeVar("T") 26 | 27 | 28 | class QueryResult(BaseModel): 29 | content: str 30 | reasoning: str 31 | is_irrelevant: bool 32 | document: Document 33 | 34 | 35 | DEFAULT_BATCH_SIZE = 20 36 | 37 | 38 | class ReagClient: 39 | def __init__( 40 | self, 41 | model: str = "gpt-4o-mini", 42 | system: str = None, 43 | batch_size: int = DEFAULT_BATCH_SIZE, 44 | schema: Optional[BaseModel] = None, 45 | model_kwargs: Optional[Dict] = None, 46 | ): 47 | self.model = model 48 | self.system = system or REAG_SYSTEM_PROMPT 49 | self.batch_size = batch_size 50 | self.schema = schema or ResponseSchemaMessage 51 | self.model_kwargs = model_kwargs or {} 52 | self._http_client = None 53 | 54 | async def __aenter__(self): 55 | self._http_client = httpx.AsyncClient() 56 | return self 57 | 58 | async def __aexit__(self, exc_type, exc_val, exc_tb): 59 | if self._http_client: 60 | await self._http_client.aclose() 61 | 62 | def _filter_documents_by_metadata( 63 | self, documents: List[Document], filters: Optional[List[MetadataFilter]] = None 64 | ) -> List[Document]: 65 | if not filters: 66 | return documents 67 | 68 | filtered_docs = [] 69 | for doc in documents: 70 | matches_all_filters = True 71 | 72 | for filter_item in filters: 73 | metadata_value = ( 74 | doc.metadata.get(filter_item.key) if doc.metadata else None 75 | ) 76 | if metadata_value is None: 77 | matches_all_filters = False 78 | break 79 | 80 | if isinstance(metadata_value, str) and isinstance( 81 | filter_item.value, str 82 | ): 83 | if filter_item.operator == "contains": 84 | if not filter_item.value in metadata_value: 85 | matches_all_filters = False 86 | break 87 | elif filter_item.operator == "startsWith": 88 | if not metadata_value.startswith(filter_item.value): 89 | matches_all_filters = False 90 | break 91 | elif 
filter_item.operator == "endsWith": 92 | if not metadata_value.endswith(filter_item.value): 93 | matches_all_filters = False 94 | break 95 | elif filter_item.operator == "regex": 96 | import re 97 | 98 | if not re.match(filter_item.value, metadata_value): 99 | matches_all_filters = False 100 | break 101 | 102 | if filter_item.operator == "equals": 103 | if metadata_value != filter_item.value: 104 | matches_all_filters = False 105 | break 106 | elif filter_item.operator == "notEquals": 107 | if metadata_value == filter_item.value: 108 | matches_all_filters = False 109 | break 110 | elif filter_item.operator == "greaterThan": 111 | if not metadata_value > filter_item.value: 112 | matches_all_filters = False 113 | break 114 | elif filter_item.operator == "lessThan": 115 | if not metadata_value < filter_item.value: 116 | matches_all_filters = False 117 | break 118 | elif filter_item.operator == "greaterThanOrEqual": 119 | if not metadata_value >= filter_item.value: 120 | matches_all_filters = False 121 | break 122 | elif filter_item.operator == "lessThanOrEqual": 123 | if not metadata_value <= filter_item.value: 124 | matches_all_filters = False 125 | break 126 | 127 | if matches_all_filters: 128 | filtered_docs.append(doc) 129 | 130 | return filtered_docs 131 | 132 | def _extract_think_content(self, text: str) -> tuple[str, str, bool]: 133 | """Extract content from think tags and parse the bulleted response format.""" 134 | # Extract think content 135 | think_match = re.search(r'(.*?)', text, flags=re.DOTALL) 136 | reasoning = think_match.group(1).strip() if think_match else "" 137 | 138 | # Remove think tags and get remaining text 139 | remaining_text = re.sub(r'.*?', '', text, flags=re.DOTALL).strip() 140 | 141 | # Initialize default values 142 | content = "" 143 | is_irrelevant = True 144 | 145 | # Extract is_irrelevant value 146 | irrelevant_match = re.search(r'\*\*isIrrelevant:\*\*\s*(true|false)', remaining_text, re.IGNORECASE) 147 | if irrelevant_match: 148 | 
is_irrelevant = irrelevant_match.group(1).lower() == 'true' 149 | 150 | # Extract content value 151 | content_match = re.search(r'\*\*Answer:\*\*\s*(.*?)(?:\n|$)', remaining_text, re.DOTALL) 152 | if content_match: 153 | content = content_match.group(1).strip() 154 | 155 | return content, reasoning, is_irrelevant 156 | 157 | async def query( 158 | self, prompt: str, documents: List[Document], options: Optional[Dict] = None 159 | ) -> List[QueryResult]: 160 | try: 161 | # Convert dictionary filters to MetadataFilter objects 162 | filters = None 163 | if options and "filter" in options: 164 | raw_filters = options["filter"] 165 | if isinstance(raw_filters, list): 166 | filters = [ 167 | MetadataFilter(**f) if isinstance(f, dict) else f 168 | for f in raw_filters 169 | ] 170 | elif isinstance(raw_filters, dict): 171 | filters = [MetadataFilter(**raw_filters)] 172 | 173 | filtered_documents = self._filter_documents_by_metadata(documents, filters) 174 | 175 | def format_doc(doc: Document) -> str: 176 | return f"Name: {doc.name}\nMetadata: {doc.metadata}\nContent: {doc.content}" 177 | 178 | batch_size = self.batch_size 179 | batches = [ 180 | filtered_documents[i : i + batch_size] 181 | for i in range(0, len(filtered_documents), batch_size) 182 | ] 183 | 184 | results = [] 185 | for batch in batches: 186 | tasks = [] 187 | # Create tasks for parallel processing within the batch 188 | for document in batch: 189 | system = f"{self.system}\n\n# Available source\n\n{format_doc(document)}" 190 | tasks.append( 191 | acompletion( 192 | model=self.model, 193 | messages=[ 194 | {"role": "system", "content": system}, 195 | {"role": "user", "content": prompt}, 196 | ], 197 | response_format=self.schema, 198 | **self.model_kwargs, 199 | ) 200 | ) 201 | 202 | # Process all documents in the batch concurrently 203 | batch_responses = await asyncio.gather(*tasks) 204 | 205 | # Process the responses 206 | for document, response in zip(batch, batch_responses): 207 | message_content = 
response.choices[0].message.content 208 | 209 | try: 210 | if self.model.startswith("ollama/"): 211 | content, reasoning, is_irrelevant = self._extract_think_content(message_content) 212 | results.append( 213 | QueryResult( 214 | content=content, 215 | reasoning=reasoning, 216 | is_irrelevant=is_irrelevant, 217 | document=document, 218 | ) 219 | ) 220 | else: 221 | # Ensure it's parsed as a dict 222 | data = ( 223 | json.loads(message_content) 224 | if isinstance(message_content, str) 225 | else message_content 226 | ) 227 | 228 | if data["source"].get("is_irrelevant", True): 229 | continue 230 | 231 | results.append( 232 | QueryResult( 233 | content=data["source"].get("content", ""), 234 | reasoning=data["source"].get("reasoning", ""), 235 | is_irrelevant=data["source"].get("is_irrelevant", False), 236 | document=document, 237 | ) 238 | ) 239 | except json.JSONDecodeError: 240 | print("Error: Could not parse response:", message_content) 241 | continue 242 | 243 | return results 244 | 245 | except Exception as e: 246 | raise Exception(f"Query failed: {str(e)}") 247 | --------------------------------------------------------------------------------