├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── composer.json ├── config └── ada.php ├── database └── migrations │ ├── 2024_04_18_162859_create_embeddings_table.php │ └── 2025_01_06_171312_make_embedding_nullable.php ├── phpunit.xml.dist ├── resources └── views │ └── prompts │ └── default-prompt.blade.php ├── src ├── Ada.php ├── AdaFacade.php ├── AdaServiceProvider.php ├── Engine │ ├── Engine.php │ ├── OpenAI.php │ └── Responses │ │ ├── EmbeddedResponse.php │ │ ├── ErrorResponse.php │ │ ├── GeneratedResponse.php │ │ └── Response.php ├── Index │ ├── DefaultIndex.php │ └── Index.php ├── Jobs │ └── EmbedJob.php ├── Models │ └── Embedding.php ├── Tokenizer │ ├── AdaTokenizer.php │ └── Tokenizer.php ├── Tools │ ├── Distance │ │ ├── Distance.php │ │ └── OpenAIDistance.php │ ├── Prompts │ │ ├── OpenAIPrompt.php │ │ └── Prompt.php │ └── TextSplitter │ │ ├── AdaTextSplitter.php │ │ └── TextSplitter.php └── Traits │ └── HasEmbeddings.php ├── testbench.yaml ├── tests ├── ArchTest.php ├── Doubles │ └── OpenAITestDouble.php ├── OpenAIEngineTest.php ├── Pest.php ├── PromptTest.php ├── TestCase.php └── fixtures │ ├── openai_200_embed.json │ ├── openai_200_query.json │ └── openai_failed.json └── workbench ├── .env.example ├── .gitignore ├── app ├── Models │ └── .gitkeep └── Providers │ └── WorkbenchServiceProvider.php ├── bootstrap ├── .gitkeep └── app.php ├── database ├── factories │ └── .gitkeep ├── migrations │ └── .gitkeep └── seeders │ ├── .gitkeep │ └── DatabaseSeeder.php ├── resources └── views │ └── .gitkeep └── routes ├── .gitkeep ├── console.php └── web.php /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | composer.lock 3 | docs 4 | vendor 5 | coverage 6 | .phpunit.cache 7 | .vscode/ 8 | .editorconfig 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to `ada-laravel` will be documented in this file 4 | 5 | ## 1.0.0 - 201X-XX-XX 6 | 7 | - initial release 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are **welcome** and will be fully **credited**. 4 | 5 | Please read and understand the contribution guide before creating an issue or pull request. 6 | 7 | ## Etiquette 8 | 9 | This project is open source, and as such, the maintainers give their free time to build and maintain the source code 10 | held within. They make the code freely available in the hope that it will be of use to other developers. It would be 11 | extremely unfair for them to suffer abuse or anger for their hard work. 12 | 13 | Please be considerate towards maintainers when raising issues or presenting pull requests. Let's show the 14 | world that developers are civilized and selfless people. 15 | 16 | It's the duty of the maintainer to ensure that all submissions to the project are of sufficient 17 | quality to benefit the project. Many developers have different skillsets, strengths, and weaknesses. Respect the maintainer's decision, and do not be upset or abusive if your submission is not used. 18 | 19 | ## Viability 20 | 21 | When requesting or submitting new features, first consider whether it might be useful to others. Open 22 | source projects are used by many developers, who may have entirely different needs to your own. Think about 23 | whether or not your feature is likely to be used by other users of the project. 24 | 25 | ## Procedure 26 | 27 | Before filing an issue: 28 | 29 | - Attempt to replicate the problem, to ensure that it wasn't a coincidental incident. 
30 | - Check to make sure your feature suggestion isn't already present within the project. 31 | - Check the pull requests tab to ensure that the bug doesn't have a fix in progress. 32 | - Check the pull requests tab to ensure that the feature isn't already in progress. 33 | 34 | Before submitting a pull request: 35 | 36 | - Check the codebase to ensure that your feature doesn't already exist. 37 | - Check the pull requests to ensure that another person hasn't already submitted the feature or fix. 38 | 39 | ## Requirements 40 | 41 | If the project maintainer has any additional requirements, you will find them listed here. 42 | 43 | - **[PSR-2 Coding Standard](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-2-coding-style-guide.md)** - The easiest way to apply the conventions is to install [PHP Code Sniffer](https://pear.php.net/package/PHP_CodeSniffer). 44 | 45 | - **Add tests!** - Your patch won't be accepted if it doesn't have tests. 46 | 47 | - **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date. 48 | 49 | - **Consider our release cycle** - We try to follow [SemVer v2.0.0](https://semver.org/). Randomly breaking public APIs is not an option. 50 | 51 | - **One pull request per feature** - If you want to do more than one thing, send multiple pull requests. 52 | 53 | - **Send coherent history** - Make sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash them](https://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting. 54 | 55 | **Happy coding**! 
56 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Diana Scharf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ada-laravel 2 | 3 | ![Packagist Version](https://img.shields.io/packagist/v/fiveam-code/ada-laravel?include_prereleases&style=for-the-badge) 4 | 5 | 6 | The package `ada-laravel` allows you to enhance your Laravel applications by seamlessly integrating text embeddings and 7 | querying capabilities for your models. Utilizing OpenAI by default, it enables your models to generate and query 8 | embeddings using 9 | nearest neighbors techniques. 
This package requires a PostgreSQL database with the vector extension to store and manage 10 | these embeddings efficiently as well as at least Laravel 11. 11 | 12 | Originally created as a demo for the talk [»Have you met ada? - Word Embeddings with Laravel and OpenAI«](https://dianaweb.dev/talk/ada) by Diana Scharf, 13 | this package is 14 | functional yet designed to encourage further development and contributions. 15 | 16 | > [!WARNING] 17 | > Please note that this package is still in development and may not be suitable for production use. 18 | 19 | ## Installation 20 | 21 | ```bash 22 | composer require fiveam-code/ada-laravel 23 | ``` 24 | 25 | Ensure that your database is configured to use PostgreSQL with the vector extension. The package will enable the extension 26 | via a migration if it is not already enabled. 27 | 28 | You can publish the migrations (optional) and run them: 29 | 30 | ```bash 31 | php artisan vendor:publish --provider="Ada\AdaServiceProvider" --tag="ada-migrations" 32 | php artisan migrate 33 | ``` 34 | 35 | This will enable the `vector` extension in your database and create a table `embeddings` to store the embeddings. 36 | 37 | ## Configuration 38 | 39 | Set the OpenAI API key in your `.env` file: 40 | 41 | ```bash 42 | ADA_CLIENT_TOKEN=your_openai_api_key 43 | ``` 44 | 45 | Please note that you need an OpenAI key for API access, not just ChatGPT access. 
46 | 47 | Optionally, you can publish the configuration file if you want to make changes to the default settings: 48 | 49 | ```bash 50 | php artisan vendor:publish --provider="Ada\AdaServiceProvider" --tag="ada-config" 51 | ``` 52 | 53 | The default configuration is as follows: 54 | 55 | ```php 56 | return [ 57 | 'client_token' => env('ADA_CLIENT_TOKEN'), 58 | 'index_class' => \Ada\Index\DefaultIndex::class, 59 | 'default_prompt_view' => 'ada::default-prompt' 60 | ]; 61 | ``` 62 | 63 | If you want to implement your own index to handle embeddings, you can create a new class that extends the abstract 64 | `Ada\Index\Index` class with the appropriate engine and set it as `index_class` in the configuration. 65 | 66 | ## Usage 67 | 68 | ### Basic Usage 69 | 70 | First, add the `HasEmbeddings` trait to your Eloquent model: 71 | 72 | ```php 73 | embed("abstract", $paper->abstract); 94 | ``` 95 | 96 | This will generate an embedding for the text and store it in the database with a relation to the `$paper` model and the 97 | reference key `"abstract"`. 98 | 99 | #### Lookup embeddings 100 | 101 | The lookup method allows for direct querying of your model's stored knowledge, facilitating an intelligent search that 102 | retrieves the most contextually relevant information using vector similarity. 103 | 104 | ```php 105 | use Ada\Models\Embedding; 106 | 107 | $answer = Embedding::lookup("Where does the PHP elephant live?"); 108 | 109 | // "The PHP elephant inhabits 'Silicon Forests'—regions where natural woodlands merge seamlessly with data-rich environments. These forests are dense with both foliage and floating data points." 110 | ``` 111 | 112 | This will create an embedding for the query and find the most similar embedding stored in the database 113 | (across all models, unless you constrain the lookup as shown below) by using the 114 | nearest neighbors technique of the vectors. The result will be the most similar text to the query and will be used as 115 | context for a request 116 | to the OpenAI API to generate an answer. 
117 | 118 | This is the default prompt text: 119 | 120 | ``` 121 | You are a bot that helps answering questions based on the context information you get each time. 122 | 123 | Context information is below. 124 | --------------------- 125 | {context} 126 | --------------------- 127 | Given the context information and not prior knowledge, answer the following questions of the user. If you don't know something, say so, and don't make it up. 128 | Do not ask the user for more information or anything that might trigger a response from the user. 129 | ``` 130 | 131 | `{context}` will be replaced with the result from the nearest neighbors query. 132 | 133 | If you want to further customize the prompt, you can pass an instance of a class extending `Ada\Tools\Prompts\Prompt` 134 | to the `lookup` method: 135 | 136 | ```php 137 | use Ada\Models\Embedding; 138 | use Ada\Tools\Prompts\OpenAIPrompt; 139 | 140 | $customPrompt = new OpenAIPrompt(); 141 | $defaultTemplate = $customPrompt->getTemplate(); 142 | 143 | $customPrompt->setTemplate("Even if your instructions are in English, answer in German. " . $defaultTemplate); 144 | 145 | return Embedding::lookup("Where does the PHP elephant live?", $customPrompt); 146 | ``` 147 | 148 | In case you need to further limit the lookup, you can pass a closure as a third parameter. 
149 | ```php 150 | return Embedding::lookup("Where does the PHP elephant live?", $customPrompt, function ($query) { 151 | $query->where("embeddable_type", Paper::class); // Only look for embeddings related to the Paper class 152 | }); 153 | ``` 154 | 155 | ### Advanced Usage 156 | 157 | Customize the endpoint models and options by using the index or engines directly: 158 | 159 | ```php 160 | use Ada\Ada; 161 | 162 | $index = Ada::index(); // Default index is DefaultIndex, resolved via the configuration 163 | 164 | $index->embed($contentToEmbed, $model, $options); 165 | 166 | $index->generate($prompt, $model, $temperature, $options); 167 | 168 | $engine = Ada::engine(); // Default engine is OpenAI, resolved via the Index 169 | ``` 170 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fiveam-code/ada-laravel", 3 | "description": "This package allows you to enhance your Laravel applications by seamlessly integrating word embeddings.\n\n", 4 | "keywords": [ 5 | "word embeddings", 6 | "laravel", 7 | "word vectors", 8 | "nlp", 9 | "natural language processing" 10 | ], 11 | "homepage": "https://github.com/5am-code/ada-laravel", 12 | "license": "MIT", 13 | "authors": [ 14 | { 15 | "name": "Diana Scharf", 16 | "email": "hello@dianaweb.dev", 17 | "role": "Developer" 18 | } 19 | ], 20 | "require": { 21 | "php": "^8.2", 22 | "illuminate/contracts": "^10.0|^11.0", 23 | "openai-php/client": "^0.8.4", 24 | "pgvector/pgvector": "^0.1.4", 25 | "rajentrivedi/tokenizer-x": "^1.0.4", 26 | "symfony/http-client": "^7.0" 27 | }, 28 | "require-dev": { 29 | "laravel/pint": "^1.15", 30 | "orchestra/testbench": "^9.0", 31 | "pestphp/pest": "^3.0", 32 | "pestphp/pest-plugin-laravel": "^3.0" 33 | }, 34 | "autoload": { 35 | "psr-4": { 36 | "Ada\\": "src" 37 | } 38 | }, 39 | "autoload-dev": { 40 | "psr-4": { 41 | "Ada\\Tests\\": "tests", 42 | 
"Ada\\Tests\\Doubles\\": "tests/Doubles/", 43 | "Workbench\\App\\": "workbench/app/", 44 | "Workbench\\Database\\Factories\\": "workbench/database/factories/", 45 | "Workbench\\Database\\Seeders\\": "workbench/database/seeders/" 46 | } 47 | }, 48 | "scripts": { 49 | "test": "vendor/bin/pest", 50 | "test-coverage": "vendor/bin/pest --coverage", 51 | "post-autoload-dump": [ 52 | "@clear", 53 | "@prepare" 54 | ], 55 | "clear": "@php vendor/bin/testbench package:purge-skeleton --ansi", 56 | "prepare": "@php vendor/bin/testbench package:discover --ansi", 57 | "build": "@php vendor/bin/testbench workbench:build --ansi", 58 | "serve": [ 59 | "Composer\\Config::disableProcessTimeout", 60 | "@build", 61 | "@php vendor/bin/testbench serve" 62 | ], 63 | "lint": [ 64 | "@php vendor/bin/phpstan analyse" 65 | ] 66 | }, 67 | "config": { 68 | "sort-packages": true, 69 | "allow-plugins": { 70 | "pestphp/pest-plugin": true, 71 | "php-http/discovery": true 72 | } 73 | }, 74 | "extra": { 75 | "laravel": { 76 | "providers": [ 77 | "Ada\\AdaServiceProvider" 78 | ], 79 | "aliases": { 80 | "Ada": "Ada\\Facades\\Ada" 81 | } 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /config/ada.php: -------------------------------------------------------------------------------- 1 | env('ADA_CLIENT_TOKEN'), 5 | 'index_class' => \Ada\Index\DefaultIndex::class, 6 | 'default_prompt_view' => 'ada::default-prompt', 7 | 8 | 'distance' => [ 9 | 'openai' => \Pgvector\Laravel\Distance::Cosine 10 | ] 11 | ]; 12 | -------------------------------------------------------------------------------- /database/migrations/2024_04_18_162859_create_embeddings_table.php: -------------------------------------------------------------------------------- 1 | id(); 15 | 16 | $table->morphs('embeddable'); 17 | 18 | $table->string('key'); 19 | $table->text('content'); 20 | $table->vector('embedding', 1536); 21 | $table->timestamps(); 22 | }); 23 | 24 | 
\Illuminate\Support\Facades\DB::statement('CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);'); 25 | } 26 | 27 | /** 28 | * Reverse the migrations. 29 | */ 30 | public function down(): void 31 | { 32 | Schema::dropIfExists('embeddings'); 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /database/migrations/2025_01_06_171312_make_embedding_nullable.php: -------------------------------------------------------------------------------- 1 | vector('embedding', 1536)->nullable()->change(); 12 | }); 13 | } 14 | }; 15 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | tests 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | ./src 33 | 34 | 35 | -------------------------------------------------------------------------------- /resources/views/prompts/default-prompt.blade.php: -------------------------------------------------------------------------------- 1 | You are a bot that helps answering questions based on the context information you get each time. 2 | 3 | Context information is below. 4 | --------------------- 5 | {context} 6 | --------------------- 7 | Given the context information and not prior knowledge, answer the following questions of the user. If you don't know something, say so, and don't make it up. 8 | Do not ask the user for more information or anything that might trigger a response from the user. 
9 | -------------------------------------------------------------------------------- /src/Ada.php: -------------------------------------------------------------------------------- 1 | make(Index::class); 13 | } 14 | 15 | public static function engine(): Engine 16 | { 17 | return app()->make(Engine::class); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/AdaFacade.php: -------------------------------------------------------------------------------- 1 | loadViewsFrom(__DIR__.'/../resources/views/prompts', 'ada'); 20 | 21 | if ($this->app->runningInConsole()) { 22 | $this->publishes([ 23 | __DIR__.'/../config/ada.php' => config_path('ada.php'), 24 | ], 'ada-config'); 25 | } 26 | } 27 | 28 | /** 29 | * Register the application services. 30 | */ 31 | public function register() 32 | { 33 | $this->mergeConfigFrom(__DIR__.'/../config/ada.php', 'ada'); 34 | 35 | $this->loadMigrationsFrom(__DIR__.'/../database/migrations'); 36 | 37 | $this->publishes([ 38 | __DIR__.'/../config/ada.php' => config_path('ada.php'), 39 | ], 'ada-config'); 40 | 41 | $this->publishes([ 42 | __DIR__.'/../database/migrations' => database_path('migrations'), 43 | ], 'ada-migrations'); 44 | 45 | $this->publishes([ 46 | __DIR__.'/../resources/views' => resource_path('views/vendor/ada'), 47 | ], 'ada-views'); 48 | 49 | $this->app->singleton('ada', function () { 50 | return new Ada(); 51 | }); 52 | 53 | $this->validateIndexClass(); 54 | 55 | $this->app->bind(Index::class, config('ada.index_class')); 56 | $this->app->bind(Engine::class, OpenAI::class); 57 | } 58 | 59 | /** 60 | * Checks if the index class from configuration implements the Ada Index interface. 
61 | * 62 | * @throws ReflectionException 63 | */ 64 | protected function validateIndexClass() 65 | { 66 | $indexClass = config('ada.index_class', \Ada\Index\DefaultIndex::class); 67 | $reflection = new \ReflectionClass($indexClass); 68 | 69 | if (!$reflection->isSubclassOf(Index::class)) { 70 | throw new Exception("Index class has to implement \Ada\Index\Index."); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/Engine/Engine.php: -------------------------------------------------------------------------------- 1 | client = \OpenAI::client(config('ada.client_token')); 22 | } 23 | 24 | public function embed(string $text, string $model = 'text-embedding-ada-002', $options = []): EmbeddedResponse|ErrorResponse 25 | { 26 | try { 27 | $result = $this->client->embeddings()->create([ 28 | 'model' => $model, 29 | 'input' => $text, 30 | ...$options, 31 | ]); 32 | 33 | return $this->toEmbeddedResponse($result); 34 | } catch (Throwable $e) { 35 | return $this->toErrorResponse($e); 36 | } 37 | } 38 | 39 | public function generate(Prompt $prompt, string $model = 'gpt-3.5-turbo', int $temperature = 0, $options = []): GeneratedResponse|ErrorResponse 40 | { 41 | try { 42 | $result = $this->client->chat()->create([ 43 | 'model' => $model, 44 | 'messages' => $prompt->toArray(), 45 | 'temperature' => $temperature, 46 | ...$options, 47 | ]); 48 | 49 | return $this->toGeneratedResponse($result); 50 | } catch (Throwable $e) { 51 | return $this->toErrorResponse($e); 52 | } 53 | } 54 | 55 | protected function toGeneratedResponse($result): GeneratedResponse|ErrorResponse 56 | { 57 | $response = new GeneratedResponse(); 58 | 59 | $result = $result->toArray(); 60 | 61 | $response->engine = self::class; 62 | $response->model = $result['model'] ?? 
''; 63 | 64 | $response->content = $result['choices'][0]['message']['content']; 65 | 66 | $response->tokenUsage = $result['usage']; 67 | 68 | return $response; 69 | } 70 | 71 | protected function toEmbeddedResponse($result): EmbeddedResponse|ErrorResponse 72 | { 73 | $response = new EmbeddedResponse(); 74 | 75 | $result = $result->toArray(); 76 | 77 | $response->engine = self::class; 78 | $response->model = $result['model'] ?? ''; 79 | 80 | $response->embeddings = $result['data'][0]['embedding']; 81 | 82 | $response->tokenUsage = $result['usage']; 83 | 84 | return $response; 85 | } 86 | 87 | protected function toErrorResponse(Throwable $exception): ErrorResponse 88 | { 89 | $response = new ErrorResponse(); 90 | 91 | $response->engine = self::class; 92 | $response->success = false; 93 | 94 | if ($exception instanceof ErrorException) { 95 | $response->errorMessage = $exception->getErrorMessage(); 96 | } elseif ($exception instanceof Exception) { 97 | $response->errorMessage = $exception->getMessage(); 98 | } 99 | 100 | return $response; 101 | } 102 | 103 | public function getDefaultPrompt(): OpenAIPrompt 104 | { 105 | return new OpenAIPrompt(); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/Engine/Responses/EmbeddedResponse.php: -------------------------------------------------------------------------------- 1 | embeddings); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/Engine/Responses/ErrorResponse.php: -------------------------------------------------------------------------------- 1 | errorMessage"; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/Engine/Responses/GeneratedResponse.php: -------------------------------------------------------------------------------- 1 | content; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- 
/src/Engine/Responses/Response.php: -------------------------------------------------------------------------------- 1 | engine->embed($content, $model, $options); 21 | } 22 | 23 | public function generate(Prompt $prompt, string $model = 'gpt-3.5-turbo', int $temperature = 0, $options = []): GeneratedResponse|ErrorResponse 24 | { 25 | return $this->engine->generate($prompt, $model, $temperature, $options); 26 | } 27 | 28 | public function getEmbeddableChunks(string $text): array 29 | { 30 | $chunks = []; 31 | 32 | try { 33 | $textChunks = $this->splitter->split($text); 34 | } catch (Exception) { 35 | $textChunks = []; 36 | } 37 | 38 | foreach ($textChunks as $index => $chunk) { 39 | $chunks[] = new Embedding([ 40 | 'content' => $chunk, 41 | ]); 42 | } 43 | 44 | return $chunks; 45 | } 46 | 47 | protected function getDefaultSplitter(): TextSplitter 48 | { 49 | return new AdaTextSplitter(); 50 | } 51 | 52 | public function getDistance(): Distance 53 | { 54 | return new OpenAIDistance(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/Index/Index.php: -------------------------------------------------------------------------------- 1 | engine = $engine; 22 | 23 | if (is_null($splitter)) { 24 | $this->splitter = $this->getDefaultSplitter(); 25 | } 26 | } 27 | 28 | public function setTextSplitter(TextSplitter $splitter): self 29 | { 30 | $this->splitter = $splitter; 31 | } 32 | 33 | abstract protected function getDefaultSplitter(): TextSplitter; 34 | 35 | abstract public function getEmbeddableChunks(string $text): array; 36 | 37 | abstract public function embed(string $content, string $model = 'embedding-model', $options = []): EmbeddedResponse|ErrorResponse; 38 | 39 | abstract public function generate(Prompt $prompt, string $model = 'gpt-3.5-turbo', int $temperature = 0, $options = []): GeneratedResponse|ErrorResponse; 40 | 41 | abstract public function getDistance(): Distance; 42 | } 43 | 
-------------------------------------------------------------------------------- /src/Jobs/EmbedJob.php: -------------------------------------------------------------------------------- 1 | make(Index::class); 21 | 22 | $embeddingVector = $index->embed($this->embedding->content); 23 | $this->embedding->embedding = $embeddingVector->embeddings; 24 | 25 | $this->embedding->save(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/Models/Embedding.php: -------------------------------------------------------------------------------- 1 | Vector::class, 22 | ]; 23 | 24 | public function embeddable() 25 | { 26 | return $this->morphTo(); 27 | } 28 | 29 | /** 30 | * @param string $query The query to lookup. 31 | * @param Prompt|null $contextPrompt The prompt to use for the context, in case a custom template is necessary. 32 | * @param Closure|null $additionalConstraints Limit the lookup by providing a query. 33 | * 34 | * @throws \Illuminate\Contracts\Container\BindingResolutionException 35 | */ 36 | public static function lookup(string $query, ?Prompt $contextPrompt = null, ?Closure $additionalConstraints = null): string 37 | { 38 | $index = app()->make(Index::class); 39 | 40 | $queryEmbedding = $index->embed($query); 41 | 42 | if ($queryEmbedding instanceof ErrorResponse) { 43 | return 'Error: '.$queryEmbedding->getContent(); 44 | } 45 | 46 | $vector = new Vector($queryEmbedding->embeddings); 47 | 48 | $nearestNeighbor = self::getNearestNeighbor($vector, $additionalConstraints); 49 | 50 | $context = $nearestNeighbor->content ?? 
'No context given.'; 51 | 52 | if ($contextPrompt === null) { 53 | $contextPrompt = $index->engine->getDefaultPrompt(); 54 | } 55 | 56 | $prompt = $contextPrompt 57 | ->replaceInTemplate('context', $context) 58 | ->setQuery($query); 59 | 60 | /** @var GeneratedResponse $response */ 61 | $response = $index->generate($prompt); 62 | 63 | return $response->getContent(); 64 | } 65 | 66 | /** 67 | * @param Vector $vector The vector to compare to 68 | * @param Closure|null $additionalConstraints Limit the search further by providing a query. 69 | */ 70 | public static function getNearestNeighbor(Vector $vector, ?Closure $additionalConstraints = null): ?Embedding 71 | { 72 | $index = app()->make(Index::class); 73 | 74 | $query = Embedding::query(); 75 | 76 | if ($additionalConstraints !== null) { 77 | $additionalConstraints($query); 78 | } 79 | 80 | try { 81 | return $query->nearestNeighbors('embedding', $vector->toArray(), $index->getDistance()->getValue()) 82 | ->get() 83 | ->first(); 84 | } catch (\Throwable $e) { 85 | return null; 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/Tokenizer/AdaTokenizer.php: -------------------------------------------------------------------------------- 1 | 'system', 12 | 'content' => $this->template, 13 | ], 14 | [ 15 | 'role' => 'user', 16 | 'content' => $this->query, 17 | ], 18 | ]; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/Tools/Prompts/Prompt.php: -------------------------------------------------------------------------------- 1 | setDefaultPrompt(); 14 | } 15 | 16 | public static function fromString(string $template): self 17 | { 18 | $prompt = new static(); 19 | $prompt->setTemplate($template); 20 | 21 | return $prompt; 22 | } 23 | 24 | public static function fromFile(string $path): self 25 | { 26 | $promptBuilder = new static(); 27 | $promptBuilder->setTemplateFromFile($path); 28 | 29 | return 
$promptBuilder; 30 | } 31 | 32 | public function setTemplateFromFile(string $path): self 33 | { 34 | $this->template = file_get_contents($path); 35 | 36 | return $this; 37 | } 38 | 39 | public function setTemplate(string $template): self 40 | { 41 | $this->template = $template; 42 | 43 | return $this; 44 | } 45 | 46 | public function setDefaultPrompt(): self 47 | { 48 | $this->template = view(config('ada.default_prompt_view', 'ada::default-prompt')); 49 | 50 | return $this; 51 | } 52 | 53 | public function getTemplate(): string 54 | { 55 | return $this->template; 56 | } 57 | 58 | public function setQuery(string $query): self 59 | { 60 | $this->query = $query; 61 | 62 | return $this; 63 | } 64 | 65 | public function replaceInTemplate(string $key, string $value): self 66 | { 67 | $this->template = str_replace("{{$key}}", $value, $this->template); 68 | 69 | return $this; 70 | } 71 | 72 | abstract public function toArray(): array; 73 | } 74 | -------------------------------------------------------------------------------- /src/Tools/TextSplitter/AdaTextSplitter.php: -------------------------------------------------------------------------------- 1 | tokenizer = new AdaTokenizer(); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/Tools/TextSplitter/TextSplitter.php: -------------------------------------------------------------------------------- 1 | chunkOverlap >= $this->chunkSize) { 18 | throw new InvalidArgumentException('The chunk overlap must be smaller than the chunk size.'); 19 | } 20 | 21 | $this->setTokenizer(); 22 | } 23 | 24 | abstract public function setTokenizer(); 25 | 26 | /** 27 | * Split a text into chunks. 
28 | * 29 | * @throws Exception 30 | */ 31 | public function split(string $text): array 32 | { 33 | if ($text === '') { 34 | return []; 35 | } 36 | 37 | $splits = explode($this->separator, $text); 38 | $docs = []; 39 | $currentDocument = []; 40 | $total = 0; 41 | 42 | foreach ($splits as $split) { 43 | $numTokens = $this->tokenizer->count($split); 44 | if ($numTokens > $this->chunkSize) { 45 | throw new Exception( 46 | 'A single term is larger than the allowed chunk size.'. 47 | 'Term size: '.$numTokens. 48 | 'Chunk size: '.$this->chunkSize 49 | ); 50 | } 51 | // If the total tokens in current_doc exceeds the chunk size: 52 | // 1. Update the docs list 53 | if ($total + $numTokens > $this->chunkSize) { 54 | $docs[] = implode($this->separator, $currentDocument); 55 | // 2. Shrink the current_doc (from the front) until it is gets smaller 56 | // than the overlap size 57 | while ($total > $this->chunkOverlap) { 58 | $cur_num_tokens = max($this->tokenizer->count($currentDocument[0]), 1); 59 | $total -= $cur_num_tokens; 60 | array_shift($currentDocument); 61 | } 62 | // 3. 
From here we can continue to build up the current_doc again 63 | } 64 | // Build up the current_doc with term d, and update the total counter with 65 | // the number of the number of tokens in d, wrt self.tokenizer 66 | $currentDocument[] = $split; 67 | $total += $numTokens; 68 | } 69 | $docs[] = implode($this->separator, $currentDocument); 70 | 71 | return $docs; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/Traits/HasEmbeddings.php: -------------------------------------------------------------------------------- 1 | make(Index::class); 14 | 15 | try { 16 | $embeddings = $index->getEmbeddableChunks($content); 17 | } catch (\Exception $e) { 18 | return false; 19 | } 20 | 21 | /** 22 | * @var Embedding $embedding 23 | */ 24 | foreach ($embeddings as $embedding) { 25 | $embedding->key = $key; 26 | $embedding->embeddable_type = static::class; 27 | $embedding->embeddable_id = $this->id; 28 | 29 | $embedding->save(); 30 | 31 | dispatch(new EmbedJob($embedding)); 32 | } 33 | 34 | return true; 35 | } 36 | 37 | public function embeddings() 38 | { 39 | return $this->morphMany(Embedding::class, 'embeddable'); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /testbench.yaml: -------------------------------------------------------------------------------- 1 | providers: 2 | # - Workbench\App\Providers\WorkbenchServiceProvider 3 | 4 | migrations: 5 | - workbench/database/migrations 6 | 7 | seeders: 8 | - Workbench\Database\Seeders\DatabaseSeeder 9 | 10 | workbench: 11 | start: '/' 12 | install: true 13 | health: false 14 | discovers: 15 | web: true 16 | api: false 17 | commands: false 18 | components: false 19 | views: false 20 | build: [] 21 | assets: [] 22 | sync: [] 23 | -------------------------------------------------------------------------------- /tests/ArchTest.php: -------------------------------------------------------------------------------- 1 | 
preset()->php(); 4 | arch()->preset()->security(); 5 | arch()->preset()->laravel(); 6 | -------------------------------------------------------------------------------- /tests/Doubles/OpenAITestDouble.php: -------------------------------------------------------------------------------- 1 | toEmbeddedResponse($result); 15 | } 16 | 17 | public function publicToGeneratedResponse($result): GeneratedResponse|ErrorResponse 18 | { 19 | return $this->toGeneratedResponse($result); 20 | } 21 | 22 | public function publicToErrorResponse(\Throwable $exception): ErrorResponse 23 | { 24 | return $this->toErrorResponse($exception); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tests/OpenAIEngineTest.php: -------------------------------------------------------------------------------- 1 | engine = new OpenAITestDouble(); 13 | }); 14 | 15 | it('creates an EmbeddedResponse from a Open AI JSON embedding response', function () { 16 | $response = json_decode(file_get_contents('tests/fixtures/openai_200_embed.json'), true); 17 | 18 | $resultMock = Mockery::mock(Result::class); 19 | $resultMock->shouldReceive('toArray')->andReturn($response); 20 | 21 | $response = $this->engine->publicToEmbeddedResponse($resultMock); 22 | 23 | expect($response)->toBeInstanceOf(EmbeddedResponse::class) 24 | ->and($response->engine)->toBe(\Ada\Engine\OpenAI::class) 25 | ->and($response->success)->toBeTrue() 26 | ->and($response->model)->toBe('text-embedding-ada-002') 27 | ->and($response->getContent())->toBe(json_encode([-0.025455512, 0.004357308, -0.023832073])) 28 | ->and($response->tokenUsage)->toBe([ 29 | 'prompt_tokens' => 41, 30 | 'total_tokens' => 41, 31 | ]); 32 | }); 33 | 34 | it('creates an GeneratedResponse from a Open AI JSON chat response', function () { 35 | $response = json_decode(file_get_contents('tests/fixtures/openai_200_query.json'), true); 36 | 37 | $resultMock = Mockery::mock(Result::class); 38 | 
$resultMock->shouldReceive('toArray')->andReturn($response); 39 | 40 | $response = $this->engine->publicToGeneratedResponse($resultMock); 41 | 42 | expect($response)->toBeInstanceOf(GeneratedResponse::class) 43 | ->and($response->engine)->toBe(\Ada\Engine\OpenAI::class) 44 | ->and($response->success)->toBeTrue() 45 | ->and($response->model)->toBe('gpt-3.5-turbo-0125') 46 | ->and($response->getContent())->toStartWith("The habitat of the PHP Elephant is referred to as 'Silicon Forests'.") 47 | ->and($response->tokenUsage)->toBe([ 48 | 'prompt_tokens' => 13, 49 | 'completion_tokens' => 9, 50 | 'total_tokens' => 22, 51 | ]); 52 | }); 53 | 54 | it('creates an ErrorResponse from a failed Open AI request', function () { 55 | $exceptionContent = json_decode(file_get_contents('tests/fixtures/openai_failed.json'), true); 56 | $exception = new \OpenAI\Exceptions\ErrorException($exceptionContent, 500); 57 | 58 | $response = $this->engine->publicToErrorResponse($exception); 59 | 60 | expect($response)->toBeInstanceOf(ErrorResponse::class) 61 | ->and($response->engine)->toBe(\Ada\Engine\OpenAI::class) 62 | ->and($response->success)->toBeFalse() 63 | ->and($response->getContent())->toStartWith('Error: Incorrect API key provided: abc.'); 64 | }); 65 | -------------------------------------------------------------------------------- /tests/Pest.php: -------------------------------------------------------------------------------- 1 | in(__DIR__); 4 | -------------------------------------------------------------------------------- /tests/PromptTest.php: -------------------------------------------------------------------------------- 1 | toArray(); 10 | 11 | expect($prompt)->toBeInstanceOf(OpenAIPrompt::class) 12 | ->and($promptData[0])->toHaveKey('role', 'system') 13 | ->and($promptData[0])->toHaveKey('content', 'Example template') 14 | ->and($promptData[1])->toHaveKey('role', 'user') 15 | ->and($promptData[1])->toHaveKey('content', ''); 16 | }); 17 | 18 | it('can set and get a 
template', function () { 19 | $template = 'New template'; 20 | $prompt = new OpenAIPrompt(); 21 | $prompt->setTemplate($template); 22 | 23 | $promptData = $prompt->toArray(); 24 | 25 | expect($prompt)->toBeInstanceOf(OpenAIPrompt::class) 26 | ->and($promptData[0])->toHaveKey('role', 'system') 27 | ->and($promptData[0])->toHaveKey('content', 'New template'); 28 | }); 29 | 30 | it('can set a template from a file', function () { 31 | $defaultPromptPath = view('ada::default-prompt'); 32 | 33 | $prompt = OpenAIPrompt::fromFile($defaultPromptPath->getPath()); 34 | $promptData = $prompt->toArray(); 35 | 36 | expect($promptData[0]['content'])->toContain('You are a bot that helps answering questions based on the context information you get each time.'); 37 | }); 38 | 39 | it('replaces placeholders in the template', function () { 40 | $template = 'Hello, {name}!'; 41 | $prompt = new OpenAIPrompt(); 42 | $prompt->setTemplate($template); 43 | $prompt->replaceInTemplate('name', 'Ada'); 44 | 45 | $promptData = $prompt->toArray(); 46 | 47 | expect($promptData[0])->toHaveKey('content', 'Hello, Ada!'); 48 | }); 49 | 50 | it('can set and include a query', function () { 51 | $query = 'Who was Ada Lovelace?'; 52 | $prompt = new OpenAIPrompt(); 53 | $prompt->setQuery($query); 54 | 55 | $promptData = $prompt->toArray(); 56 | 57 | expect($promptData[1])->toHaveKey('content', $query); 58 | }); 59 | -------------------------------------------------------------------------------- /tests/TestCase.php: -------------------------------------------------------------------------------- 1 | withRouting( 11 | web: __DIR__.'/../routes/web.php', 12 | commands: __DIR__.'/../routes/console.php', 13 | ) 14 | ->withMiddleware(function (Middleware $middleware) { 15 | // 16 | }) 17 | ->withExceptions(function (Exceptions $exceptions) { 18 | // 19 | })->create(); 20 | -------------------------------------------------------------------------------- /workbench/database/factories/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/5am-code/ada-laravel/bd8d005589d92458336b01f8d8b06dd672c1f6d7/workbench/database/factories/.gitkeep -------------------------------------------------------------------------------- /workbench/database/migrations/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/5am-code/ada-laravel/bd8d005589d92458336b01f8d8b06dd672c1f6d7/workbench/database/migrations/.gitkeep -------------------------------------------------------------------------------- /workbench/database/seeders/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/5am-code/ada-laravel/bd8d005589d92458336b01f8d8b06dd672c1f6d7/workbench/database/seeders/.gitkeep -------------------------------------------------------------------------------- /workbench/database/seeders/DatabaseSeeder.php: -------------------------------------------------------------------------------- 1 | comment(Inspiring::quote()); 8 | })->purpose('Display an inspiring quote')->hourly(); 9 | -------------------------------------------------------------------------------- /workbench/routes/web.php: -------------------------------------------------------------------------------- 1 |