├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── composer.json ├── config └── tiktoken.php └── src ├── Facades └── Tiktoken.php ├── Tiktoken.php └── TiktokenServiceProvider.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to `tiktoken-for-laravel` will be documented in this file. 4 | 5 | ## 0.1.4 - 2025-03-05 6 | 7 | Add support for Laravel 12. 8 | Upgrade yethee/tiktoken to 0.7.0 to support o200k_base encoding. 9 | 10 | ## 0.1.3 - 2024-03-26 11 | 12 | Add support for Laravel 11. 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) mis3085 <17059877+mis3085@users.noreply.github.com> 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This is a tiktoken-php (yethee/tiktoken) wrapper for Laravel 2 | 3 | [![Latest Version on Packagist](https://img.shields.io/packagist/v/mis3085/tiktoken-for-laravel.svg?style=flat-square)](https://packagist.org/packages/mis3085/tiktoken-for-laravel) 4 | [![GitHub Tests Action Status](https://img.shields.io/github/actions/workflow/status/mis3085/tiktoken-for-laravel/run-tests.yml?branch=main&label=tests&style=flat-square)](https://github.com/mis3085/tiktoken-for-laravel/actions?query=workflow%3Arun-tests+branch%3Amain) 5 | [![GitHub Code Style Action Status](https://img.shields.io/github/actions/workflow/status/mis3085/tiktoken-for-laravel/fix-php-code-style-issues.yml?branch=main&label=code%20style&style=flat-square)](https://github.com/mis3085/tiktoken-for-laravel/actions?query=workflow%3A"Fix+PHP+code+style+issues"+branch%3Amain) 6 | [![Total Downloads](https://img.shields.io/packagist/dt/mis3085/tiktoken-for-laravel.svg?style=flat-square)](https://packagist.org/packages/mis3085/tiktoken-for-laravel) 7 | 8 | Use the "tiktoken-php" package to encode a string into tokens, decode tokens back into a string, or count the tokens a string uses for OpenAI models in Laravel. 
9 | 10 | ## Installation 11 | 12 | You can install the package via composer: 13 | 14 | ```bash 15 | composer require mis3085/tiktoken-for-laravel 16 | ``` 17 | 18 | You can publish the config file with: 19 | 20 | ```bash 21 | php artisan vendor:publish --tag="tiktoken-for-laravel-config" 22 | ``` 23 | 24 | These are the contents of the published config file: 25 | 26 | ```php 27 | return [ 28 | // Cache folder for vocab files 29 | 'cache_dir' => storage_path('framework/cache/tiktoken'), 30 | 31 | /** 32 | * The default encoder 33 | * cl100k_base: gpt-4, gpt-3.5-turbo, text-embedding-ada-002 34 | * p50k_base: Codex models, text-davinci-002, text-davinci-003 35 | * r50k_base: text-davinci-001 36 | */ 37 | 'default_encoder' => 'cl100k_base', 38 | ]; 39 | ``` 40 | 41 | ## Usage 42 | 43 | ```php 44 | use Mis3085\Tiktoken\Facades\Tiktoken; 45 | // or 46 | use Tiktoken; 47 | 48 | // Use the default encoder: cl100k_base 49 | Tiktoken::encode('this is a test'); 50 | // [ 576, 374, 264, 1296 ] 51 | 52 | Tiktoken::encode('測試'); 53 | // [ 35086, 105, 50520, 99 ] 54 | 55 | // Count tokens 56 | Tiktoken::count('測試'); 57 | // 4 58 | 59 | // Truncate a string to at most the given number of tokens (a limit below 1 returns the string unchanged) 60 | Tiktoken::limit('this is a test', 2); 61 | // this is 62 | Tiktoken::limit('測試', 2); 63 | // 測 64 | Tiktoken::limit('測試', 1); 65 | // '' (empty string: the first token alone does not decode to valid UTF-8) 66 | 67 | // Decode 68 | Tiktoken::decode([ 35086, 105, 50520, 99 ]); 69 | // 測試 70 | 71 | // Change the encoder at runtime 72 | Tiktoken::setEncoder('p50k_base'); 73 | Tiktoken::encode('this is a test'); 74 | // [ 5661, 318, 257, 1332 ] 75 | 76 | Tiktoken::setEncoder('p50k_base')->encode('測試'); 77 | // [ 162, 116, 105, 164, 102, 99 ] 78 | 79 | Tiktoken::setEncoderForModel('text-davinci-003')->encode('測試'); 80 | // [ 162, 116, 105, 164, 102, 99 ] 81 | ``` 82 | 83 | ## Testing 84 | 85 | ```bash 86 | composer test 87 | ``` 88 | 89 | ## Credits 90 | 91 | - [yethee/tiktoken-php](https://github.com/yethee/tiktoken-php) 92 | - [All 
Contributors](../../contributors) 93 | 94 | ## License 95 | 96 | The MIT License (MIT). Please see [License File](LICENSE.md) for more information. 97 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mis3085/tiktoken-for-laravel", 3 | "description": "This is tiktoken-php (yethee/tiktoken) wrapper for Laravel", 4 | "keywords": [ 5 | "laravel", 6 | "tiktoken", 7 | "tiktoken-for-laravel" 8 | ], 9 | "homepage": "https://github.com/mis3085/tiktoken-for-laravel", 10 | "license": "MIT", 11 | "authors": [ 12 | { 13 | "name": "mis3085", 14 | "email": "17059877+mis3085@users.noreply.github.com", 15 | "role": "Developer" 16 | } 17 | ], 18 | "require": { 19 | "php": "^8.1", 20 | "illuminate/contracts": "^9.28|^10.0|^11.0|^12.0", 21 | "spatie/laravel-package-tools": "^1.14.0", 22 | "yethee/tiktoken": "^0.7.0" 23 | }, 24 | "require-dev": { 25 | "laravel/pint": "^1.0", 26 | "nunomaduro/collision": "^5.11|^6.0|^7.9", 27 | "nunomaduro/larastan": "^1.0|^2.0.1", 28 | "orchestra/testbench": "^7.0|^8.0", 29 | "pestphp/pest": "^1.21|^2.0", 30 | "pestphp/pest-plugin-laravel": "^1.1|^2.0", 31 | "phpstan/extension-installer": "^1.1", 32 | "phpstan/phpstan-deprecation-rules": "^1.0", 33 | "phpstan/phpstan-phpunit": "^1.0" 34 | }, 35 | "autoload": { 36 | "psr-4": { 37 | "Mis3085\\Tiktoken\\": "src/" 38 | } 39 | }, 40 | "autoload-dev": { 41 | "psr-4": { 42 | "Mis3085\\Tiktoken\\Tests\\": "tests/" 43 | } 44 | }, 45 | "scripts": { 46 | "post-autoload-dump": "@php ./vendor/bin/testbench package:discover --ansi", 47 | "analyse": "vendor/bin/phpstan analyse", 48 | "test": "vendor/bin/pest", 49 | "test-coverage": "vendor/bin/pest --coverage", 50 | "format": "vendor/bin/pint" 51 | }, 52 | "config": { 53 | "sort-packages": true, 54 | "allow-plugins": { 55 | "pestphp/pest-plugin": true, 56 | "phpstan/extension-installer": true 57 | } 58 | }, 59 | 
"extra": { 60 | "laravel": { 61 | "providers": [ 62 | "Mis3085\\Tiktoken\\TiktokenServiceProvider" 63 | ], 64 | "aliases": { 65 | "Tiktoken": "Mis3085\\Tiktoken\\Facades\\Tiktoken" 66 | } 67 | } 68 | }, 69 | "minimum-stability": "dev", 70 | "prefer-stable": true 71 | } 72 | -------------------------------------------------------------------------------- /config/tiktoken.php: -------------------------------------------------------------------------------- 1 | storage_path('framework/cache/tiktoken'), 7 | 8 | /** 9 | * The default encoder 10 | * cl100k_base: gpt-4, gpt-3.5-turbo, text-embedding-ada-002 11 | * p50k_base: Codex models, text-davinci-002, text-davinci-003 12 | * r50k_base: text-davinci-001 13 | */ 14 | 'default_encoder' => 'cl100k_base', 15 | ]; 16 | -------------------------------------------------------------------------------- /src/Facades/Tiktoken.php: -------------------------------------------------------------------------------- 1 | useDefaultEncoder(); 23 | } 24 | 25 | public function useDefaultEncoder(): static 26 | { 27 | $this->setEncoder($this->defaultEncoder); 28 | 29 | return $this; 30 | } 31 | 32 | public function setEncoderForModel(string $model): static 33 | { 34 | $this->encoder = $this->encoderProvider->getForModel($model); 35 | 36 | return $this; 37 | } 38 | 39 | public function setEncoder(string $encoder): static 40 | { 41 | $this->encoder = $this->encoderProvider->get($encoder); 42 | 43 | return $this; 44 | } 45 | 46 | public function encode(string $text): array 47 | { 48 | return $this->encoder->encode($text); 49 | } 50 | 51 | public function count(string $text): int 52 | { 53 | return count($this->encoder->encode($text)); 54 | } 55 | 56 | public function decode(array $tokens): string 57 | { 58 | return $this->encoder->decode($tokens); 59 | } 60 | 61 | public function limit(string $text, int $limit): string 62 | { 63 | if ($limit < 1) { 64 | return $text; 65 | } 66 | 67 | $tokens = $this->encode($text); 68 | 69 | $new = 
$this->decode(array_slice($tokens, 0, $limit)); 70 | 71 | while (!mb_check_encoding($new, 'UTF-8') && $limit >= 1) { 72 | $limit--; 73 | $new = $this->decode(array_slice($tokens, 0, $limit)); 74 | } 75 | 76 | return $new; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/TiktokenServiceProvider.php: -------------------------------------------------------------------------------- 1 | name('tiktoken-for-laravel') 20 | ->hasConfigFile('tiktoken'); 21 | } 22 | 23 | public function packageRegistered() 24 | { 25 | $this->app->singleton(Tiktoken::class, function () { 26 | $provider = new EncoderProvider(); 27 | $provider->setVocabCache(config('tiktoken.cache_dir')); 28 | 29 | return new Tiktoken($provider, config('tiktoken.default_encoder')); 30 | }); 31 | } 32 | } 33 | --------------------------------------------------------------------------------