├── CHANGELOG.md
├── LICENSE.md
├── README.md
├── composer.json
├── examples
    ├── embedding.php
    ├── generate-with-events.php
    └── generate.php
├── llama-php.gif
├── phpcs.xml
└── src
    ├── Context.php
    ├── Events
        └── TokenGeneratedEvent.php
    ├── Exception
        ├── InvalidArgumentException.php
        ├── LLamaCppException.php
        ├── MissingLibraryException.php
        └── NotImplementedException.php
    ├── LLamaCPP.php
    ├── Native
        ├── LLamaCPPFFI.php
        ├── LocateLib.php
        ├── Locator.php
        └── llama-ffi.h
    └── Parameters
        ├── GenerationParameters.php
        └── ModelParameters.php


/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | All notable changes to `llama.cpp-php` will be documented in this file.
4 | 
5 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) Kambo <bohuslav@simek.si>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Experimental bindings for LLama C++ library
 2 | 
 3 | [![Latest Version on Packagist](https://img.shields.io/packagist/v/kambo/llama-cpp-php.svg?style=flat-square)](https://packagist.org/packages/kambo/llama-cpp-php)
 4 | [![Tests](https://img.shields.io/github/actions/workflow/status/kambo-1st/llama-cpp-php/run-tests.yml?branch=main&label=tests&style=flat-square)](https://github.com/kambo-1st/llama-cpp-php/actions/workflows/run-tests.yml)
 5 | [![Total Downloads](https://img.shields.io/packagist/dt/kambo/llama-cpp-php.svg?style=flat-square)](https://packagist.org/packages/kambo/llama-cpp-php)
 6 | 
 7 | The package enables the use of the LLama C++ library in PHP, thereby allowing the setup and execution of LLM models in PHP on your local machine.
 8 | 
 9 | **This is highly experimental and not suitable for production use!**
10 | 
11 | **Use at your own risk!**
12 | 
13 | **Only Linux is supported!**
14 | 
15 | ![asciicast](llama-php.gif)
16 | 
17 | ## Installation
18 | 
19 | You can install the package via composer:
20 | 
21 | ```bash
22 | composer require kambo/llama-cpp-php kambo/llama-cpp-php-linux-lib
23 | ```
24 | Note: the kambo/llama-cpp-php-linux-lib package contains a binary library for Linux.
25 | 
26 | ## Usage
27 | 
28 | Download a model first; for example, with this command:
29 | ```bash
30 | wget https://huggingface.co/LLukas22/gpt4all-lora-quantized-ggjt/resolve/main/ggjt-model.bin
31 | ```
32 | 
33 | ```php
34 | $template = "You are a programmer, write PHP class that will add two numbers and print the result. Stop at class end.";
35 | $context = Context::createWithParameter(new ModelParameters(__DIR__ .'/models/ggjt-model.bin'));
36 | $llama = new LLamaCPP($context);
37 | echo "Prompt: \033[0;32m".$template."\033[0m".PHP_EOL;
38 | 
39 | foreach ($llama->generate($template, new GenerationParameters(predictLength: 200)) as $token) {
40 |     echo $token;
41 | }
42 | ```
43 | 
44 | ## License
45 | 
46 | The MIT License (MIT). Please see [License File](LICENSE.md) for more information.
47 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "kambo/llama-cpp-php",
 3 |     "description": "The package enables the use of the LLama C++ library in PHP, thereby allowing the setup and execution of LLM models in PHP on your local machine.",
 4 |     "keywords": [
 5 |         "Kambo",
 6 |         "llama-cpp-php"
 7 |     ],
 8 |     "homepage": "https://github.com/kambo/llama-cpp-php",
 9 |     "license": "MIT",
10 |     "authors": [
11 |         {
12 |             "name": "Bohuslav Šimek",
13 |             "email": "bohuslav@simek.si",
14 |             "role": "Developer"
15 |         }
16 |     ],
17 |     "require": {
18 |         "php": "^8.1",
19 |         "symfony/event-dispatcher": "^6.2"
20 |     },
21 |     "require-dev": {
22 |         "phpunit/phpunit": "^9.5",
23 |         "slevomat/coding-standard": "^8.8"
24 |     },
25 |     "autoload": {
26 |         "psr-4": {
27 |             "Kambo\\LLamaCPP\\": "src"
28 |         }
29 |     },
30 |     "autoload-dev": {
31 |         "psr-4": {
32 |             "Kambo\\LLamaCPP\\Tests\\": "tests"
33 |         }
34 |     },
35 |     "scripts": {
36 |         "test": "vendor/bin/phpunit",
37 |         "test-coverage": "XDEBUG_MODE=coverage vendor/bin/phpunit",
38 |         "format": "vendor/bin/phpcbf",
39 |         "lint": "vendor/bin/phpcs"
40 |     },
41 |     "config": {
42 |         "sort-packages": true,
43 |         "allow-plugins": {
44 |             "phpstan/extension-installer": true,
45 |             "dealerdirect/phpcodesniffer-composer-installer": true
46 |         }
47 |     },
48 |     "minimum-stability": "dev",
49 |     "prefer-stable": true
50 | }
51 | 


--------------------------------------------------------------------------------
/examples/embedding.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | require_once __DIR__ . '/../vendor/autoload.php';
 4 | 
 5 | use Kambo\LLamaCPP\LLamaCPP;
 6 | use Kambo\LLamaCPP\Context;
 7 | use Kambo\LLamaCPP\Parameters\ModelParameters;
 8 | use Kambo\LLamaCPP\Parameters\GenerationParameters;
 9 | 
10 | // Prompt whose embedding is computed.
11 | $prompt = "You are a programmer, write PHP class that will add two numbers and print the result. Stop at class end.";
12 | 
13 | // Embedding mode has to be enabled on the model, otherwise createEmbedding() throws.
14 | $modelParameters = new ModelParameters(
15 |     modelPath: __DIR__ . '/models/ggjt-model.bin',
16 |     embedding: true,
17 | );
18 | $llama = new LLamaCPP(Context::createWithParameter($modelParameters));
19 | 
20 | var_dump($llama->createEmbedding($prompt));
21 | 


--------------------------------------------------------------------------------
/examples/generate-with-events.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | require_once __DIR__ . '/../vendor/autoload.php';
 4 | 
 5 | use Kambo\LLamaCPP\LLamaCPP;
 6 | use Kambo\LLamaCPP\Context;
 7 | use Kambo\LLamaCPP\Parameters\ModelParameters;
 8 | use Kambo\LLamaCPP\Parameters\GenerationParameters;
 9 | use Kambo\LLamaCPP\Events\TokenGeneratedEvent;
10 | use Symfony\Component\EventDispatcher\EventDispatcher;
11 | 
12 | // Print every token as soon as the generator reports it.
13 | $printToken = static function (TokenGeneratedEvent $event): void {
14 |     echo $event->getToken();
15 | };
16 | 
17 | $dispatcher = new EventDispatcher();
18 | $dispatcher->addListener(TokenGeneratedEvent::NAME, $printToken);
19 | 
20 | $prompt = "You are a programmer, write PHP class that will add two numbers and print the result. Stop at class end.";
21 | $modelParameters = new ModelParameters(__DIR__ . '/models/ggjt-model.bin');
22 | $llama = new LLamaCPP(Context::createWithParameter($modelParameters), $dispatcher);
23 | 
24 | echo "Prompt: \033[0;32m" . $prompt . "\033[0m" . PHP_EOL;
25 | 
26 | // The returned string is not needed here; the listener prints the tokens.
27 | $llama->generateAll($prompt, new GenerationParameters(predictLength: 200));
28 | 


--------------------------------------------------------------------------------
/examples/generate.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | require_once __DIR__ . '/../vendor/autoload.php';
 4 | 
 5 | use Kambo\LLamaCPP\LLamaCPP;
 6 | use Kambo\LLamaCPP\Context;
 7 | use Kambo\LLamaCPP\Parameters\ModelParameters;
 8 | use Kambo\LLamaCPP\Parameters\GenerationParameters;
 9 | 
10 | $prompt = "You are a programmer, write PHP class that will add two numbers and print the result. Stop at class end.";
11 | $modelParameters = new ModelParameters(__DIR__ . '/models/ggjt-model.bin');
12 | $llama = new LLamaCPP(Context::createWithParameter($modelParameters));
13 | 
14 | echo "Prompt: \033[0;32m" . $prompt . "\033[0m" . PHP_EOL;
15 | 
16 | // Tokens are printed while they are being generated.
17 | $generationParameters = new GenerationParameters(predictLength: 200);
18 | foreach ($llama->generate($prompt, $generationParameters) as $token) {
19 |     echo $token;
20 | }
21 | 


--------------------------------------------------------------------------------
/llama-php.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kambo-1st/llama-cpp-php/5762c8feba308606f6a7a37fafc21b2e1152811a/llama-php.gif


--------------------------------------------------------------------------------
/phpcs.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <ruleset name="tabular-ruleset">
 3 |     <config name="installed_paths" value="vendor/slevomat/coding-standard"/>
 4 |     <arg name="colors"/>
 5 | 
 6 |     <file>src</file>
 7 |     <file>tests</file>
 8 | 
 9 |     <rule ref="Generic.Commenting.Todo"/>
10 |     <rule ref="PSR12"/>
11 |     <rule ref="Squiz.Strings.DoubleQuoteUsage" />
12 |     <rule ref="SlevomatCodingStandard.Namespaces.ReferenceUsedNamesOnly">
13 |         <properties>
14 |             <property name="allowFallbackGlobalConstants" value="false"/>
15 |             <property name="allowFallbackGlobalFunctions" value="false"/>
16 |             <property name="allowFullyQualifiedGlobalClasses" value="false"/>
17 |             <property name="allowFullyQualifiedGlobalConstants" value="false"/>
18 |             <property name="allowFullyQualifiedGlobalFunctions" value="false"/>
19 |             <property name="allowFullyQualifiedNameForCollidingClasses" value="true"/>
20 |             <property name="allowFullyQualifiedNameForCollidingConstants" value="true"/>
21 |             <property name="allowFullyQualifiedNameForCollidingFunctions" value="true"/>
22 |             <property name="searchAnnotations" value="true"/>
23 |         </properties>
24 |     </rule>
25 |     <rule ref="Generic.Files.LineLength">
26 |         <exclude-pattern>tests</exclude-pattern>
27 |     </rule>
28 |     <rule ref="PSR1.Methods.CamelCapsMethodName">
29 |         <exclude-pattern>src/Native/LLamaCPPFFI.php</exclude-pattern>
30 |     </rule>
31 | </ruleset>
32 | 


--------------------------------------------------------------------------------
/src/Context.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Kambo\LLamaCPP;
 4 | 
 5 | use Kambo\LLamaCPP\Parameters\ModelParameters;
 6 | use Kambo\LLamaCPP\Native\LLamaCPPFFI;
 7 | use FFI\CData;
 8 | 
 9 | /**
10 |  * Owns the native llama context that is created from a model file.
11 |  *
12 |  * The native handle is created in the constructor and passed to llama_free()
13 |  * when this object is destroyed.
14 |  */
15 | class Context
16 | {
17 |     /** Native handle returned by llama_init_from_file(). */
18 |     private CData $ctx;
19 | 
20 |     /**
21 |      * Copies the model parameters onto the native default parameters and loads the model.
22 |      *
23 |      * @param LLamaCPPFFI     $ffi             Wrapper used for every native call
24 |      * @param ModelParameters $modelParameters Settings of the model to load
25 |      */
26 |     public function __construct(
27 |         private LLamaCPPFFI $ffi,
28 |         private readonly ModelParameters $modelParameters,
29 |     ) {
30 |         $params = $ffi->llama_context_default_params();
31 | 
32 |         $params->n_ctx = $modelParameters->getNCtx();
33 |         $params->n_parts = $modelParameters->getNParts();
34 |         $params->seed = $modelParameters->getSeed();
35 |         $params->f16_kv = $modelParameters->isF16KV();
36 |         $params->logits_all = $modelParameters->isLogitsAll();
37 |         $params->vocab_only = $modelParameters->isVocabOnly();
38 |         $params->use_mlock = $modelParameters->isUseMlock();
39 |         $params->embedding = $modelParameters->isEmbedding();
40 | 
41 |         $this->ctx = $ffi->llama_init_from_file($modelParameters->getModelPath(), $params);
42 |     }
43 | 
44 |     /**
45 |      * Creates a context for the given model parameters.
46 |      *
47 |      * @param ModelParameters  $modelParameters Settings of the model to load
48 |      * @param LLamaCPPFFI|null $ffi             Wrapper to use; the shared instance is taken when omitted
49 |      *
50 |      * @return self
51 |      */
52 |     public static function createWithParameter(
53 |         ModelParameters $modelParameters,
54 |         ?LLamaCPPFFI $ffi = null,
55 |     ): self {
56 |         // The nullable type is spelled out: implicit nullability through "= null" is deprecated.
57 |         return new self($ffi ?? LLamaCPPFFI::getInstance(), $modelParameters);
58 |     }
59 | 
60 |     /**
61 |      * Releases the native context.
62 |      */
63 |     public function __destruct()
64 |     {
65 |         $this->ffi->llama_free($this->ctx);
66 |     }
67 | 
68 |     /**
69 |      * @return CData Native context handle
70 |      */
71 |     public function getCtx(): CData
72 |     {
73 |         return $this->ctx;
74 |     }
75 | 
76 |     /**
77 |      * @return ModelParameters Parameters this context was created with
78 |      */
79 |     public function getModelParameters(): ModelParameters
80 |     {
81 |         return $this->modelParameters;
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/src/Events/TokenGeneratedEvent.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Kambo\LLamaCPP\Events;
 4 | 
 5 | use Symfony\Contracts\EventDispatcher\Event;
 6 | 
 7 | /**
 8 |  * Event carrying the text of a single generated token.
 9 |  */
10 | class TokenGeneratedEvent extends Event
11 | {
12 |     /** Name under which the event is dispatched. */
13 |     public const NAME = 'token.generated';
14 | 
15 |     /** Text of the generated token. */
16 |     protected string $token;
17 | 
18 |     /**
19 |      * @param string $token Text of the generated token
20 |      */
21 |     public function __construct(string $token)
22 |     {
23 |         $this->token = $token;
24 |     }
25 | 
26 |     /**
27 |      * @return string Text of the generated token
28 |      */
29 |     public function getToken(): string
30 |     {
31 |         return $this->token;
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/src/Exception/InvalidArgumentException.php:
--------------------------------------------------------------------------------
1 | <?php
2 | 
3 | namespace Kambo\LLamaCPP\Exception;
4 | 
5 | /** Thrown when a call is made with arguments or settings it cannot work with. */
6 | final class InvalidArgumentException extends LLamaCppException
7 | {
8 | }
9 | 


--------------------------------------------------------------------------------
/src/Exception/LLamaCppException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Kambo\LLamaCPP\Exception;
 4 | 
 5 | use Exception;
 6 | 
 7 | /**
 8 |  * Common parent of all LLamaCpp exceptions.
 9 |  */
10 | class LLamaCppException extends Exception
11 | {
12 | }
13 | 


--------------------------------------------------------------------------------
/src/Exception/MissingLibraryException.php:
--------------------------------------------------------------------------------
1 | <?php
2 | 
3 | namespace Kambo\LLamaCPP\Exception;
4 | 
5 | /** Thrown when the native LLamaCPP library cannot be located. */
6 | class MissingLibraryException extends LLamaCppException
7 | {
8 | }
9 | 


--------------------------------------------------------------------------------
/src/Exception/NotImplementedException.php:
--------------------------------------------------------------------------------
1 | <?php
2 | 
3 | namespace Kambo\LLamaCPP\Exception;
4 | 
5 | /** Thrown for functionality that is not available yet, e.g. an unsupported platform. */
6 | class NotImplementedException extends LLamaCppException
7 | {
8 | }
9 | 


--------------------------------------------------------------------------------
/src/LLamaCPP.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Kambo\LLamaCPP;
  4 | 
  5 | use Kambo\LLamaCPP\Parameters\GenerationParameters;
  6 | use Symfony\Component\EventDispatcher\EventDispatcherInterface;
  7 | use Kambo\LLamaCPP\Native\LLamaCPPFFI;
  8 | use Kambo\LLamaCPP\Events\TokenGeneratedEvent;
  9 | use Kambo\LLamaCPP\Exception\InvalidArgumentException;
 10 | use Generator;
 11 | 
 12 | use function strlen;
 13 | use function iterator_to_array;
 14 | use function implode;
 15 | 
 16 | /**
 17 |  * High-level text generation and embedding API built on the native llama bindings.
 18 |  */
 19 | final class LLamaCPP
 20 | {
 21 |     /**
 22 |      * @param Context                       $context         Loaded model context used for every native call
 23 |      * @param EventDispatcherInterface|null $eventDispatcher Optional dispatcher that receives generated tokens
 24 |      * @param LLamaCPPFFI|null              $ffi             Native wrapper; the shared instance is used when omitted
 25 |      */
 26 |     public function __construct(
 27 |         private Context $context,
 28 |         private ?EventDispatcherInterface $eventDispatcher = null,
 29 |         private ?LLamaCPPFFI $ffi = null,
 30 |     ) {
 31 |         $this->ffi ??= LLamaCPPFFI::getInstance();
 32 |     }
 33 | 
 34 |     /**
 35 |      * Lazily generates text that continues the given prompt.
 36 |      *
 37 |      * Every produced token is dispatched as a TokenGeneratedEvent (when a dispatcher is set)
 38 |      * and then yielded. Generation ends at the end-of-stream token or after the configured
 39 |      * predict length, whichever comes first.
 40 |      *
 41 |      * @param string                    $prompt     Text the model continues
 42 |      * @param GenerationParameters|null $generation Sampling settings; defaults are used when omitted
 43 |      *
 44 |      * @return Generator<int, string> Text of the generated tokens
 45 |      *
 46 |      * @throws InvalidArgumentException When the prompt cannot be tokenized
 47 |      */
 48 |     public function generate(string $prompt, ?GenerationParameters $generation = null): Generator
 49 |     {
 50 |         $generation ??= new GenerationParameters();
 51 | 
 52 |         $ctx   = $this->context->getCtx();
 53 |         $nPast = $this->evaluateText($prompt, $generation->getNoOfThreads());
 54 | 
 55 |         $eosToken      = $this->ffi->llama_token_eos();
 56 |         $predictLength = $generation->getPredictLength();
 57 |         for ($i = 0; $i < $predictLength; $i++) {
 58 |             $id = $this->ffi->llama_sample_top_p_top_k(
 59 |                 $ctx,
 60 |                 null,
 61 |                 0,
 62 |                 $generation->getTopP(),
 63 |                 $generation->getTopK(),
 64 |                 $generation->getTemperature(),
 65 |                 $generation->getRepeatPenalty(),
 66 |             );
 67 | 
 68 |             if ($id === $eosToken) {
 69 |                 break;
 70 |             }
 71 | 
 72 |             $prediction = $this->ffi->llama_token_to_str($ctx, $id);
 73 | 
 74 |             $this->eventDispatcher?->dispatch(
 75 |                 new TokenGeneratedEvent($prediction),
 76 |                 TokenGeneratedEvent::NAME
 77 |             );
 78 | 
 79 |             yield $prediction;
 80 | 
 81 |             // llama_eval() expects a pointer, so the sampled id is copied into a native value.
 82 |             $token = $this->ffi->new('llama_token');
 83 |             $token->cdata = $id;
 84 | 
 85 |             // n_past is the number of tokens evaluated so far; it grows only after this call.
 86 |             $this->ffi->llama_eval(
 87 |                 $ctx,
 88 |                 $this->ffi->addr($token),
 89 |                 1,
 90 |                 $nPast,
 91 |                 $generation->getNoOfThreads()
 92 |             );
 93 |             $nPast++;
 94 |         }
 95 |     }
 96 | 
 97 |     /**
 98 |      * Generates the whole continuation of the prompt and returns it as a single string.
 99 |      *
100 |      * @param string                    $prompt     Text the model continues
101 |      * @param GenerationParameters|null $generation Sampling settings; defaults are used when omitted
102 |      *
103 |      * @return string
104 |      *
105 |      * @throws InvalidArgumentException When the prompt cannot be tokenized
106 |      */
107 |     public function generateAll(string $prompt, ?GenerationParameters $generation = null): string
108 |     {
109 |         $tokens = iterator_to_array($this->generate($prompt, $generation));
110 | 
111 |         return implode('', $tokens);
112 |     }
113 | 
114 |     /**
115 |      * Evaluates the text and returns the embedding values reported for it.
116 |      *
117 |      * @param string $text        Text to embed
118 |      * @param int    $noOfThreads Number of threads used for the evaluation
119 |      *
120 |      * @return array Embedding values, llama_n_embd() entries
121 |      *
122 |      * @throws InvalidArgumentException When embedding mode is off or the text cannot be tokenized
123 |      */
124 |     public function createEmbedding(string $text, int $noOfThreads = 10): array
125 |     {
126 |         if (!$this->context->getModelParameters()->isEmbedding()) {
127 |             throw new InvalidArgumentException('Generation of embeddings must be turned on.');
128 |         }
129 | 
130 |         $this->evaluateText($text, $noOfThreads);
131 | 
132 |         $ctx       = $this->context->getCtx();
133 |         $nCount    = $this->ffi->llama_n_embd($ctx);
134 |         $embedding = $this->ffi->llama_get_embeddings($ctx);
135 | 
136 |         $embeddings = [];
137 |         for ($i = 0; $i < $nCount; $i++) {
138 |             $embeddings[] = $embedding[$i];
139 |         }
140 | 
141 |         return $embeddings;
142 |     }
143 | 
144 |     /**
145 |      * Tokenizes the text and feeds the tokens to the model one at a time.
146 |      *
147 |      * @param string $text        Text to tokenize and evaluate
148 |      * @param int    $noOfThreads Number of threads used for the evaluation
149 |      *
150 |      * @return int Number of evaluated tokens
151 |      *
152 |      * @throws InvalidArgumentException When the tokenizer reports a failure
153 |      */
154 |     private function evaluateText(string $text, int $noOfThreads): int
155 |     {
156 |         // One slot per byte of text plus one, because a BOS token is requested below;
157 |         // the extra slot also keeps the buffer non-empty for an empty text.
158 |         $capacity = strlen($text) + 1;
159 |         $ctx      = $this->context->getCtx();
160 |         $tokens   = $this->ffi->newArray('llama_token', $capacity);
161 |         $count    = $this->ffi->llama_tokenize($ctx, $text, $tokens, $capacity, true);
162 | 
163 |         // A negative count signals a tokenizer failure; evaluating nothing would hide it.
164 |         if ($count < 0) {
165 |             throw new InvalidArgumentException('Unable to tokenize the given text.');
166 |         }
167 | 
168 |         for ($position = 0; $position < $count; $position++) {
169 |             $this->ffi->llama_eval($ctx, $tokens + $position, 1, $position, $noOfThreads);
170 |         }
171 | 
172 |         return $count;
173 |     }
174 | }
175 | 


--------------------------------------------------------------------------------
/src/Native/LLamaCPPFFI.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Kambo\LLamaCPP\Native;
  4 | 
  5 | use FFI;
  6 | use FFI\CData;
  7 | use FFI\CType;
  8 | 
  9 | use function file_get_contents;
 10 | 
 11 | /**
 12 |  * Wrapper for llama-ffi.h
 13 |  *
 14 |  * @package Kambo\LLamaCPP\Native
 15 |  */
 16 | class LLamaCPPFFI
 17 | {
 18 |     /** Instance shared by all callers of getInstance(). */
 19 |     private static ?LLamaCPPFFI $instance = null;
 20 | 
 21 |     /**
 22 |      * @param FFI $fii FFI handle on which the native functions are called
 23 |      */
 24 |     public function __construct(private FFI $fii)
 25 |     {
 26 |     }
 27 | 
 28 |     /**
 29 |      * Returns the shared instance and creates it with the default locator on first use.
 30 |      *
 31 |      * @return static
 32 |      */
 33 |     public static function getInstance(): self
 34 |     {
 35 |         return self::$instance ??= self::create();
 36 |     }
 37 | 
 38 |     /**
 39 |      * Creates a wrapper for the library found by the given locator.
 40 |      *
 41 |      * @param ?LocateLib $locator Locator asked for the library path; a Locator is used when omitted
 42 |      *
 43 |      * @return static
 44 |      */
 45 |     public static function create(?LocateLib $locator = null): self
 46 |     {
 47 |         $locator ??= new Locator();
 48 | 
 49 |         return self::createWithLibraryInPath($locator->getLibraryPath());
 50 |     }
 51 | 
 52 |     /**
 53 |      * Creates a wrapper that binds the declarations of llama-ffi.h to the library at the given path.
 54 |      *
 55 |      * @param string $path
 56 |      *
 57 |      * @return static
 58 |      */
 59 |     public static function createWithLibraryInPath(string $path): self
 60 |     {
 61 |         $FFI = FFI::cdef(file_get_contents(__DIR__ . '/llama-ffi.h'), $path);
 62 | 
 63 |         return new self($FFI);
 64 |     }
 65 | 
 66 |     /**
 67 |      * Method that creates an arbitrary C structure.
 68 |      *
 69 |      * @param string $type
 70 |      *
 71 |      * @return CData|null
 72 |      */
 73 |     public function new(string $type): ?CData
 74 |     {
 75 |         return $this->fii->new($type);
 76 |     }
 77 | 
 78 |     /**
 79 |      * Method that creates a C array of specific type.
 80 |      *
 81 |      * @param string $type Element type
 82 |      * @param int    $size Number of elements
 83 |      *
 84 |      * @return CData|null
 85 |      */
 86 |     public function newArray(string $type, int $size): ?CData
 87 |     {
 88 |         return $this->fii->new($type . '[' . $size . ']');
 89 |     }
 90 | 
 91 |     /**
 92 |      * Returns C pointer to the given C data structure. The pointer is
 93 |      * not "owned" and won't be free. Anyway, this is a potentially
 94 |      * unsafe operation, because the life-time of the returned pointer
 95 |      * may be longer than life-time of the source object, and this may
 96 |      * cause dangling pointer dereference (like in regular C).
 97 |      *
 98 |      * @param CData $ptr
 99 |      *
100 |      * @return CData
101 |      */
102 |     public function addr(CData $ptr): CData
103 |     {
104 |         return FFI::addr($ptr);
105 |     }
106 | 
107 |     /**
108 |      * Returns the default context parameters provided by the library.
109 |      *
110 |      * @return CData Instance of struct llama_context_params
111 |      */
112 |     public function llama_context_default_params(): CData
113 |     {
114 |         return $this->fii->llama_context_default_params();
115 |     }
116 | 
117 |     /**
118 |      * Allocate (almost) all memory needed for the model.
119 |      *
120 |      * @param string $path
121 |      * @param CData  $params
122 |      *
123 |      * @return CData Native context pointer
124 |      */
125 |     public function llama_init_from_file(string $path, CData $params): CData
126 |     {
127 |         // NOTE(review): the library returns NULL on failure, which presumably does not satisfy
128 |         // the non-nullable return type and surfaces as a TypeError - confirm.
129 |         return $this->fii->llama_init_from_file($path, $params);
130 |     }
131 | 
132 |     /**
133 |      * Convert the provided text into tokens.
134 |      * The tokens pointer must be large enough to hold the resulting tokens.
135 |      *
136 |      * @param CData  $ctx
137 |      * @param string $text
138 |      * @param CData  $tokens
139 |      * @param int    $maxTokens
140 |      * @param bool   $addEOS    Passed as the add_bos argument declared in llama-ffi.h
141 |      *
142 |      * @return int the number of tokens on success, no more than n_max_tokens, or a negative number on failure
143 |      */
144 |     public function llama_tokenize(CData $ctx, string $text, CData $tokens, int $maxTokens, bool $addEOS): int
145 |     {
146 |         return $this->fii->llama_tokenize($ctx, $text, $tokens, $maxTokens, $addEOS);
147 |     }
148 | 
149 |     /**
150 |      * Run the llama inference to obtain the logits and probabilities for the next token.
151 |      *
152 |      * @param CData $ctx
153 |      * @param CData $tokens        the provided batch of new tokens to process
154 |      * @param int   $nOfTokens     the number of tokens in the batch
155 |      * @param int   $nOfPastTokens the number of tokens to use from previous eval calls
156 |      * @param int   $nOfThreads    The number of threads to use for the inference
157 |      *
158 |      * @return int 0 on success
159 |      */
160 |     public function llama_eval(CData $ctx, CData $tokens, int $nOfTokens, int $nOfPastTokens, int $nOfThreads): int
161 |     {
162 |         return $this->fii->llama_eval($ctx, $tokens, $nOfTokens, $nOfPastTokens, $nOfThreads);
163 |     }
164 | 
165 |     /**
166 |      * Sample top-k and top-p from the logits.
167 |      *
168 |      * @param CData      $ctx
169 |      * @param CData|null $lastNTokens
170 |      * @param int        $lastNTokensSize
171 |      * @param float      $topP
172 |      * @param float      $topK            Converted to an integer for the native call
173 |      * @param float      $temperature
174 |      * @param float      $repeatPenalty
175 |      *
176 |      * @return int
177 |      */
178 |     public function llama_sample_top_p_top_k(
179 |         CData $ctx,
180 |         ?CData $lastNTokens,
181 |         int $lastNTokensSize,
182 |         float $topP,
183 |         float $topK,
184 |         float $temperature,
185 |         float $repeatPenalty
186 |     ): int {
187 |         // The native signature takes top_k (int) before top_p (float), the reverse of this
188 |         // method's parameter order.
189 |         return $this->fii->llama_sample_top_p_top_k(
190 |             $ctx,
191 |             $lastNTokens,
192 |             $lastNTokensSize,
193 |             (int) $topK,
194 |             $topP,
195 |             $temperature,
196 |             $repeatPenalty
197 |         );
198 |     }
199 | 
200 |     /**
201 |      * Token Id -> String. Uses the vocabulary in the provided context
202 |      *
203 |      * @param CData $ctx
204 |      * @param int   $id
205 |      *
206 |      * @return string
207 |      */
208 |     public function llama_token_to_str(CData $ctx, int $id): string
209 |     {
210 |         return $this->fii->llama_token_to_str($ctx, $id);
211 |     }
212 | 
213 |     /**
214 |      * Frees all allocated memory
215 |      *
216 |      * @param CData $ctx
217 |      *
218 |      * @return void
219 |      */
220 |     public function llama_free(CData $ctx): void
221 |     {
222 |         $this->fii->llama_free($ctx);
223 |     }
224 | 
225 |     /**
226 |      * Returns the id of the end-of-stream token as reported by the library.
227 |      *
228 |      * @return int
229 |      */
230 |     public function llama_token_eos()
231 |     {
232 |         return $this->fii->llama_token_eos();
233 |     }
234 | 
235 |     /**
236 |      * Returns the value of llama_n_embd() for the context.
237 |      *
238 |      * @param CData $ctx
239 |      *
240 |      * @return int
241 |      */
242 |     public function llama_n_embd(CData $ctx): int
243 |     {
244 |         return $this->fii->llama_n_embd($ctx);
245 |     }
246 | 
247 |     /**
248 |      * Get the embeddings for the input
249 |      *
250 |      * @param CData $ctx
251 |      *
252 |      * @return ?CData
253 |      */
254 |     public function llama_get_embeddings(CData $ctx): ?CData
255 |     {
256 |         return $this->fii->llama_get_embeddings($ctx);
257 |     }
258 | }
259 | 


--------------------------------------------------------------------------------
/src/Native/LocateLib.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Kambo\LLamaCPP\Native;
 4 | 
 5 | /**
 6 |  * Strategy for finding the native llama library on the current system.
 7 |  */
 8 | interface LocateLib
 9 | {
10 |     /**
11 |      * @return string Path of the library file; LLamaCPPFFI::create() loads the library from it
12 |      */
13 |     public function getLibraryPath(): string;
14 | }
15 | 


--------------------------------------------------------------------------------
/src/Native/Locator.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Kambo\LLamaCPP\Native;
 4 | 
 5 | use Kambo\LLamaCPP\Exception\MissingLibraryException;
 6 | use Kambo\LLamaCPP\Exception\NotImplementedException;
 7 | use Kambo\LLamaCPPLinuxLib\Info;
 8 | 
 9 | use function class_exists;
10 | 
11 | use const PHP_OS_FAMILY;
12 | 
13 | /**
14 |  * Quick and naive library locator.
15 |  *
16 |  * Relies on the optional Linux library package to provide the path of the binary.
17 |  */
18 | final class Locator implements LocateLib
19 | {
20 |     /**
21 |      * Returns the path of the native library provided by the Linux library package.
22 |      *
23 |      * @return string
24 |      *
25 |      * @throws NotImplementedException When the platform is neither "Linux" nor "Unknown"
26 |      * @throws MissingLibraryException When the library package is not installed
27 |      */
28 |     public function getLibraryPath(): string
29 |     {
30 |         // Both comparisons must hold to reject a platform; joining them with "||" would be
31 |         // true for every value of PHP_OS_FAMILY and make this branch the only reachable one.
32 |         if (PHP_OS_FAMILY !== 'Linux' && PHP_OS_FAMILY !== 'Unknown') {
33 |             throw new NotImplementedException(
34 |                 'At this moment only Linux is supported. Platform: ' . PHP_OS_FAMILY
35 |             );
36 |         }
37 | 
38 |         if (!class_exists(Info::class)) {
39 |             throw new MissingLibraryException('Cannot find LLamaCPP library.');
40 |         }
41 | 
42 |         return (new Info())->getPath();
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/src/Native/llama-ffi.h:
--------------------------------------------------------------------------------
  1 | #define FFI_LIB "./libllama.so"
  2 | 
  3 | struct llama_context;
  4 | typedef int llama_token;
  5 | typedef struct llama_token_data {
  6 |     llama_token id; // token id
  7 |     float p; // probability of the token
  8 |     float plog; // log probability of the token
  9 | } llama_token_data;
 10 | typedef void (*llama_progress_callback)(float progress, void *ctx);
 11 | struct llama_context_params {
 12 |     int n_ctx; // text context
 13 |     int n_parts; // -1 for default
 14 |     int seed; // RNG seed, 0 for random
 15 |     bool f16_kv; // use fp16 for KV cache
 16 |     bool logits_all; // the llama_eval() call computes all logits, not just the last one
 17 |     bool vocab_only; // only load the vocabulary, no weights
 18 |     bool use_mlock; // force system to keep model in RAM
 19 |     bool embedding; // embedding mode only
 20 |     // called with a progress value between 0 and 1, pass NULL to disable
 21 |     llama_progress_callback progress_callback;
 22 |     // context pointer passed to the progress callback
 23 |     void * progress_callback_user_data;
 24 | };
 25 | 
 26 | struct llama_context_params llama_context_default_params();
 27 | 
 28 | // Various functions for loading a ggml llama model.
 29 | // Allocate (almost) all memory needed for the model.
 30 | // Return NULL on failure
 31 | struct llama_context * llama_init_from_file(
 32 |                          const char * path_model,
 33 |         struct llama_context_params params);
 34 | // Frees all allocated memory
 35 | void llama_free(struct llama_context * ctx);
 36 | // TODO: not great API - very likely to change
 37 | // Returns 0 on success
 38 | int llama_model_quantize(
 39 |         const char * fname_inp,
 40 |         const char * fname_out,
 41 |                int itype);
 42 | // Returns the KV cache that will contain the context for the
 43 | // ongoing prediction with the model.
 44 | const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
 45 | // Returns the size of the KV cache
 46 | size_t llama_get_kv_cache_size(struct llama_context * ctx);
 47 | // Returns the number of tokens in the KV cache
 48 | int llama_get_kv_cache_token_count(struct llama_context * ctx);
 49 | // Sets the KV cache containing the current context for the model
 50 | void llama_set_kv_cache(
 51 |         struct llama_context * ctx,
 52 |                const uint8_t * kv_cache,
 53 |                       size_t n_size,
 54 |                          int n_token_count);
 55 | // Runs the llama inference to obtain the logits and probabilities for the next token.
 56 | // `tokens` together with `n_tokens` describes the provided batch of new tokens to process.
 57 | // `n_past` is the number of tokens to use from previous eval calls.
 58 | // Returns 0 on success
 59 | int llama_eval(
 60 |         struct llama_context * ctx,
 61 |            const llama_token * tokens,
 62 |                          int n_tokens,
 63 |                          int n_past,
 64 |                          int n_threads); // NOTE(review): not documented here; presumably the number of threads used for the evaluation - confirm
 65 | // Converts the provided `text` into tokens.
 66 | // The `tokens` pointer must be large enough to hold the resulting tokens.
 67 | // Returns the number of tokens on success, which is no more than `n_max_tokens`.
 68 | // Returns a negative number on failure - the number of tokens that would have been returned
 69 | // TODO: not sure if correct
 70 | int llama_tokenize(
 71 |         struct llama_context * ctx,
 72 |                   const char * text,
 73 |                  llama_token * tokens,
 74 |                          int n_max_tokens,
 75 |                         bool add_bos); // NOTE(review): not documented here; presumably adds the BOS token (see llama_token_bos) when true - confirm
 76 | int llama_n_vocab(struct llama_context * ctx); // NOTE(review): presumably the vocabulary size (cf. "Cols: n_vocab" below) - confirm
 77 | int llama_n_ctx (struct llama_context * ctx); // NOTE(review): presumably the size of the text context - confirm
 78 | int llama_n_embd (struct llama_context * ctx); // NOTE(review): presumably the embedding size (cf. "shape: [n_embd]" below) - confirm
 79 | // Token logits obtained from the last call to llama_eval().
 80 | // The logits for the last token are stored in the last row.
 81 | // The values can be mutated in order to change the probabilities of the next token.
 82 | // Rows: n_tokens
 83 | // Cols: n_vocab
 84 | float * llama_get_logits(struct llama_context * ctx);
 85 | // Gets the embeddings for the input.
 86 | // shape: [n_embd] (1-dimensional)
 87 | float * llama_get_embeddings(struct llama_context * ctx);
 88 | // Converts a token id to its string, using the vocabulary in the provided context.
 89 | const char * llama_token_to_str(struct llama_context * ctx, llama_token token);
 90 | // Special tokens (NOTE(review): presumably the beginning-of-sequence and end-of-sequence token ids - confirm)
 91 | llama_token llama_token_bos();
 92 | llama_token llama_token_eos();
 93 | // TODO: improve the last_n_tokens interface ?
 94 | llama_token llama_sample_top_p_top_k( // returns a llama_token; NOTE(review): presumably the sampled next token - confirm
 95 |    struct llama_context * ctx,
 96 |       const llama_token * last_n_tokens_data, // NOTE(review): presumably the most recent tokens, used with repeat_penalty - confirm
 97 |                     int last_n_tokens_size, // NOTE(review): presumably the number of entries in last_n_tokens_data - confirm
 98 |                     int top_k, // integer-typed, unlike the float sampling parameters below
 99 |                   float top_p,
100 |                   float temp, // NOTE(review): presumably the sampling temperature - confirm
101 |                   float repeat_penalty);
102 | // Performance information; both functions take the context and return nothing
103 | void llama_print_timings(struct llama_context * ctx);
104 | void llama_reset_timings(struct llama_context * ctx);
105 | // System information: returns a C string (NOTE(review): whether it also prints anything is not visible here)
106 | const char * llama_print_system_info(void);
107 | 


--------------------------------------------------------------------------------
/src/Parameters/GenerationParameters.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Kambo\LLamaCpp\Parameters;
 4 | 
 5 | /**
 6 |  * Holds the parameters of a generation.
 7 |  *
 8 |  * All values are assigned once in the constructor and are afterwards only
 9 |  * readable through the getters.
10 |  */
11 | class GenerationParameters
12 | {
13 |     private int $predictLength;
14 |     private float $topP;
15 |     private float $topK;
16 |     private float $temperature;
17 |     private float $repeatPenalty;
18 |     private int $noOfThreads;
19 | 
20 |     /**
21 |      * NOTE(review): the meanings below are inferred from the parameter names
22 |      * and should be confirmed against the code that consumes this object.
23 |      *
24 |      * @param int   $predictLength presumably the number of tokens to predict
25 |      * @param float $topP          presumably the top-p sampling value
26 |      * @param float $topK          presumably the top-k sampling value; typed float
27 |      *                             although the default is the integer literal 40
28 |      * @param float $temperature   presumably the sampling temperature
29 |      * @param float $repeatPenalty presumably the repetition penalty (default 1 / 0.85)
30 |      * @param int   $noOfThreads   presumably the number of threads to use
31 |      */
32 |     public function __construct(
33 |         int $predictLength = 128,
34 |         float $topP = 0.9,
35 |         float $topK = 40,
36 |         float $temperature = 0.2,
37 |         float $repeatPenalty = 1 / 0.85,
38 |         int $noOfThreads = 10,
39 |     ) {
40 |         $this->predictLength = $predictLength;
41 |         $this->topP = $topP;
42 |         $this->topK = $topK;
43 |         $this->temperature = $temperature;
44 |         $this->repeatPenalty = $repeatPenalty;
45 |         $this->noOfThreads = $noOfThreads;
46 |     }
47 | 
48 |     /** Returns the $predictLength value given to the constructor. */
49 |     public function getPredictLength(): int
50 |     {
51 |         return $this->predictLength;
52 |     }
53 | 
54 |     /** Returns the $topP value given to the constructor. */
55 |     public function getTopP(): float
56 |     {
57 |         return $this->topP;
58 |     }
59 | 
60 |     /** Returns the $topK value given to the constructor (as a float). */
61 |     public function getTopK(): float
62 |     {
63 |         return $this->topK;
64 |     }
65 | 
66 |     /** Returns the $temperature value given to the constructor. */
67 |     public function getTemperature(): float
68 |     {
69 |         return $this->temperature;
70 |     }
71 | 
72 |     /** Returns the $repeatPenalty value given to the constructor. */
73 |     public function getRepeatPenalty(): float
74 |     {
75 |         return $this->repeatPenalty;
76 |     }
77 | 
78 |     /** Returns the $noOfThreads value given to the constructor. */
79 |     public function getNoOfThreads(): int
80 |     {
81 |         return $this->noOfThreads;
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/src/Parameters/ModelParameters.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Kambo\LLamaCpp\Parameters;
  4 | 
  5 | /**
  6 |  * Holds the model path together with the model options.
  7 |  *
  8 |  * All values are assigned once in the constructor and are afterwards only
  9 |  * readable through the getters.
 10 |  */
 11 | class ModelParameters
 12 | {
 13 |     private string $modelPath;
 14 |     private int $nCtx;
 15 |     private int $nParts;
 16 |     private int $seed;
 17 |     private bool $f16KV;
 18 |     private bool $logitsAll;
 19 |     private bool $vocabOnly;
 20 |     private bool $useMlock;
 21 |     private bool $embedding;
 22 | 
 23 |     /**
 24 |      * @param string $modelPath path to the model that is used
 25 |      * @param int    $nCtx      text context
 26 |      * @param int    $nParts    -1 for default
 27 |      * @param int    $seed      RNG seed, 0 for random
 28 |      * @param bool   $f16KV     use fp16 for the KV cache
 29 |      * @param bool   $logitsAll the llama_eval() call computes all logits, not just the last one
 30 |      * @param bool   $vocabOnly only load the vocabulary, no weights
 31 |      * @param bool   $useMlock  force the system to keep the model in RAM
 32 |      * @param bool   $embedding embedding mode only
 33 |      */
 34 |     public function __construct(
 35 |         string $modelPath,
 36 |         int $nCtx = 1024,
 37 |         int $nParts = -1,
 38 |         int $seed = 0,
 39 |         bool $f16KV = false,
 40 |         bool $logitsAll = false,
 41 |         bool $vocabOnly = false,
 42 |         bool $useMlock = false,
 43 |         bool $embedding = false,
 44 |     ) {
 45 |         $this->modelPath = $modelPath;
 46 |         $this->nCtx = $nCtx;
 47 |         $this->nParts = $nParts;
 48 |         $this->seed = $seed;
 49 |         $this->f16KV = $f16KV;
 50 |         $this->logitsAll = $logitsAll;
 51 |         $this->vocabOnly = $vocabOnly;
 52 |         $this->useMlock = $useMlock;
 53 |         $this->embedding = $embedding;
 54 |     }
 55 | 
 56 |     /** Returns the $modelPath value given to the constructor. */
 57 |     public function getModelPath(): string
 58 |     {
 59 |         return $this->modelPath;
 60 |     }
 61 | 
 62 |     /** Returns the $nCtx value given to the constructor (default 1024). */
 63 |     public function getNCtx(): int
 64 |     {
 65 |         return $this->nCtx;
 66 |     }
 67 | 
 68 |     /** Returns the $nParts value given to the constructor (default -1). */
 69 |     public function getNParts(): int
 70 |     {
 71 |         return $this->nParts;
 72 |     }
 73 | 
 74 |     /** Returns the $seed value given to the constructor (default 0). */
 75 |     public function getSeed(): int
 76 |     {
 77 |         return $this->seed;
 78 |     }
 79 | 
 80 |     /** Returns the $f16KV flag given to the constructor. */
 81 |     public function isF16KV(): bool
 82 |     {
 83 |         return $this->f16KV;
 84 |     }
 85 | 
 86 |     /** Returns the $logitsAll flag given to the constructor. */
 87 |     public function isLogitsAll(): bool
 88 |     {
 89 |         return $this->logitsAll;
 90 |     }
 91 | 
 92 |     /** Returns the $vocabOnly flag given to the constructor. */
 93 |     public function isVocabOnly(): bool
 94 |     {
 95 |         return $this->vocabOnly;
 96 |     }
 97 | 
 98 |     /** Returns the $useMlock flag given to the constructor. */
 99 |     public function isUseMlock(): bool
100 |     {
101 |         return $this->useMlock;
102 |     }
103 | 
104 |     /** Returns the $embedding flag given to the constructor. */
105 |     public function isEmbedding(): bool
106 |     {
107 |         return $this->embedding;
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------