├── .github └── workflows │ └── build_and_release.yml ├── .gitignore ├── .gitmodules ├── README.md ├── images ├── bmp │ ├── 128x128 │ │ └── llama_cpp.bmp │ ├── 16x16 │ │ └── llama_cpp.bmp │ ├── 24x24 │ │ └── llama_cpp.bmp │ └── 32x32 │ │ └── llama_cpp.bmp └── png │ ├── 128x128 │ └── llama_cpp.png │ ├── 16x16 │ └── llama_cpp.png │ ├── 24x24 │ └── llama_cpp.png │ └── 32x32 │ └── llama_cpp.png ├── packages ├── LlamaCpp.dpk ├── LlamaCpp.dproj ├── LlamaCppBindings.groupproj ├── dclLlamaCpp.dpk └── dclLlamaCpp.dproj ├── samples └── SimpleChatWithDownload │ ├── DownloadForm.fmx │ ├── DownloadForm.pas │ ├── Entitlement.TemplateOSX.xml │ ├── MainForm.fmx │ ├── MainForm.pas │ ├── README.md │ ├── SimpleChatWithDownload.deployproj │ ├── SimpleChatWithDownload.dpr │ ├── SimpleChatWithDownload.dproj │ └── lib │ ├── macos_arm64 │ ├── libggml-base.dylib │ ├── libggml-blas.dylib │ ├── libggml-cpu.dylib │ ├── libggml-metal.dylib │ ├── libggml-rpc.dylib │ ├── libggml.dylib │ ├── libllama.dylib │ └── libllava_shared.dylib │ └── windows_x64 │ ├── ggml-base.dll │ ├── ggml-cpu.dll │ ├── ggml-rpc.dll │ ├── ggml.dll │ ├── llama.dll │ └── llava_shared.dll ├── src ├── Api │ ├── LlamaCpp.Api.Ggml.pas │ ├── LlamaCpp.Api.Llama.pas │ ├── LlamaCpp.Api.Llava.pas │ └── LlamaCpp.Api.pas ├── CType │ ├── Ggml │ │ ├── LlamaCpp.CType.Ggml.Backend.pas │ │ ├── LlamaCpp.CType.Ggml.Cpu.pas │ │ └── LlamaCpp.CType.Ggml.pas │ ├── Llama │ │ └── LlamaCpp.CType.Llama.pas │ └── Llava │ │ └── LlamaCpp.CType.Llava.pas ├── Common │ ├── Cache │ │ ├── LlamaCpp.Common.Cache.Base.pas │ │ ├── LlamaCpp.Common.Cache.Disk.pas │ │ └── LlamaCpp.Common.Cache.Ram.pas │ ├── Chat │ │ ├── Completion │ │ │ └── LlamaCpp.Common.Chat.Completion.Collection.pas │ │ ├── Formatter │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Adapter.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Alpaca.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Baichuan.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Baichuan2.pas │ │ │ ├── 
LlamaCpp.Common.Chat.Formatter.ChatGLM3.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Chatml.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Gemma.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Intel.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Jinja2.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Llama2.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Llama3.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.MilstralLite.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.MistralInstruct.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.OasstLlama.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.OpenBuddy.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.OpenChat.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.OpenOrca.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Phind.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Pygmalion.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Qwen.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.RedpajamaIncite.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Registration.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Saiga.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Snoozy.pas │ │ │ ├── LlamaCpp.Common.Chat.Formatter.Vicuna.pas │ │ │ └── LlamaCpp.Common.Chat.Formatter.Zephyr.pas │ │ ├── LlamaCpp.Common.Chat.Format.pas │ │ └── LlamaCpp.Common.Chat.Types.pas │ ├── Grammar │ │ └── LlamaCpp.Common.Grammar.pas │ ├── LlamaCpp.Common.Settings.pas │ ├── LlamaCpp.Common.State.pas │ ├── LlamaCpp.Common.TokenArray.pas │ ├── LlamaCpp.Common.Types.pas │ ├── Processor │ │ ├── LlamaCpp.Common.Processor.LogitsScore.pas │ │ └── LlamaCpp.Common.Processor.StoppingCriteria.pas │ ├── Sampling │ │ ├── LlamaCpp.Common.Sampling.Context.pas │ │ ├── LlamaCpp.Common.Sampling.CustomSampler.pas │ │ ├── LlamaCpp.Common.Sampling.Params.pas │ │ └── LlamaCpp.Common.Sampling.Sampler.pas │ ├── Speculative │ │ └── LlamaCpp.Common.Speculative.LookupDecoding.pas │ └── Tokenizer │ │ ├── LlamaCpp.Common.Tokenizer.Base.pas │ │ └── LlamaCpp.Common.Tokenizer.pas ├── LlamaCpp.ChatCompletion.pas ├── LlamaCpp.Completion.pas ├── 
LlamaCpp.Download.pas ├── LlamaCpp.Embedding.pas ├── LlamaCpp.Evaluator.pas ├── LlamaCpp.Exception.pas ├── LlamaCpp.Generator.pas ├── LlamaCpp.Helper.pas ├── LlamaCpp.Llama.pas ├── LlamaCpp.Registration.pas ├── LlamaCpp.Sampler.pas ├── LlamaCpp.Tokenization.pas ├── LlamaCpp.Types.pas └── Wrapper │ ├── LlamaCpp.Wrapper.LlamaBatch.pas │ ├── LlamaCpp.Wrapper.LlamaContext.pas │ └── LlamaCpp.Wrapper.LlamaModel.pas └── test ├── ChatFormatters.pas ├── HighLevelAPI.pas ├── LlamaCppTests.dpr ├── LlamaCppTests.dproj └── Utils.pas /.gitignore: -------------------------------------------------------------------------------- 1 | # Uncomment these types if you want even more clean repository. But be careful. 2 | # It can make harm to an existing project source. Read explanations below. 3 | # 4 | # Resource files are binaries containing manifest, project icon and version info. 5 | # They can not be viewed as text or compared by diff-tools. Consider replacing them with .rc files. 6 | #*.res 7 | # 8 | # Type library file (binary). In old Delphi versions it should be stored. 9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored. 10 | #*.tlb 11 | # 12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7. 13 | # Uncomment this if you are not using diagrams or use newer Delphi version. 14 | #*.ddp 15 | # 16 | # Visual LiveBindings file. Added in Delphi XE2. 17 | # Uncomment this if you are not using LiveBindings Designer. 18 | #*.vlb 19 | # 20 | # Deployment Manager configuration file for your project. Added in Delphi XE2. 21 | # Uncomment this if it is not mobile development and you do not use remote debug feature. 22 | #*.deployproj 23 | # 24 | # C++ object files produced when C/C++ Output file generation is configured. 25 | # Uncomment this if you are not using external objects (zlib library for example). 
26 | #*.obj 27 | # 28 | 29 | # Default Delphi compiler directories 30 | # Content of these directories is generated with each Compile/Construct of a project. 31 | # Most of the time, files here have no place in a code repository. 32 | #Win32/ 33 | #Win64/ 34 | #OSX64/ 35 | #OSXARM64/ 36 | #Android/ 37 | #Android64/ 38 | #iOSDevice64/ 39 | #Linux64/ 40 | 41 | # Delphi compiler-generated binaries (safe to delete) 42 | *.exe 43 | *.dll 44 | *.bpl 45 | *.bpi 46 | *.dcp 47 | *.so 48 | *.apk 49 | *.drc 50 | *.map 51 | *.dres 52 | *.rsm 53 | *.tds 54 | *.dcu 55 | *.lib 56 | *.a 57 | *.o 58 | *.ocx 59 | 60 | # Delphi autogenerated files (duplicated info) 61 | *.cfg 62 | *.hpp 63 | *Resource.rc 64 | 65 | # Delphi local files (user-specific info) 66 | *.local 67 | *.identcache 68 | *.projdata 69 | *.tvsconfig 70 | *.dsk 71 | 72 | # Delphi history and backups 73 | __history/ 74 | __recovery/ 75 | *.~* 76 | 77 | # Castalia statistics file (since XE7 Castalia is distributed with Delphi) 78 | *.stat 79 | 80 | # Boss dependency manager vendor folder https://github.com/HashLoad/boss 81 | modules/ 82 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/llama.cpp"] 2 | path = vendor/llama.cpp 3 | url = https://github.com/ggerganov/llama.cpp.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🐫 llama-cpp-delphi 2 | 3 | Welcome to **llama-cpp-delphi**, the Delphi bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp)! This project allows you to integrate the power of Llama-based Large Language Models (LLMs) into your Delphi applications, enabling efficient and versatile local inference. 
4 | 5 | ## 🚀 Features 6 | 7 | - **Delphi Integration**: Harness Llama models directly in your Delphi projects. 8 | - **Local Inference**: No external servers or APIs required—your data stays local. 9 | - **Cross-Platform Support**: Compatible with Windows, Linux, and Mac. 10 | - 🖥️ **Mac Silicon**: GPU (MPS) and CPU inference supported. 11 | - 💻 **Windows**: CPU inference supported, with options for CUDA, Vulkan, Kompute, and OpenBLAS. 12 | - 🌏 **Linux**: CPU inference supported, with options for CUDA, Vulkan, Kompute, and OpenBLAS. 13 | - 🚀 **Android and iOS support coming soon!** 14 | - **Pre-Built Libraries**: Simplified setup with pre-compiled libraries. 15 | - **Customizable Sampling**: Fine-tune your AI’s behavior with easy-to-configure samplers. 16 | 17 | ## 🔧 Getting Started 18 | 19 | ### Prerequisites 20 | 21 | 1. **Delphi IDE** installed. 22 | 2. **Git** installed (required for cloning model repositories). 23 | 3. A basic understanding of Delphi development. 24 | 25 | ### Installation 26 | 27 | 1. Clone the **llama-cpp-delphi** repository: 28 | ```bash 29 | git clone https://github.com/Embarcadero/llama-cpp-delphi.git 30 | ``` 31 | 2. Open the project in Delphi IDE. 32 | 3. Build the project for your desired platform(s): 33 | - Windows 34 | - Linux 35 | - Mac Silicon 36 | 37 | ### Libraries 38 | 39 | The necessary **llama.cpp** libraries are distributed as part of the releases of this repository. You can find them under the "Release" section in the repository. Here's an explanation of the libraries available: 40 | 41 | #### CPU Build 42 | 43 | CPU-only builds for Windows, Linux, and macOS. Inference runs slow on CPU—consider using a GPU-based library. 44 | 45 | #### BLAS Build 46 | 47 | Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Using BLAS doesn't affect the generation performance. 
There are several different BLAS implementations available for build and use: 48 | 49 | - **Accelerate Framework**: Available on macOS, enabled by default. 50 | - **OpenBLAS**: Provides CPU-based BLAS acceleration. Ensure OpenBLAS is installed on your machine. 51 | - **BLIS**: A high-performance portable BLAS framework. [Learn more](https://github.com/flame/blis). 52 | - **Intel oneMKL**: Optimized for Intel processors, supporting advanced instruction sets like avx\_vnni. 53 | 54 | #### SYCL 55 | 56 | SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators. 57 | 58 | llama.cpp based on SYCL is used to **support Intel GPU** (Data Center Max series, Flex series, Arc series, Built-in GPU and iGPU). 59 | 60 | For detailed info, please refer to [llama.cpp for SYCL](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md). 61 | 62 | #### Metal Build 63 | 64 | On macOS, Metal is enabled by default. Using Metal makes the computation run on the GPU. 65 | 66 | When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers 0` option in the Llama settings. 67 | 68 | #### CUDA 69 | 70 | Provides GPU acceleration using an NVIDIA GPU. [Refer to the CUDA guide](https://github.com/ggerganov/llama.cpp/blob/master/docs/cuda-fedora.md) for Fedora setup. 71 | 72 | #### Vulkan 73 | 74 | Vulkan provides GPU acceleration through a modern, low-overhead API. To use Vulkan: 75 | 76 | * Ensure Vulkan is installed and supported by your GPU drivers. 77 | 78 | Learn more at the [official Vulkan site](https://vulkan.org). 79 | 80 | #### Kompute 81 | 82 | Kompute offers efficient compute operations for GPU workloads. It's designed for AI inference tasks and works seamlessly with Vulkan. 83 | 84 | #### CANN 85 | 86 | Provides NPU acceleration using the AI cores of Ascend NPUs. [Learn more about CANN](https://www.hiascend.com/en/software/cann). 
87 | 88 | #### SYCL 89 | 90 | SYCL enables GPU acceleration on Intel GPUs. Refer to the [SYCL documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md) for setup details. 91 | 92 | #### HIP 93 | 94 | Supports GPU acceleration on AMD GPUs compatible with HIP. 95 | 96 | #### MUSA 97 | 98 | Provides GPU acceleration using the MUSA cores of Moore Threads MTT GPUs. 99 | 100 | ## 🌟 Using llama-cpp-delphi 101 | 102 | ### Key Components 103 | 104 | - **Llama**: Delphi-friendly IDE component. 105 | 106 | ### Running Samples 107 | 108 | 1. Explore the `samples` directory for available examples, like **SimpleChatWithDownload**. 109 | 2. Follow the README provided in each sample folder for detailed instructions. 110 | 111 | ## 🔧 Configuration 112 | 113 | ### Models 114 | 115 | You can use any model compatible with **llama.cpp** (e.g., GGUF format). Popular options include: 116 | - **Llama-2**: A robust and general-purpose model. 117 | - **Llama-3**: A lightweight alternative with excellent performance. 118 | - **Mistral**: A compact and efficient model. 119 | - **DeepSeek**: An innovative model designed for exploratory tasks. 120 | 121 | ### Hardware Support 122 | 123 | - **Mac Silicon**: 124 | - GPU inference (via MPS) is recommended for optimal performance. 125 | - CPU inference is available but slower. 126 | - **Windows**: 127 | - CPU inference supported, with additional support for CUDA, Vulkan, Kompute, HIP, and OpenBLAS. 128 | - **Linux**: 129 | - CPU inference supported, with additional support for CUDA, Vulkan, HIP, and MUSA. 130 | 131 | ## 🤝 Contributions 132 | 133 | We welcome contributions to improve **llama-cpp-delphi**! Feel free to: 134 | - Report issues. 135 | - Submit pull requests. 136 | - Suggest enhancements. 137 | 138 | ## 📝 License 139 | 140 | This project is licensed under the MIT License—see the `LICENSE` file for details. 
141 | 142 | ## 🌟 Final Notes 143 | 144 | Get started with **llama-cpp-delphi** and bring advanced AI capabilities to your Delphi projects. If you encounter any issues or have suggestions, let us know—we’re here to help! Happy coding! 🎉 145 | 146 | -------------------------------------------------------------------------------- /images/bmp/128x128/llama_cpp.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/128x128/llama_cpp.bmp -------------------------------------------------------------------------------- /images/bmp/16x16/llama_cpp.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/16x16/llama_cpp.bmp -------------------------------------------------------------------------------- /images/bmp/24x24/llama_cpp.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/24x24/llama_cpp.bmp -------------------------------------------------------------------------------- /images/bmp/32x32/llama_cpp.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/32x32/llama_cpp.bmp -------------------------------------------------------------------------------- /images/png/128x128/llama_cpp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/128x128/llama_cpp.png -------------------------------------------------------------------------------- 
/images/png/16x16/llama_cpp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/16x16/llama_cpp.png -------------------------------------------------------------------------------- /images/png/24x24/llama_cpp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/24x24/llama_cpp.png -------------------------------------------------------------------------------- /images/png/32x32/llama_cpp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/32x32/llama_cpp.png -------------------------------------------------------------------------------- /packages/LlamaCpp.dpk: -------------------------------------------------------------------------------- 1 | package LlamaCpp; 2 | 3 | {$R *.res} 4 | {$IFDEF IMPLICITBUILDING This IFDEF should not be used by users} 5 | {$ALIGN 8} 6 | {$ASSERTIONS ON} 7 | {$BOOLEVAL OFF} 8 | {$DEBUGINFO OFF} 9 | {$EXTENDEDSYNTAX ON} 10 | {$IMPORTEDDATA ON} 11 | {$IOCHECKS ON} 12 | {$LOCALSYMBOLS ON} 13 | {$LONGSTRINGS ON} 14 | {$OPENSTRINGS ON} 15 | {$OPTIMIZATION OFF} 16 | {$OVERFLOWCHECKS ON} 17 | {$RANGECHECKS ON} 18 | {$REFERENCEINFO ON} 19 | {$SAFEDIVIDE OFF} 20 | {$STACKFRAMES ON} 21 | {$TYPEDADDRESS OFF} 22 | {$VARSTRINGCHECKS ON} 23 | {$WRITEABLECONST OFF} 24 | {$MINENUMSIZE 1} 25 | {$IMAGEBASE $400000} 26 | {$DEFINE DEBUG} 27 | {$ENDIF IMPLICITBUILDING} 28 | {$LIBSUFFIX AUTO} 29 | {$RUNONLY} 30 | {$IMPLICITBUILD ON} 31 | 32 | requires 33 | rtl, 34 | dbrtl, 35 | FireDAC, 36 | FireDACCommonDriver, 37 | FireDACCommon, 38 | FireDACSqliteDriver, 39 | fmxFireDAC; 40 | 41 | contains 42 | LlamaCpp.Api.Llama in 
'..\src\Api\LlamaCpp.Api.Llama.pas', 43 | LlamaCpp.Api in '..\src\Api\LlamaCpp.Api.pas', 44 | LlamaCpp.Api.Ggml in '..\src\Api\LlamaCpp.Api.Ggml.pas', 45 | LlamaCpp.Api.Llava in '..\src\Api\LlamaCpp.Api.Llava.pas', 46 | LlamaCpp.Wrapper.LlamaModel in '..\src\Wrapper\LlamaCpp.Wrapper.LlamaModel.pas', 47 | LlamaCpp.CType.Ggml.Backend in '..\src\CType\Ggml\LlamaCpp.CType.Ggml.Backend.pas', 48 | LlamaCpp.CType.Ggml.Cpu in '..\src\CType\Ggml\LlamaCpp.CType.Ggml.Cpu.pas', 49 | LlamaCpp.CType.Ggml in '..\src\CType\Ggml\LlamaCpp.CType.Ggml.pas', 50 | LlamaCpp.CType.Llama in '..\src\CType\Llama\LlamaCpp.CType.Llama.pas', 51 | LlamaCpp.CType.Llava in '..\src\CType\Llava\LlamaCpp.CType.Llava.pas', 52 | LlamaCpp.Wrapper.LlamaContext in '..\src\Wrapper\LlamaCpp.Wrapper.LlamaContext.pas', 53 | LlamaCpp.Wrapper.LlamaBatch in '..\src\Wrapper\LlamaCpp.Wrapper.LlamaBatch.pas', 54 | LlamaCpp.Common.Sampling.Params in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.Params.pas', 55 | LlamaCpp.Common.Sampling.Context in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.Context.pas', 56 | LlamaCpp.Helper in '..\src\LlamaCpp.Helper.pas', 57 | LlamaCpp.Common.Sampling.CustomSampler in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.CustomSampler.pas', 58 | LlamaCpp.Common.Sampling.Sampler in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.Sampler.pas', 59 | LlamaCpp.Common.Tokenizer.Base in '..\src\Common\Tokenizer\LlamaCpp.Common.Tokenizer.Base.pas', 60 | LlamaCpp.Common.Tokenizer in '..\src\Common\Tokenizer\LlamaCpp.Common.Tokenizer.pas', 61 | LlamaCpp.Common.Cache.Base in '..\src\Common\Cache\LlamaCpp.Common.Cache.Base.pas', 62 | LlamaCpp.Common.State in '..\src\Common\LlamaCpp.Common.State.pas', 63 | LlamaCpp.Common.Cache.Ram in '..\src\Common\Cache\LlamaCpp.Common.Cache.Ram.pas', 64 | LlamaCpp.Common.Cache.Disk in '..\src\Common\Cache\LlamaCpp.Common.Cache.Disk.pas', 65 | LlamaCpp.Llama in '..\src\LlamaCpp.Llama.pas', 66 | LlamaCpp.Common.TokenArray in 
'..\src\Common\LlamaCpp.Common.TokenArray.pas', 67 | LlamaCpp.Common.Chat.Format in '..\src\Common\Chat\LlamaCpp.Common.Chat.Format.pas', 68 | LlamaCpp.Common.Chat.Types in '..\src\Common\Chat\LlamaCpp.Common.Chat.Types.pas', 69 | LlamaCpp.Common.Types in '..\src\Common\LlamaCpp.Common.Types.pas', 70 | LlamaCpp.Common.Grammar in '..\src\Common\Grammar\LlamaCpp.Common.Grammar.pas', 71 | LlamaCpp.Common.Processor.LogitsScore in '..\src\Common\Processor\LlamaCpp.Common.Processor.LogitsScore.pas', 72 | LlamaCpp.Common.Processor.StoppingCriteria in '..\src\Common\Processor\LlamaCpp.Common.Processor.StoppingCriteria.pas', 73 | LlamaCpp.Common.Speculative.LookupDecoding in '..\src\Common\Speculative\LlamaCpp.Common.Speculative.LookupDecoding.pas', 74 | LlamaCpp.Common.Chat.Completion.Collection in '..\src\Common\Chat\Completion\LlamaCpp.Common.Chat.Completion.Collection.pas', 75 | LlamaCpp.Common.Chat.Formatter.Adapter in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Adapter.pas', 76 | LlamaCpp.Common.Settings in '..\src\Common\LlamaCpp.Common.Settings.pas', 77 | LlamaCpp.Common.Chat.Formatter.Llama2 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Llama2.pas', 78 | LlamaCpp.Common.Chat.Formatter.Registration in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Registration.pas', 79 | LlamaCpp.Tokenization in '..\src\LlamaCpp.Tokenization.pas', 80 | LlamaCpp.Evaluator in '..\src\LlamaCpp.Evaluator.pas', 81 | LlamaCpp.Sampler in '..\src\LlamaCpp.Sampler.pas', 82 | LlamaCpp.Types in '..\src\LlamaCpp.Types.pas', 83 | LlamaCpp.Generator in '..\src\LlamaCpp.Generator.pas', 84 | LlamaCpp.Embedding in '..\src\LlamaCpp.Embedding.pas', 85 | LlamaCpp.Completion in '..\src\LlamaCpp.Completion.pas', 86 | LlamaCpp.ChatCompletion in '..\src\LlamaCpp.ChatCompletion.pas', 87 | LlamaCpp.Exception in '..\src\LlamaCpp.Exception.pas', 88 | LlamaCpp.Common.Chat.Formatter.Llama3 in 
'..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Llama3.pas', 89 | LlamaCpp.Common.Chat.Formatter.Alpaca in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Alpaca.pas', 90 | LlamaCpp.Common.Chat.Formatter.Qwen in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Qwen.pas', 91 | LlamaCpp.Common.Chat.Formatter.Vicuna in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Vicuna.pas', 92 | LlamaCpp.Common.Chat.Formatter.OasstLlama in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OasstLlama.pas', 93 | LlamaCpp.Common.Chat.Formatter.Baichuan2 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Baichuan2.pas', 94 | LlamaCpp.Common.Chat.Formatter.Baichuan in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Baichuan.pas', 95 | LlamaCpp.Common.Chat.Formatter.OpenBuddy in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OpenBuddy.pas', 96 | LlamaCpp.Common.Chat.Formatter.RedpajamaIncite in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.RedpajamaIncite.pas', 97 | LlamaCpp.Common.Chat.Formatter.Snoozy in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Snoozy.pas', 98 | LlamaCpp.Common.Chat.Formatter.Phind in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Phind.pas', 99 | LlamaCpp.Common.Chat.Formatter.Intel in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Intel.pas', 100 | LlamaCpp.Common.Chat.Formatter.OpenOrca in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OpenOrca.pas', 101 | LlamaCpp.Common.Chat.Formatter.MilstralLite in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.MilstralLite.pas', 102 | LlamaCpp.Common.Chat.Formatter.Zephyr in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Zephyr.pas', 103 | LlamaCpp.Common.Chat.Formatter.Pygmalion in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Pygmalion.pas', 104 | LlamaCpp.Common.Chat.Formatter.Chatml in 
'..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Chatml.pas', 105 | LlamaCpp.Common.Chat.Formatter.MistralInstruct in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.MistralInstruct.pas', 106 | LlamaCpp.Common.Chat.Formatter.ChatGLM3 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.ChatGLM3.pas', 107 | LlamaCpp.Common.Chat.Formatter.OpenChat in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OpenChat.pas', 108 | LlamaCpp.Common.Chat.Formatter.Saiga in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Saiga.pas', 109 | LlamaCpp.Common.Chat.Formatter.Gemma in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Gemma.pas', 110 | LlamaCpp.Download in '..\src\LlamaCpp.Download.pas', 111 | LlamaCpp.Common.Chat.Formatter.Jinja2 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Jinja2.pas'; 112 | 113 | end. 114 | 115 | 116 | -------------------------------------------------------------------------------- /packages/LlamaCppBindings.groupproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | {4393AAEC-1CA8-4DFA-82FE-C3984AE5D33A} 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | Default.Personality.12 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /packages/dclLlamaCpp.dpk: -------------------------------------------------------------------------------- 1 | package dclLlamaCpp; 2 | 3 | {$R *.res} 4 | {$R *.dres} 5 | {$IFDEF IMPLICITBUILDING This IFDEF should not be used by users} 6 | {$ALIGN 8} 7 | {$ASSERTIONS ON} 8 | {$BOOLEVAL OFF} 9 | {$DEBUGINFO OFF} 10 | {$EXTENDEDSYNTAX ON} 11 | {$IMPORTEDDATA ON} 12 | {$IOCHECKS ON} 13 | {$LOCALSYMBOLS ON} 14 | {$LONGSTRINGS ON} 15 | 
{$OPENSTRINGS ON} 16 | {$OPTIMIZATION OFF} 17 | {$OVERFLOWCHECKS ON} 18 | {$RANGECHECKS ON} 19 | {$REFERENCEINFO ON} 20 | {$SAFEDIVIDE OFF} 21 | {$STACKFRAMES ON} 22 | {$TYPEDADDRESS OFF} 23 | {$VARSTRINGCHECKS ON} 24 | {$WRITEABLECONST OFF} 25 | {$MINENUMSIZE 1} 26 | {$IMAGEBASE $400000} 27 | {$DEFINE DEBUG} 28 | {$ENDIF IMPLICITBUILDING} 29 | {$DESCRIPTION 'Delphi bindings for Llama.cpp'} 30 | {$LIBSUFFIX AUTO} 31 | {$DESIGNONLY} 32 | {$IMPLICITBUILD ON} 33 | 34 | requires 35 | rtl, 36 | llamacpp; 37 | 38 | contains 39 | LlamaCpp.Registration in '..\src\LlamaCpp.Registration.pas'; 40 | 41 | end. 42 | -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/DownloadForm.fmx: -------------------------------------------------------------------------------- 1 | object FormDownload: TFormDownload 2 | Left = 0 3 | Top = 0 4 | Caption = 'Clone and Load Model' 5 | ClientHeight = 280 6 | ClientWidth = 400 7 | Position = MainFormCenter 8 | Constraints.MaxHeight = 280.000000000000000000 9 | Constraints.MaxWidth = 400.000000000000000000 10 | Constraints.MinHeight = 280.000000000000000000 11 | Constraints.MinWidth = 400.000000000000000000 12 | FormFactor.Width = 320 13 | FormFactor.Height = 480 14 | FormFactor.Devices = [Desktop] 15 | DesignerMasterStyle = 0 16 | object memoDownload: TMemo 17 | Touch.InteractiveGestures = [Pan, LongTap, DoubleTap] 18 | DataDetectorTypes = [] 19 | ReadOnly = True 20 | Align = Client 21 | Size.Width = 400.000000000000000000 22 | Size.Height = 280.000000000000000000 23 | Size.PlatformDefault = False 24 | TabOrder = 0 25 | Viewport.Width = 400.000000000000000000 26 | Viewport.Height = 280.000000000000000000 27 | end 28 | object LlamaDownload1: TLlamaDownload 29 | OnWriteData = LlamaDownload1WriteData 30 | Left = 184 31 | Top = 120 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/DownloadForm.pas: 
-------------------------------------------------------------------------------- 1 | unit DownloadForm; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, System.Types, System.UITypes, System.Classes, System.Variants, 7 | FMX.Types, FMX.Controls, FMX.Forms, FMX.Graphics, FMX.Dialogs, FMX.Memo.Types, 8 | FMX.Controls.Presentation, FMX.ScrollBox, FMX.Memo, LlamaCpp.Llama, LlamaCpp.Download; 9 | 10 | type 11 | TFormDownload = class(TForm) 12 | memoDownload: TMemo; 13 | LlamaDownload1: TLlamaDownload; 14 | procedure LlamaDownload1WriteData(Sender: TObject; const AText: string); 15 | 16 | procedure Download(const ALlama: TLlama; const ATask: TFunc); 17 | public 18 | // HF Auth 19 | procedure HFAuth(const AUserName, AToken: string); 20 | 21 | procedure DownloadAndPrepareLlama2(const ALlama: TLlama); 22 | procedure DownloadAndPrepareLlama3(const ALlama: TLlama); 23 | procedure DownloadAndPrepareMistralLite(const ALlama: TLlama); 24 | procedure DownloadAndPrepareTinyLlama(const ALlama: TLlama); 25 | end; 26 | 27 | var 28 | FormDownload: TFormDownload; 29 | 30 | implementation 31 | 32 | uses 33 | System.Threading; 34 | 35 | {$R *.fmx} 36 | 37 | { TFormDownload } 38 | 39 | procedure TFormDownload.Download(const ALlama: TLlama; 40 | const ATask: TFunc); 41 | begin 42 | memoDownload.Lines.Add( 43 | 'Checking your local copy. It may take a while...' 
44 | + sLineBreak + sLineBreak); 45 | 46 | TTask.Run(procedure() begin 47 | try 48 | ALlama.ModelPath := ATask; 49 | 50 | TThread.Queue(nil, procedure() begin 51 | memoDownload.Lines.Add('Loading...'); 52 | end); 53 | 54 | ALlama.Init(); 55 | 56 | TThread.Queue(nil, procedure() begin 57 | memoDownload.Lines.Add(String.Empty); 58 | memoDownload.Lines.Add('All done!'); 59 | end); 60 | 61 | TThread.ForceQueue(nil, procedure() begin 62 | Self.Close(); 63 | end, 500); 64 | except 65 | on E: Exception do 66 | Application.ShowException(E); 67 | end; 68 | end); 69 | 70 | Self.ShowModal(); 71 | end; 72 | 73 | procedure TFormDownload.DownloadAndPrepareLlama2(const ALlama: TLlama); 74 | begin 75 | ALlama.Settings.ChatFormat := 'llama-2'; 76 | 77 | Download(ALlama, function(): string begin 78 | Result := LlamaDownload1.DownloadLlama2_Chat_7B()[0]; 79 | end); 80 | end; 81 | 82 | procedure TFormDownload.DownloadAndPrepareLlama3(const ALlama: TLlama); 83 | begin 84 | ALlama.Settings.ChatFormat := 'llama-3'; 85 | 86 | Download(ALlama, function(): string begin 87 | Result := LlamaDownload1.DownloadLlama3_Chat_30B()[0]; 88 | end); 89 | end; 90 | 91 | procedure TFormDownload.DownloadAndPrepareMistralLite( 92 | const ALlama: TLlama); 93 | begin 94 | ALlama.Settings.ChatFormat := 'mistrallite'; 95 | 96 | Download(ALlama, function(): string begin 97 | Result := LlamaDownload1.DownloadMistrallite_7B()[0]; 98 | end); 99 | end; 100 | 101 | procedure TFormDownload.LlamaDownload1WriteData(Sender: TObject; 102 | const AText: string); 103 | begin 104 | TThread.Queue(nil, procedure() begin 105 | MemoDownload.Lines.Text := MemoDownload.Lines.Text + AText; 106 | end); 107 | end; 108 | 109 | procedure TFormDownload.DownloadAndPrepareTinyLlama(const ALlama: TLlama); 110 | begin 111 | ALlama.Settings.ChatFormat := 'zephyr'; 112 | 113 | Download(ALlama, function(): string begin 114 | Result := LlamaDownload1.DownloadTinyLlama_1_1B()[0]; 115 | end); 116 | end; 117 | 118 | procedure 
TFormDownload.HFAuth(const AUserName, AToken: string); 119 | begin 120 | LLamaDownload1.HuggingFace.UserName := AUserName; 121 | LLamaDownload1.HuggingFace.Token := AToken; 122 | end; 123 | 124 | end. 125 | -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/Entitlement.TemplateOSX.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | <%appSandboxKeys%> 6 | 7 | 8 | -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/README.md: -------------------------------------------------------------------------------- 1 | # 🚀 SimpleChatWithDownload 2 | 3 | Experience the power of local inference! This app runs a Large Language Model (LLM) entirely on your machine, meaning no internet or external API calls are needed for predictions. By leveraging GPU (on Mac) or CPU (on Windows) for computation, you get a secure and self-contained AI experience tailored to your hardware setup. 🎉 4 | 5 | **SimpleChatWithDownload** is an exciting sample project from the **llama-cpp-delphi** bindings. This app provides a streamlined way to interact with a local LLM (Large Language Model) in a sleek chat interface, featuring automatic model downloads. Whether you’re using Mac Silicon for blazing-fast GPU inference or Windows for a **SLOW** CPU inference, this sample is a great way to get started! 🎉 6 | 7 | 8 | 9 | https://github.com/user-attachments/assets/16582374-4c12-43bd-aff8-6c4ad4f41339 10 | 11 | 12 | 13 | ## 🌟 Features 14 | 15 | - **Interactive Chat Window**: Start chatting with your local LLM in seconds! 16 | - **Automatic Model Downloads**: Download models like **Llama-2**, **Llama-3**, and **Mistral Lite** effortlessly. 🚀 17 | - Models are cloned via Git and downloaded to your system’s default download folder. 18 | - **Platform Support**: 19 | - 🖥️ **Mac Silicon**: GPU (MPS) and CPU inference supported. 
20 | - 💻 **Windows**: CPU inference only. Feel free to extend it and test CUDA. 21 | - ⚡ GPU inference is recommended for Mac to avoid slower CPU performance. 22 | - **Pre-Bundled Llama.cpp Libraries**: No extra setup! All required libraries are included in the `lib` folder for easy deployment. 23 | - **Customizable Settings**: 24 | - Choose your model. 25 | - Switch between GPU and CPU inference on Mac. 26 | - Enable/disable seed settings to control response variability. 27 | 28 | ## 🛠️ Getting Started 29 | 30 | ### Note 31 | 32 | You must have Git installed on your machine to clone model repositories. 33 | 34 | ### Prerequisites 35 | 36 | 1. Ensure you have the **llama-cpp-delphi** project ready. If not, grab it from the repository. 37 | 2. A **Delphi IDE** installation. 38 | 3. For Mac deployment, make sure **PAServer** is running on your Mac. 39 | 40 | ### Steps to Run 41 | 42 | 1. **Build llama-cpp-delphi**: 43 | - Open the llama-cpp-delphi project in Delphi IDE. 44 | - Build it for **Windows** and **Mac Silicon**. 45 | 46 | 2. **Open and Build the Sample**: 47 | - Open the `SimpleChatWithDownload` sample in Delphi IDE. 48 | - Build it for your target platform: 49 | - **Mac Silicon**: Recommended for GPU inference. 50 | - **Windows**: CPU inference only. 51 | 52 | 3. **Deploy to Mac**: 53 | - Connect to your Mac using **PAServer**. 54 | - Deploy the app to your Mac. 🎉 55 | 56 | 4. **Run the App**: 57 | - The app will launch with a "Settings" menu where you can: 58 | - Select your model (Llama-2, Llama-3, Mistral Lite). 59 | - Choose GPU or CPU inference (Mac only). 60 | - Enable/disable seed randomness. 61 | 62 | ### Download and Use Models 63 | 64 | - Click the **hamburger menu** to start downloading the selected model. 65 | - Supported Models: 66 | - **Llama-2**: ~4 GB (7B.Q4_K_M). 67 | - **Llama-3**: ~5 GB (30B.Q4_K_M). 68 | - **Mistral Lite**: ~7 GB (7B.Q4_K_M). 69 | - 🔧 You can also use any GGUF-compatible models with Llama.cpp. 
70 | - 💡 Feel free to test **DeepSeek** locally for additional insights and functionality! 71 | 72 | - After the model download is complete, the chat window will activate. 73 | 74 | ## 💡 Usage Tips 75 | 76 | - **Start Chatting**: 77 | - Type your message in the chat box and press **Enter** or click the **Play** button. 78 | - Use the **Stop** button to pause responses. 79 | 80 | - **Customize Inference**: 81 | - Mac users: Switch between GPU (fast) and CPU (fallback) modes via the "Settings" menu. 82 | - Windows users: For better performance, explore CUDA builds in the llama-cpp-delphi "Release" section. 💪 83 | 84 | - **Seed Option**: 85 | - Prevent repetitive responses for the same questions by enabling the seed setting. 86 | 87 | ## 📁 Libraries 88 | 89 | All required libraries are bundled in the `lib` folder of the sample’s root directory: 90 | - **Mac**: Deployment is pre-configured. Deploy via PAServer, and you’re good to go! 91 | - **Windows**: The app automatically loads libraries from the `lib` folder. 92 | 93 | For additional builds (e.g., CUDA versions), visit the llama-cpp-delphi "Release" section. 94 | 95 | ## 🌟 Final Notes 96 | 97 | Enjoy chatting with cutting-edge LLMs in your own app! If you run into any issues or have feedback, feel free to contribute or reach out. Happy coding! 🚀 98 | 99 | -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/SimpleChatWithDownload.dpr: -------------------------------------------------------------------------------- 1 | program SimpleChatWithDownload; 2 | 3 | uses 4 | System.StartUpCopy, 5 | FMX.Forms, 6 | MainForm in 'MainForm.pas' {FormMain}, 7 | DownloadForm in 'DownloadForm.pas' {FormDownload}; 8 | 9 | {$R *.res} 10 | 11 | begin 12 | Application.Initialize; 13 | Application.CreateForm(TFormMain, FormMain); 14 | Application.Run; 15 | end. 
16 | 17 | -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libggml-base.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-base.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libggml-blas.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-blas.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libggml-cpu.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-cpu.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libggml-metal.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-metal.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libggml-rpc.dylib: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-rpc.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libggml.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libllama.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libllama.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/macos_arm64/libllava_shared.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libllava_shared.dylib -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/windows_x64/ggml-base.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml-base.dll -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/windows_x64/ggml-cpu.dll: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml-cpu.dll -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/windows_x64/ggml-rpc.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml-rpc.dll -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/windows_x64/ggml.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml.dll -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/windows_x64/llama.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/llama.dll -------------------------------------------------------------------------------- /samples/SimpleChatWithDownload/lib/windows_x64/llava_shared.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/llava_shared.dll -------------------------------------------------------------------------------- /src/Api/LlamaCpp.Api.Ggml.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Api.Ggml; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | 
LlamaCpp.Api;

type
  // Thin binding over the ggml shared library. No entry points are bound
  // yet; DoLoadLibrary is a placeholder for future imports.
  TGgmlApiAccess = class(TLlamaCppLibraryLoader)
  protected
    procedure DoLoadLibrary(const ALibAddr: THandle); override;
  end;

  // Process-wide singleton; created and destroyed automatically by the
  // class constructor/destructor.
  TGgmlApi = class(TGgmlApiAccess)
  private
    class var FInstance: TGgmlApi;
  public
    class constructor Create();
    class destructor Destroy();

    class property Instance: TGgmlApi read FInstance;
  end;

implementation

{ TGgmlApiAccess }

procedure TGgmlApiAccess.DoLoadLibrary(const ALibAddr: THandle);
begin
  inherited;
  // No ggml symbols are resolved yet.
end;

{ TGgmlApi }

class constructor TGgmlApi.Create;
begin
  FInstance := TGgmlApi.Create();
end;

class destructor TGgmlApi.Destroy;
begin
  FInstance.Free();
end;

end.

--------------------------------------------------------------------------------
/src/Api/LlamaCpp.Api.Llava.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Api.Llava;

interface

uses
  System.SysUtils,
  LlamaCpp.Api,
  LlamaCpp.CType.Llava,
  LlamaCpp.CType.Llama;

type
  // Bindings for the llava shared library (multimodal CLIP helpers).
  // Each nested function-pointer type mirrors a C signature from
  // llava.h / clip.h; fields below hold the resolved entry points.
  TLlavaApiAccess = class(TLlamaCppLibraryLoader)
  public type
    // bool llava_validate_embed_size(const llama_context*, const clip_ctx*)
    TLLavaValidateEmbedSize = function(const ALlamaContext: PLlamaContext;
      const AClipContext: PClipCtx): Boolean; cdecl;
    // Builds an image embedding from an already-decoded 8-bit image.
    TLLavaImageEmbedMakeWithClipImg = function(AClipContext: PClipCtx;
      AThreadCount: Integer; const AImage: PClipImageU8;
      var AImageEmbedOut: PSingle; var AImagePosOut: Integer): Boolean; cdecl;
    // Builds an image embedding from an in-memory encoded image.
    TLLavaImageEmbedMakeWithBytes = function(AClipContext: PClipCtx;
      AThreadCount: Integer; const AImageBytes: PByte;
      AImageBytesLength: Integer): PLlavaImageEmbed; cdecl;
    // Builds an image embedding from an image file on disk.
    TLLavaImageEmbedMakeWithFilename = function(AClipContext: PClipCtx;
      AThreadCount: Integer; const AImagePath: PAnsiChar)
      : PLlavaImageEmbed; cdecl;
    TLLavaImageEmbedFree = procedure(AImageEmbed: PLlavaImageEmbed); cdecl;
    // Feeds an image embedding into the llama context; APastPos is
    // advanced past the consumed positions.
    TLLavaEvalImageEmbed = function(ALlamaContext: PLlamaContext;
      const AImageEmbed: PLlavaImageEmbed; ABatchSize: Integer;
      var APastPos: Integer): Boolean; cdecl;
    TClipModelLoad = function(const AFileName: PAnsiChar; AVerbosity: Integer)
      : PClipCtx; cdecl;
    TClipFree = procedure(AClipContext: PClipCtx); cdecl;
  protected
    procedure DoLoadLibrary(const ALibAddr: THandle); override;
  public
    llava_validate_embed_size: TLLavaValidateEmbedSize;
    llava_image_embed_make_with_clip_img: TLLavaImageEmbedMakeWithClipImg;
    llava_image_embed_make_with_bytes: TLLavaImageEmbedMakeWithBytes;
    llava_image_embed_make_with_filename: TLLavaImageEmbedMakeWithFilename;
    llava_image_embed_free: TLLavaImageEmbedFree;
    llava_eval_image_embed: TLLavaEvalImageEmbed;
    clip_model_load: TClipModelLoad;
    clip_free: TClipFree;
  end;

  // Process-wide singleton, managed by the class constructor/destructor.
  TLlavaApi = class(TLlavaApiAccess)
  private
    class var FInstance: TLlavaApi;
  public
    class constructor Create();
    class destructor Destroy();

    class property Instance: TLlavaApi read FInstance;
  end;

implementation

{ TLlavaApiAccess }

// Resolves every llava/clip export from the loaded library.
// NOTE(review): results are not checked - a missing export leaves a nil
// function pointer that will access-violate on first call.
procedure TLlavaApiAccess.DoLoadLibrary(const ALibAddr: THandle);
begin
  inherited;
  @llava_validate_embed_size := GetProcAddress(ALibAddr,
    'llava_validate_embed_size');
  @llava_image_embed_make_with_clip_img :=
    GetProcAddress(ALibAddr, 'llava_image_embed_make_with_clip_img');
  @llava_image_embed_make_with_bytes := GetProcAddress(ALibAddr,
    'llava_image_embed_make_with_bytes');
  @llava_image_embed_make_with_filename :=
    GetProcAddress(ALibAddr, 'llava_image_embed_make_with_filename');
  @llava_image_embed_free := GetProcAddress(ALibAddr, 'llava_image_embed_free');
  @llava_eval_image_embed := GetProcAddress(ALibAddr, 'llava_eval_image_embed');
  @clip_model_load := GetProcAddress(ALibAddr, 'clip_model_load');
  @clip_free := GetProcAddress(ALibAddr, 'clip_free');
end;

{ TLlavaApi }

class constructor TLlavaApi.Create;
begin
  FInstance := TLlavaAPI.Create();
end;

class destructor TLlavaApi.Destroy;
begin
  FInstance.Free();
end;

end.

--------------------------------------------------------------------------------
/src/Api/LlamaCpp.Api.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Api;

interface

uses
  System.SysUtils
  {$IFDEF MSWINDOWS}
  , Winapi.Windows
  {$ENDIF MSWINDOWS};

type
  // Base class for a dynamically loaded llama.cpp shared library.
  // Subclasses resolve their entry points in DoLoadLibrary.
  TLlamaCppLibraryLoader = class
  strict private
    FLibAddr: THandle; // OS handle of the loaded library; 0 when not loaded
  protected
    // Resolves one exported symbol; cross-platform wrapper.
    function GetProcAddress(const AHandle: THandle;
      const AProcName: string): pointer;
    // Called once after a successful load so the subclass can bind symbols.
    procedure DoLoadLibrary(const ALibAddr: THandle); virtual; abstract;
  public
    procedure Load(const ALibraryPath: string);
    procedure Unload();
  end;

  // Convenience facade that loads/unloads the ggml + llama + llava trio.
  TLlamaCppApis = class
  public
    class procedure LoadAll(ALibDir: string = '');
    class procedure UnloadAll();
  end;

implementation

uses
  System.IOUtils,
  LlamaCpp.Api.Ggml,
  LlamaCpp.Api.Llava,
  LlamaCpp.Api.Llama;

{ TLlamaCppLibraryLoader }

// Loads the shared library at ALibraryPath and lets the subclass resolve
// its entry points. Raises if the file is missing or cannot be loaded.
procedure TLlamaCppLibraryLoader.Load(const ALibraryPath: string);
begin
  if not TFile.Exists(ALibraryPath) then
    raise Exception.CreateFmt('Library "%s" not found.', [ALibraryPath]);

  {$IFDEF MSWINDOWS}
  FLibAddr := Winapi.Windows.LoadLibrary(PWideChar(WideString(ALibraryPath)));
  {$ELSE}
  FLibAddr := System.SysUtils.LoadLibrary(PWideChar(WideString(ALibraryPath)));
  {$ENDIF MSWINDOWS}
  if FLibAddr = 0 then
    raise Exception.CreateFmt('Unable to load llama library. 
%s', [SysErrorMessage(GetLastError)]);
  DoLoadLibrary(FLibAddr);
end;

{ Releases the loaded library.
  Hardened versus the original: a zero handle is ignored and the handle is
  reset afterwards, so Unload is now safe to call twice or before any
  successful Load (previously FreeLibrary ran unconditionally on a
  possibly-invalid handle). }
procedure TLlamaCppLibraryLoader.Unload;
begin
  if FLibAddr = 0 then
    Exit;

  {$IFDEF MSWINDOWS}
  Winapi.Windows.FreeLibrary(FLibAddr);
  {$ELSE}
  System.SysUtils.FreeLibrary(FLibAddr);
  {$ENDIF MSWINDOWS}

  FLibAddr := 0;
end;

// Resolves an exported symbol from the given library handle.
// Returns nil when the symbol is absent; callers must check before use.
function TLlamaCppLibraryLoader.GetProcAddress(const AHandle: THandle;
  const AProcName: string): pointer;
begin
  {$IFDEF MSWINDOWS}
  Result := Winapi.Windows.GetProcAddress(AHandle,
    PWideChar(WideString(AProcName)));
  {$ELSE}
  Result := System.SysUtils.GetProcAddress(AHandle,
    PWideChar(WideString(AProcName)));
  {$ENDIF MSWINDOWS}
end;

{ TLlamaCppApis }

// Loads ggml, llama and llava from ALibDir; defaults to the "llamacpp"
// folder next to the executable. Library file names are chosen per
// platform at compile time.
class procedure TLlamaCppApis.LoadAll(ALibDir: string);
const
  {$IFDEF MSWINDOWS}
  LIB_LLAMA = 'llama.dll';
  LIB_GGML = 'ggml.dll';
  LIB_LAVA = 'llava_shared.dll';
  {$ELSEIF DEFINED(OSX64)}
  LIB_LLAMA = 'libllama.dylib';
  LIB_GGML = 'libggml.dylib';
  LIB_LAVA = 'libllava_shared.dylib';
  {$ELSE}
  LIB_LLAMA = 'libllama.so';
  LIB_GGML = 'libggml.so';
  LIB_LAVA = 'libllava_shared.so';
  {$ENDIF MSWINDOWS}
begin
  if ALibDir.IsEmpty() then
    // TPath.Combine takes plain strings; the PWideChar/WideString round
    // trip in the original was a no-op and has been removed.
    ALibDir := TPath.Combine(TPath.GetDirectoryName(ParamStr(0)), 'llamacpp');
  {$IFDEF MSWINDOWS}
  // Let the OS loader resolve dependent DLLs (ggml backends) from ALibDir.
  SetDllDirectory(PWideChar(WideString(ALibDir)));
  {$ENDIF MSWINDOWS}
  TLlamaApi.Instance.Load(TPath.Combine(ALibDir, LIB_LLAMA));
  TGgmlApi.Instance.Load(TPath.Combine(ALibDir, LIB_GGML));
  TLlavaApi.Instance.Load(TPath.Combine(ALibDir, LIB_LAVA));
end;

// Unloads all three libraries (same order as the original implementation).
class procedure TLlamaCppApis.UnloadAll;
begin
  TLlamaApi.Instance.Unload();
  TLlavaApi.Instance.Unload();
  TGgmlApi.Instance.Unload();
end;

end.
--------------------------------------------------------------------------------
/src/CType/Ggml/LlamaCpp.CType.Ggml.Backend.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Ggml.Backend;

interface

type
  // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
  // when ask == true, the scheduler wants to know if the user wants to observe this node
  // this allows the scheduler to batch nodes together in order to evaluate them in a single call
  //
  // when ask == false, the scheduler is passing the node tensor to the user for observation
  // if the user returns false, the scheduler will cancel the graph compute
  //
  // typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
  // AGgmlTensor is kept as an untyped pointer because ggml_tensor is opaque
  // to these bindings.
  TGgmlBackendSchedEvalCallback = function(const AGgmlTensor: pointer;
    const AAsk: boolean; const AUserData: pointer): boolean; cdecl;

implementation

end.

--------------------------------------------------------------------------------
/src/CType/Ggml/LlamaCpp.CType.Ggml.Cpu.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Ggml.Cpu;

interface

type
  // Mirrors the C enum ggml_numa_strategy (NUMA placement policy).
  // MINENUMSIZE 4 forces 4-byte enum storage to match the C enum ABI;
  // restored to the project default (1) immediately after.
  {$MINENUMSIZE 4}
  TGGMLNumaStrategy = (
    GGML_NUMA_STRATEGY_DISABLED = 0,
    GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
    GGML_NUMA_STRATEGY_ISOLATE = 2,
    GGML_NUMA_STRATEGY_NUMACTL = 3,
    GGML_NUMA_STRATEGY_MIRROR = 4,
    GGML_NUMA_STRATEGY_COUNT
  );
  {$MINENUMSIZE 1}

implementation

end.
--------------------------------------------------------------------------------
/src/CType/Ggml/LlamaCpp.CType.Ggml.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Ggml;

interface

type
  // Namespace-style holder for ggml constants.
  TGgml = class
  public const
    GGML_ROPE_TYPE_NEOX = 2;
  end;

  // Mirrors enum ggml_type (tensor element / quantization formats).
  // MINENUMSIZE 4 matches the 4-byte C enum ABI.
  {$MINENUMSIZE 4}
  TGGMLType = (GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1, GGML_TYPE_Q4_0 = 2,
    GGML_TYPE_Q4_1 = 3,
    // GGML_TYPE_Q4_2 = 4, support has been removed
    // GGML_TYPE_Q4_3 = 5, support has been removed
    GGML_TYPE_Q5_0 = 6, GGML_TYPE_Q5_1 = 7, GGML_TYPE_Q8_0 = 8,
    GGML_TYPE_Q8_1 = 9, GGML_TYPE_Q2_K = 10, GGML_TYPE_Q3_K = 11,
    GGML_TYPE_Q4_K = 12, GGML_TYPE_Q5_K = 13, GGML_TYPE_Q6_K = 14,
    GGML_TYPE_Q8_K = 15, GGML_TYPE_IQ2_XXS = 16, GGML_TYPE_IQ2_XS = 17,
    GGML_TYPE_IQ3_XXS = 18, GGML_TYPE_IQ1_S = 19, GGML_TYPE_IQ4_NL = 20,
    GGML_TYPE_IQ3_S = 21, GGML_TYPE_IQ2_S = 22, GGML_TYPE_IQ4_XS = 23,
    GGML_TYPE_I8 = 24, GGML_TYPE_I16 = 25, GGML_TYPE_I32 = 26,
    GGML_TYPE_I64 = 27, GGML_TYPE_F64 = 28, GGML_TYPE_IQ1_M = 29,
    GGML_TYPE_BF16 = 30, GGML_TYPE_Q4_0_4_4 = 31, GGML_TYPE_Q4_0_4_8 = 32,
    GGML_TYPE_Q4_0_8_8 = 33, GGML_TYPE_TQ1_0 = 34, GGML_TYPE_TQ2_0 = 35,
    GGML_TYPE_COUNT = Integer(36) // Number of types (excluding the commented ones)
  );

  // Mirrors enum ggml_log_level.
  TGgmlLogLevel = (GGML_LOG_LEVEL_NONE = 0, GGML_LOG_LEVEL_DEBUG = 1,
    GGML_LOG_LEVEL_INFO = 2, GGML_LOG_LEVEL_WARN = 3, GGML_LOG_LEVEL_ERROR = 4,
    GGML_LOG_LEVEL_CONT = 5);
  {$MINENUMSIZE 1}

  // Abort callback
  // If not NULL, called before ggml computation
  // If it returns true, the computation is aborted
  // typedef bool (*ggml_abort_callback)(void * data);
  TGgmlAbortCallback = function(const AData: pointer): boolean; cdecl;

  // Opaque handle to a ggml tensor; never dereferenced on the Delphi side.
  PGgmlTensor = ^TGgmlTensor;
  TGgmlTensor = NativeUInt;

  // Mirrors ggml_log_callback: level, message text and the user_data
  // pointer registered with the callback.
  TGGMLLogCallback = procedure(level: TGgmlLogLevel; const text: PAnsiChar;
    user_data: pointer); cdecl;

implementation

end.

--------------------------------------------------------------------------------
/src/CType/Llava/LlamaCpp.CType.Llava.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Llava;

interface

type
  // Mirrors struct llava_image_embed.
  PLlavaImageEmbed = ^TLLavaImageEmbed;
  TLlavaImageEmbed = record
    embed: PSingle; // Pointer to a float array (Single type in Delphi)
    n_image_pos: Int32;
  end;

  // The struct clip_ctx is an opaque type, so we represent it as a pointer in Delphi.
  PClipCtx = ^TClipCtx;
  TClipCtx = NativeUInt;

  // Mirrors struct clip_image_u8 (8-bit image buffer).
  PClipImageU8 = ^TClipImageU8;
  TClipImageU8 = record
    nx: Integer;
    ny: Integer;
    buf: TArray<Byte>; // This is the equivalent of std::vector<uint8_t>
  end;

implementation

end.

--------------------------------------------------------------------------------
/src/Common/Cache/LlamaCpp.Common.Cache.Base.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Cache.Base;

interface

uses
  LlamaCpp.Common.Types,
  LlamaCpp.Common.State;

type
  // Abstract base for llama state caches keyed by token sequences.
  // Concrete subclasses (RAM / disk) implement storage and eviction.
  // NOTE(review): generic arguments below were stripped by the dump and
  // restored from usage in the subclasses - confirm against the source.
  TBaseLlamaCache = class(TInterfacedObject, ILlamaCache)
  public
    // Soft size limit in bytes; subclasses evict once it is exceeded.
    CapacityBytes: Int64;
  protected
    function GetCacheSize: Int64; virtual; abstract;
    function FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>; virtual; abstract;
    function GetItem(const AKey: TArray<Integer>): TLlamaState; virtual; abstract;
    function Contains(const AKey: TArray<Integer>): Boolean; virtual; abstract;
    procedure SetItem(const AKey: TArray<Integer>; const AValue: TLlamaState); virtual; abstract;
  protected
    // Length of the common leading run of token ids shared by A and B.
    function LongestTokenPrefix(const A, B: TArray<Integer>): integer;
  public
    constructor Create(ACapacityBytes: Int64);
  end;

implementation

uses
  System.Math;

{ TBaseLlamaCache }

constructor
TBaseLlamaCache.Create(ACapacityBytes: Int64);
begin
  inherited Create;
  CapacityBytes := ACapacityBytes;
end;

// Returns how many leading token ids A and B have in common.
// Walks both arrays in lockstep until the first mismatch or until the
// shorter one runs out.
function TBaseLlamaCache.LongestTokenPrefix(const A,
  B: TArray<Integer>): integer;
var
  LLimit: Integer;
begin
  LLimit := Min(Length(A), Length(B));
  Result := 0;
  while (Result < LLimit) and (A[Result] = B[Result]) do
    Inc(Result);
end;

end.

--------------------------------------------------------------------------------
/src/Common/Cache/LlamaCpp.Common.Cache.Disk.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Cache.Disk;

interface

uses
  System.SysUtils,
  System.Classes,
  System.Threading,
  System.Generics.Collections,
  System.IOUtils,
  FireDAC.Comp.Client,
  FireDAC.Phys.SQLite,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.State,
  LlamaCpp.Common.Cache.Base;

type
  TLlamaDiskCache = class(TBaseLlamaCache)
  private const
    {$IFDEF MSWINDOWS}
    DEFAULT_CACHE_DIR = '.\cache\llama_cache';
    {$ELSE}
    DEFAULT_CACHE_DIR = './cache/llama_cache';
    {$ENDIF}
  private
    FCacheFileName: string;
    FConnection: TFDConnection;
    FDatS: TFDQuery;
    FTask: ITask;
  private
    procedure CreateCacheConnectionDefs();
    procedure CreateCacheTable();
    function Load(const AKey: TArray<Integer>): TLlamaState;
    procedure Save(const AKey: TArray<Integer>; const AState: TLlamaState);
    procedure Delete(const AKey: TArray<Integer>);
  public
    constructor Create(const ACacheDir: string = DEFAULT_CACHE_DIR;
      ACapacityBytes: Int64 = Int64(2) shl 30);
    destructor Destroy; override;

    function GetCacheSize: Int64; override;
    function FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>; override;
    function GetItem(const AKey: TArray<Integer>): TLlamaState; override;
    function Contains(const AKey: TArray<Integer>): Boolean; override;
    procedure
SetItem(const AKey: TArray<Integer>; const AValue: TLlamaState); override;
  end;

implementation

uses
  FireDAC.Stan.Intf, FireDAC.Stan.Option,
  FireDAC.Stan.Error, FireDAC.UI.Intf, FireDAC.Phys.Intf, FireDAC.Stan.Def,
  FireDAC.Stan.Pool, FireDAC.Stan.Async, FireDAC.Phys,
  FireDAC.Stan.ExprFuncs, FireDAC.Phys.SQLiteWrapper, FireDAC.Phys.SQLiteWrapper.Stat,
  FireDAC.Phys.SQLiteDef, FireDAC.Stan.Param, FireDAC.DatS, FireDAC.DApt.Intf,
  FireDAC.DApt, Data.DB, FireDAC.Comp.DataSet,
  {$IFDEF MSWINDOWS}
  Windows
  {$ELSE}
  Posix.Unistd
  {$ENDIF}
  ;

type
  // Key/value pair as persisted in the CACHE table.
  // NOTE(review): generic arguments restored from usage - the dump
  // stripped angle-bracketed text; confirm against the source.
  TCachePair = TPair<TArray<Integer>, TLlamaState>;
  TCachePairs = TArray<TCachePair>;

{ TLlamaDiskCache }

// Creates a per-process, per-thread SQLite-backed cache below ACacheDir.
// Stale files from previous runs are purged best-effort first.
constructor TLlamaDiskCache.Create(const ACacheDir: string; ACapacityBytes: Int64);
var
  LStr: string;
begin
  inherited Create(ACapacityBytes);

  if TDirectory.Exists(ACacheDir) then
  begin
    for LStr in TDirectory.GetFiles(ACacheDir, '*', TSearchOption.soAllDirectories) do
      try
        TFile.Delete(LStr); // Delete files not in use
      except
        // Best-effort: files still locked by other processes are kept.
      end;

    for LStr in TDirectory.GetDirectories(ACacheDir) do
      try
        TDirectory.Delete(LStr, true); // Delete files not in use
      except
        // Best-effort cleanup only.
      end;
  end;

  // Database path: <cache dir>/<process id>/<thread id>/cache.db - keeps
  // concurrent processes and threads from sharing one SQLite file.
  {$IFDEF MSWINDOWS}
  FCacheFileName := TPath.Combine(
    TPath.GetFullPath(ACacheDir),
    GetCurrentProcessId().ToString());
  {$ELSE}
  FCacheFileName := TPath.Combine(
    TPath.GetFullPath(ACacheDir),
    GetPID().ToString());
  {$ENDIF}

  FCacheFileName := TPath.Combine(
    FCacheFileName,
    TThread.CurrentThread.ThreadID.ToString());

  FCacheFileName := TPath.Combine(FCacheFileName, 'cache.db');

  if not TDirectory.Exists(TPath.GetDirectoryName(FCacheFileName)) then
    TDirectory.CreateDirectory(TPath.GetDirectoryName(FCacheFileName));

  // FDatS is owned by FConnection and is freed with it.
  FConnection := TFDConnection.Create(nil);
  FDatS := TFDQuery.Create(FConnection);
  FDatS.Connection := FConnection;

  CreateCacheConnectionDefs();
  CreateCacheTable();
end;

// Waits for any pending background save (see SetItem) before tearing the
// connection down.
destructor TLlamaDiskCache.Destroy;
begin
  if Assigned(FTask) then
    FTask.Wait();

  FConnection.Free();
  inherited;
end;

// Points the SQLite connection at FCacheFileName and opens it.
procedure TLlamaDiskCache.CreateCacheConnectionDefs;
begin
  FConnection.Params.Values['database'] := FCacheFileName;
  FConnection.LoginPrompt := False;
  FConnection.DriverName := 'SQLite';
  FConnection.Connected:= True;
end;

// Creates the CACHE table; the autoincrement ID preserves insertion order,
// which SetItem uses for oldest-first eviction.
procedure TLlamaDiskCache.CreateCacheTable;
begin
  FDatS.SQL.Text := '''
    CREATE TABLE IF NOT EXISTS CACHE(
      ID INTEGER PRIMARY KEY AUTOINCREMENT,
      KEY BLOB,
      DATA BLOB
    );
  ''';
  FDatS.ExecSQL;
end;

// Loads and deserializes the state stored under AKey (exact match).
// Returns nil when the key is absent; the caller owns the result.
function TLlamaDiskCache.Load(const AKey: TArray<Integer>): TLlamaState;
var
  LStream: TMemoryStream;
begin
  FDatS.SQL.Text := 'SELECT KEY, DATA FROM CACHE WHERE KEY = :KEY';

  LStream := TMemoryStream.Create();
  try
    // Keys are stored as the raw little-endian bytes of the token array.
    LStream.WriteBuffer(AKey[0], Length(AKey) * SizeOf(Integer));
    LStream.Position := 0;
    FDatS.ParamByName('KEY').LoadFromStream(LStream, TFieldType.ftBlob);
    LStream.Clear();

    FDatS.Open();

    // NOTE(review): this early exit leaves the dataset open; consider
    // closing FDatS before returning nil.
    if FDatS.IsEmpty() then
      Exit(nil);

    try
      LStream.Size := 0;
      TBlobField(FDatS.FieldByName('DATA')).SaveToStream(LStream);

      Result := TLlamaState.Create();
      try
        LStream.Position := 0;
        Result.Deserialize(LStream);
      except
        on E: Exception do
        begin
          FreeAndNil(Result);
          raise;
        end;
      end;
    finally
      FDatS.Close();
    end;

  finally
    LStream.Free;
  end;
end;

// Inserts AState under AKey, replacing any previous row for that key.
procedure TLlamaDiskCache.Save(const AKey: TArray<Integer>;
  const AState: TLlamaState);
var
  LStream: TMemoryStream;
begin
  Delete(AKey); // Ensure at most one row per key.

  LStream := TMemoryStream.Create();
  try
    FDatS.SQL.Text := 'INSERT INTO CACHE (KEY, DATA) VALUES (:KEY, :DATA)';

    LStream.WriteBuffer(AKey[0], Length(AKey) * SizeOf(Integer));
    LStream.Position := 0;
    FDatS.ParamByName('KEY').LoadFromStream(LStream, TFieldType.ftBlob);

    LStream.Clear();
    LStream.Size := 0;

    AState.Serialize(LStream);
    LStream.Position := 0;
    FDatS.ParamByName('DATA').LoadFromStream(LStream, TFieldType.ftBlob);
    LStream.Clear();

    FDatS.ExecSQL();
    FConnection.Commit();
  finally
    LStream.Free;
  end;
end;

// Removes the row stored under AKey (no-op when absent).
procedure TLlamaDiskCache.Delete(const AKey: TArray<Integer>);
var
  LStream: TMemoryStream;
begin
  FDatS.SQL.Text := 'DELETE FROM CACHE WHERE KEY = :KEY';

  LStream := TMemoryStream.Create();
  try
    LStream.WriteBuffer(AKey[0], Length(AKey) * SizeOf(Integer));
    LStream.Position := 0;
    FDatS.ParamByName('KEY').LoadFromStream(LStream, TFieldType.ftBlob);
    LStream.Clear();

    FDatS.ExecSQL();
    FConnection.Commit();
  finally
    LStream.Free;
  end;
end;

// Total stored payload size (keys + values) in bytes.
// NOTE(review): SUM() yields NULL on an empty table; confirm the variant
// conversion to Int64 behaves as intended in that case.
function TLlamaDiskCache.GetCacheSize: Int64;
const
  SQL_SIZES = 'SELECT SUM(LENGTH(KEY)) + SUM(LENGTH(DATA)) FROM CACHE;';
begin
  FDatS.Open(SQL_SIZES);
  try
    Result := FDatS.Fields[0].Value;
  finally
    FDatS.Close();
  end;
end;

// Scans every stored key and returns the one sharing the longest token
// prefix with AKey (nil when the table is empty).
function TLlamaDiskCache.FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>;
var
  LPrefixLen: Integer;
  LMaxPrefixLen: Integer;
  LKey: TArray<Integer>;
  LStream: TMemoryStream;
begin
  LMaxPrefixLen := 0;

  FDatS.Open('SELECT KEY, DATA FROM CACHE');

  if FDatS.IsEmpty() then
    Exit(nil);

  FDatS.First();

  LStream := TMemoryStream.Create();
  try
    while not FDatS.Eof do
    begin
      LStream.Clear();

      TBlobField(FDatS.FieldByName('KEY')).SaveToStream(LStream);

      LStream.Position := 0;
      SetLength(LKey, LStream.Size div SizeOf(Integer));
      LStream.ReadBuffer(LKey[0], Length(LKey) * SizeOf(Integer));

      // Keep the stored key with the longest shared token prefix.
      LPrefixLen := LongestTokenPrefix(LKey, AKey);
      if LPrefixLen > LMaxPrefixLen then
      begin
        LMaxPrefixLen := LPrefixLen;
        Result := LKey;
      end;

      FDatS.Next();
    end;
  finally
    LStream.Free;
  end;

  // NOTE(review): this close is skipped if the scan above raises; consider
  // moving it into the finally block. Also, Result is never explicitly set
  // to nil when no prefix matches - confirm callers see an empty array.
  FDatS.Close();
end;

// True when some stored key shares a token prefix with AKey.
// Blocks until any background save started by SetItem has finished.
function TLlamaDiskCache.Contains(const AKey: TArray<Integer>): Boolean;
begin
  if Assigned(FTask) then
    FTask.Wait();

  Result := Assigned(FindLongestPrefixKey(AKey));
end;

// Retrieves (and removes) the state stored under the longest-prefix match
// of AKey. Raises when no stored key shares a prefix. Deleting after load
// gives the cache move semantics: each state is handed out at most once.
function TLlamaDiskCache.GetItem(const AKey: TArray<Integer>): TLlamaState;
var
  LFoundKey: TArray<Integer>;
begin
  if Assigned(FTask) then
    FTask.Wait();

  LFoundKey := FindLongestPrefixKey(AKey);

  if not Assigned(LFoundKey) then
    raise Exception.Create('Key not found');

  Result := Load(LFoundKey);

  Delete(LFoundKey);
end;

// Stores a clone of AValue under AKey on a background task, then evicts
// the oldest rows (lowest ID) until the cache fits within CapacityBytes.
// NOTE(review): FTask is replaced without waiting for a previous task, and
// the task uses FDatS/FConnection, which the main thread also touches;
// confirm writes are serialized by the callers (Contains/GetItem do wait).
procedure TLlamaDiskCache.SetItem(const AKey: TArray<Integer>;
  const AValue: TLlamaState);
var
  LValue: TLlamaState;
begin
  // Clone so the caller may free AValue immediately after this call.
  LValue := AValue.Clone();
  FTask := TTask.Run(procedure() begin
    try
      Save(AKey, LValue);
    finally
      LValue.Free();
    end;

    while (GetCacheSize() > CapacityBytes) do
      FDatS.ExecSQL('DELETE FROM CACHE WHERE ID = (SELECT MIN(ID) FROM CACHE);');

    FConnection.Commit();
  end);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Cache/LlamaCpp.Common.Cache.Ram.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Cache.Ram;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Cache.Base,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.State;

type
  // In-memory llama state cache keyed by token sequences. Insertion order is
  // preserved (TOrderedDictionary) so the oldest entry can be evicted first
  // once the accumulated state size exceeds CapacityBytes.
  TLlamaRAMCache = class(TBaseLlamaCache)
  private
    // 1 GiB on Win32 (NativeInt is 32-bit there), 2 GiB on 64-bit targets.
    const DEFAULT_CAPACITY = {$IFDEF WIN32}1_073_741_824{$ELSE}Int64(2) shl 30{$ENDIF WIN32};
  private
    FCache: TOrderedDictionary<TArray<Integer>, TLlamaState>;
  public
    constructor Create(ACapacityBytes: NativeInt = DEFAULT_CAPACITY);
    destructor Destroy; override;

    // Total size in bytes of all cached states.
    function GetCacheSize: Int64; override;
    // Returns the cached key sharing the longest common token prefix with
    // AKey, or nil when no cached key overlaps it.
    function FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>; override;
    // Returns the best-matching state and removes it from the cache.
    // Ownership of the returned TLlamaState transfers to the caller.
    function GetItem(const AKey: TArray<Integer>): TLlamaState; override;
    function Contains(const AKey: TArray<Integer>): Boolean; override;
    // Stores a clone of AValue; evicts oldest entries while over capacity.
    procedure SetItem(const AKey: TArray<Integer>; const AValue: TLlamaState); override;
  end;

implementation

{ TLlamaRAMCache }

constructor TLlamaRAMCache.Create(ACapacityBytes: NativeInt);
begin
  inherited Create(ACapacityBytes);
  FCache := TOrderedDictionary<TArray<Integer>, TLlamaState>.Create();
end;

destructor TLlamaRAMCache.Destroy;
var
  I: Integer;
begin
  // The cache owns its states; free them before releasing the dictionary.
  for I := 0 to FCache.Values.Count - 1 do
    FCache.ValueList[I].Free();

  FCache.Free;
  inherited;
end;

function TLlamaRAMCache.GetCacheSize: Int64;
var
  I: Integer;
begin
  Result := 0;
  for I := 0 to FCache.Count - 1 do
    Result := Result + FCache.ValueList[I].GetSize();
end;

function TLlamaRAMCache.FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>;
var
  LPrefixLen: Integer;
  LMaxPrefixLen: Integer;
  LCachedItem: TPair<TArray<Integer>, TLlamaState>;
begin
  // BUGFIX: a managed (dynamic array) function result is not guaranteed to
  // be initialized in Delphi - it behaves like a hidden var parameter.
  // Without the explicit nil, stale data from a previous call could be
  // returned when nothing matches, breaking Contains/GetItem.
  Result := nil;
  LMaxPrefixLen := 0;

  for LCachedItem in FCache do
  begin
    LPrefixLen := LongestTokenPrefix(LCachedItem.Key, AKey);
    if LPrefixLen > LMaxPrefixLen then
    begin
      LMaxPrefixLen := LPrefixLen;
      Result := LCachedItem.Key;
    end;
  end;
end;

function TLlamaRAMCache.Contains(const AKey: TArray<Integer>): Boolean;
begin
  Result := Assigned(FindLongestPrefixKey(AKey));
end;

function TLlamaRAMCache.GetItem(const AKey: TArray<Integer>): TLlamaState;
var
  LFoundKey: TArray<Integer>;
begin
  LFoundKey := FindLongestPrefixKey(AKey);

  if not Assigned(LFoundKey) then
    raise Exception.Create('Key not found');

  Result := FCache[LFoundKey];

  // Remove without freeing: the state is handed over to the caller.
  FCache.Remove(LFoundKey);
end;

procedure TLlamaRAMCache.SetItem(const AKey: TArray<Integer>;
  const AValue: TLlamaState);
var
  LOld: TLlamaState;
begin
  // BUGFIX: AddOrSetValue would overwrite an existing entry without freeing
  // the previous state object, leaking it.
  if FCache.TryGetValue(AKey, LOld) then
    LOld.Free();

  FCache.AddOrSetValue(AKey, AValue.Clone());

  // Evict oldest entries (insertion order) until we are under capacity.
  while (GetCacheSize() > CapacityBytes) do
  begin
    FCache.ValueList[0].Free();
    FCache.Remove(FCache.KeyList[0]);
  end;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Completion/LlamaCpp.Common.Chat.Completion.Collection.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Completion.Collection;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types;

type
  // Process-wide registry mapping chat-format names to completion handlers.
  // A single instance is created by the class constructor and exposed via
  // the Instance class property; the built-in formatters register themselves
  // through TChatFormatterRegistration.RegisterAll.
  TLlamaChatCompletionCollection = class
  private
    class var FInstance: TLlamaChatCompletionCollection;
  private
    FChatCompletionHandlers: TDictionary<string, ILlamaChatCompletionHandler>;
  private
    class constructor Create();
    class destructor Destroy();
  public
    constructor Create();
    destructor Destroy(); override;

    // Registers AChatHandler under AName. Raises when the name is already
    // taken unless AOverwrite is true.
    procedure RegisterChatCompletionHandler(const AName: string;
      const AChatHandler: ILlamaChatCompletionHandler;
      const AOverwrite: boolean = false);
    // Removes the handler registered under AName; raises when absent.
    procedure UnregisterChatHandler(const AName: string);

    // Returns the handler for AName, or nil when none is registered.
    function GetChatCompletionHandler(const AName: string)
      : ILlamaChatCompletionHandler;

    class property Instance: TLlamaChatCompletionCollection read FInstance;
  end;

implementation

uses
  LlamaCpp.Common.Chat.Formatter.Registration;

{ TLlamaChatCompletionCollection }

class constructor TLlamaChatCompletionCollection.Create;
begin
  FInstance := TLlamaChatCompletionCollection.Create();
  TChatFormatterRegistration.RegisterAll();
end;

class destructor TLlamaChatCompletionCollection.Destroy;
begin
  TChatFormatterRegistration.UnregisterAll();
  FInstance.Free();
end;

constructor TLlamaChatCompletionCollection.Create;
begin
  FChatCompletionHandlers :=
    TDictionary<string, ILlamaChatCompletionHandler>.Create();
end;

destructor TLlamaChatCompletionCollection.Destroy;
begin
  FChatCompletionHandlers.Free();
  inherited;
end;

procedure TLlamaChatCompletionCollection.RegisterChatCompletionHandler(
  const AName: string; const AChatHandler: ILlamaChatCompletionHandler;
  const AOverwrite: boolean);
begin
  if FChatCompletionHandlers.ContainsKey(AName) and not AOverwrite then
    raise Exception.CreateFmt(
      'Formatter with name "%s" already registered. Use "AOverwrite=true" to overwrite it.', [
      AName]);

  FChatCompletionHandlers.AddOrSetValue(AName, AChatHandler);
end;

procedure TLlamaChatCompletionCollection.UnregisterChatHandler(
  const AName: string);
begin
  if not FChatCompletionHandlers.ContainsKey(AName) then
    raise Exception.CreateFmt(
      'No formatter registered under the name "%s".', [AName]);

  FChatCompletionHandlers.Remove(AName);
end;

function TLlamaChatCompletionCollection.GetChatCompletionHandler(
  const AName: string): ILlamaChatCompletionHandler;
begin
  // TryGetValue already defaults Result to nil on a miss; the explicit
  // assignment just makes the contract obvious.
  if not FChatCompletionHandlers.TryGetValue(AName, Result) then
    Result := nil;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Alpaca.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Alpaca;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Alpaca instruction-style prompt formatter
  // ("### Instruction" / "### Response" turns).
  TAlpacaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TAlpacaChatFormatter }

function TAlpacaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSeparator: string;
  LSeparator2: string;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '### Instruction');
    LRoles.Add('assistant', '### Response');

    LSeparator := sLineBreak + sLineBreak;
    // BUGFIX: the second separator is the EOS token "</s>"; it had been
    // reduced to an empty string (angle-bracket text lost). Matches the
    // reference "alpaca" chat format in llama-cpp-python.
    LSeparator2 := '</s>';

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LPrompt := TLlamaChatFormat.FormatAddColonTwo(
      LSystemMessage, LMessages, LSeparator, LSeparator2);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Baichuan.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Baichuan;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Baichuan-1 prompt formatter (reserved special tokens per turn).
  TBaichuanChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TBaichuanChatFormatter }

function TBaichuanChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    // BUGFIX: Baichuan's role marker tokens had been reduced to empty
    // strings (angle-bracket text lost). Restored from the reference
    // "baichuan" chat format in llama-cpp-python.
    LRoles.Add('user', '<reserved_102>');
    LRoles.Add('assistant', '<reserved_103>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSeparator := '';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle(
      LSystemMessage, LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Baichuan2.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Baichuan2;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Baichuan-2 prompt formatter (reserved special tokens per turn).
  TBaichuan2ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TBaichuan2ChatFormatter }

function TBaichuan2ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    // BUGFIX: Baichuan-2's role marker tokens had been reduced to empty
    // strings (angle-bracket text lost). Restored from the reference
    // "baichuan-2" chat format in llama-cpp-python.
    LRoles.Add('user', '<reserved_106>');
    LRoles.Add('assistant', '<reserved_107>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSeparator := '';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle(
      LSystemMessage, LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.ChatGLM3.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.ChatGLM3;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // ChatGLM-3 prompt formatter (<|user|> / <|assistant|> turn markers).
  TChatGLM3ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TChatGLM3ChatFormatter }

function TChatGLM3ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|user|>');
    LRoles.Add('assistant', '<|assistant|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|system|>'#13#10'%s', [LSystemMessage]);

    // BUGFIX: the separator/stop token is "</s>"; it had been reduced to an
    // empty string (angle-bracket text lost). Matches the reference
    // "chatglm3" chat format in llama-cpp-python.
    LSeparator := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatGML3(
    LSystemMessage, LMessages);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Chatml.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Chatml;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // ChatML prompt formatter (<|im_start|> / <|im_end|> delimited turns).
  TChatmlChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TChatmlChatFormatter }

function TChatmlChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoleMap: TDictionary<string, string>;
  LSystem: string;
  LTurns: TArray<TPair<string, string>>;
  LEndToken: string;
  LPrompt: string;
begin
  LEndToken := '<|im_end|>';

  LRoleMap := TDictionary<string, string>.Create();
  try
    LRoleMap.Add('user', '<|im_start|>user');
    LRoleMap.Add('assistant', '<|im_start|>assistant');

    // Prepend the ChatML system header to the conversation's system message.
    LSystem := String.Format('<|im_start|>system'#13#10'%s',
      [TLlamaChatFormat.GetSystemMessage(ASettings.Messages)]);

    // Map roles and append an empty assistant turn so the model answers.
    LTurns := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoleMap)
      + [TPair<string, string>.Create(LRoleMap['assistant'], '')];
  finally
    LRoleMap.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatml(LSystem, LTurns, LEndToken);

  // The end-of-turn token doubles as the stop sequence.
  Result := TChatFormatterResponse.Create(LPrompt, [LEndToken]);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Gemma.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Gemma;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Gemma prompt formatter (<start_of_turn> / <end_of_turn> markers).
  TGemmaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TGemmaChatFormatter }

function TGemmaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    // BUGFIX: Gemma's turn-start tokens had been reduced to bare role names
    // (angle-bracket text lost). Restored from the reference "gemma" chat
    // format in llama-cpp-python.
    LRoles.Add('user', '<start_of_turn>user' + sLineBreak);
    LRoles.Add('assistant', '<start_of_turn>model' + sLineBreak);

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);

  // BUGFIX: the turn separator is "<end_of_turn>\n"; the token had been
  // stripped, leaving only the line break.
  LSeparator := '<end_of_turn>' + sLineBreak;

  LPrompt := TLlamaChatFormat.FormatNoColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Intel.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Intel;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Intel neural-chat prompt formatter ("### User:" / "### Assistant:").
  TIntelChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TIntelChatFormatter }

function TIntelChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoleMap: TDictionary<string, string>;
  LSystemHeader: string;
  LNewline: string;
  LTurns: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoleMap := TDictionary<string, string>.Create();
  try
    LRoleMap.Add('user', '### User:');
    LRoleMap.Add('assistant', '### Assistant:');

    // NOTE(review): unlike the other formatters, the conversation's system
    // message is NOT interpolated here - only the bare header is emitted.
    // Confirm against the reference "intel" chat format whether the system
    // message content should follow this header.
    LSystemHeader := '### System:'#13#10;

    LNewline := sLineBreak;

    // Map roles, then append an empty assistant turn so the model answers.
    LTurns := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoleMap)
      + [TPair<string, string>.Create(LRoleMap['assistant'], '')];
  finally
    LRoleMap.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemHeader, LTurns, LNewline);

  Result := TChatFormatterResponse.Create(LPrompt);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Jinja2.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Jinja2;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Placeholder for a Jinja2-template-driven chat formatter. The template
  // engine is not implemented yet: Format always raises ENotImplemented.
  TJinja2ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  public
    constructor Create(
      const ATemplate: string;
      const AEOSToken: string;
      const ABOSToken: string;
      const AAddGenerationPrompt: boolean = true;
      const AStopTokenIds: TArray<Integer> = nil);

    function ToChatHandler(): ILlamaChatCompletionHandler;
  end;

implementation

uses
  LlamaCpp.Common.Chat.Formatter.Adapter;

{ TJinja2ChatFormatter }

constructor TJinja2ChatFormatter.Create(const ATemplate, AEOSToken,
  ABOSToken: string; const AAddGenerationPrompt: boolean;
  const AStopTokenIds: TArray<Integer>);
begin
  // NOTE(review): all constructor arguments are currently discarded; they
  // will be stored once the Jinja2 parser lands (see Format below).
end;

function TJinja2ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
begin
  // Working in a Jinja2 parser...
  raise ENotImplemented.Create(
    'Please, set the "ChatFormat" option in your settings.');
end;

function TJinja2ChatFormatter.ToChatHandler: ILlamaChatCompletionHandler;
begin
  // Wrap this formatter in the generic formatter-to-handler adapter.
  Result := TChatFormaterAdapter.ToChatCompletionHandler(Self);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Llama2.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Llama2;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Llama-2 chat prompt formatter ([INST] / [/INST] with <<SYS>> block).
  TLlama2ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    // Joins system message and turns with alternating separators,
    // mirroring llama-2's reference prompt construction.
    function FormatLlama2(const ASystemMessage: string;
      const AMessages: TArray<TPair<string, string>>;
      const ASep1, ASep2: string): string;
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TLlama2ChatFormatter }

function TLlama2ChatFormatter.FormatLlama2(const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>; const ASep1,
  ASep2: string): string;
var
  I: Integer;
  LSeps: TArray<string>;
begin
  LSeps := [ASep1, ASep2];
  Result := ASystemMessage + ASep1;

  for I := Low(AMessages) to High(AMessages) do
  begin
    // The first turn is merged into the system block when one exists.
    if not ASystemMessage.IsEmpty() and (I = 0) then
      Result := Result + AMessages[I].Value + LSeps[I mod 2]
    else if not AMessages[I].Value.IsEmpty() then
      Result := Result + AMessages[I].Key + AMessages[I].Value + ' ' + LSeps[I mod 2]
    else
      Result := Result + AMessages[I].Key + ' ';
  end;
end;

function TLlama2ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemTemplate: string;
  LRoles: TDictionary<string, string>;
  LMessages: TArray<TPair<string, string>>;
  LSystemMessage: string;
  LPrompt: string;
begin
  // BUGFIX: the "<s>", "<<SYS>>" and "<</SYS>>" markers had been mangled to
  // "" / "<>" (angle-bracket text lost). Restored from the reference
  // "llama-2" chat format in llama-cpp-python.
  LSystemTemplate := '<s>[INST] <<SYS>>'#13#10'%s'#13#10'<</SYS>>';

  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<s>[INST]');
    LRoles.Add('assistant', '[/INST]');
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
  finally
    LRoles.Free();
  end;

  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);

  if not LSystemMessage.IsEmpty() then
    LSystemMessage := String.Format(LSystemTemplate, [LSystemMessage]);

  LPrompt := FormatLlama2(LSystemMessage, LMessages, ' ', '</s>') + '[/INST]';

  Result := TChatFormatterResponse.Create(LPrompt);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Llama3.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.Llama3;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Llama-3 chat prompt formatter (header-id tokens, <|eot_id|> stop).
  TLlama3ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TLlama3ChatFormatter }

function TLlama3ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('system',
      '<|start_header_id|>system<|end_header_id|>' + sLineBreak + sLineBreak);
    LRoles.Add('user',
      '<|start_header_id|>user<|end_header_id|>' + sLineBreak + sLineBreak);
    LRoles.Add('assistant',
      '<|start_header_id|>assistant<|end_header_id|>' + sLineBreak + sLineBreak);

    // End-of-turn token; also used as the stop sequence.
    LSeparator := '<|eot_id|>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle('', LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
  finally
    LRoles.Free();
  end;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.MilstralLite.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.MilstralLite;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // MistralLite prompt formatter (<|prompter|> / <|assistant|> markers).
  TMistralLiteChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TMistralLiteChatFormatter }

function TMistralLiteChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|prompter|>');
    // BUGFIX: the assistant marker is preceded by the EOS token "</s>",
    // which had been reduced to an empty string (angle-bracket text lost).
    // Restored from the reference "mistrallite" chat format.
    LRoles.Add('assistant', '</s>'#13#10'<|assistant|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    // BUGFIX: the system template ends with "</s>" (stripped in the same way).
    LSystemMessage := String.Format('<|system|>%s</s>', [LSystemMessage]);

    LSeparator := ' ';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatNoColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.MistralInstruct.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.MistralInstruct;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Mistral-Instruct prompt formatter ([INST] ... [/INST] with </s> EOS).
  TMistralInstructChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

uses
  System.Variants;

{ TMistralInstructChatFormatter }

function TMistralInstructChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
const
  // BUGFIX: the EOS token "</s>" had been reduced to an empty string
  // (angle-bracket text lost). Restored from the reference
  // "mistral-instruct" chat format in llama-cpp-python.
  EOS = '</s>';
var
  LStop: string;
  LPrompt: string;
  LMessage: TChatCompletionRequestMessage;
begin
  LStop := EOS;
  LPrompt := String.Empty;

  // Only string contents are rendered; null/non-string contents are skipped.
  for LMessage in ASettings.Messages do
    if (LMessage.Role = 'user') and not VarIsNull(LMessage.Content) and VarIsStr(LMessage.Content) then
      LPrompt := LPrompt + '[INST] ' + VarToStr(LMessage.Content)
    else if (LMessage.Role = 'assistant') and not VarIsNull(LMessage.Content) then
      LPrompt := LPrompt + '[/INST] ' + VarToStr(LMessage.Content) + EOS;

  LPrompt := LPrompt + '[/INST]';

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OasstLlama.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Chat.Formatter.OasstLlama;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  // Open-Assistant-on-Llama prompt formatter (<|prompter|> / <|assistant|>).
  TOasstLlamaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TOasstLlamaChatFormatter }

function TOasstLlamaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|prompter|>');
    LRoles.Add('assistant', '<|assistant|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    // BUGFIX: "<<SYS>>" / "<</SYS>>" had been mangled to "<>" (angle-bracket
    // text lost). Restored from the reference "oasst_llama" chat format.
    LSystemMessage := String.Format(
      '[INST] <<SYS>>'#13#10'%s'#13#10'<</SYS>>'#13#10#13#10'', [
      LSystemMessage]);

    // BUGFIX: the turn separator is the EOS token "</s>" (stripped as above).
    LSeparator := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle(
      LSystemMessage, LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;

end.
59 | -------------------------------------------------------------------------------- /src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OpenBuddy.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Common.Chat.Formatter.OpenBuddy; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | System.Generics.Collections, 8 | LlamaCpp.Common.Types, 9 | LlamaCpp.Common.Settings, 10 | LlamaCpp.Common.Chat.Types, 11 | LlamaCpp.Common.Chat.Format; 12 | 13 | type 14 | TOpenBudyChatFormatter = class(TInterfacedObject, ILlamaChatFormater) 15 | private 16 | function Format(const ASettings: TLlamaChatCompletionSettings) 17 | : TChatFormatterResponse; 18 | end; 19 | 20 | implementation 21 | 22 | { TOpenBudyChatFormatter } 23 | 24 | function TOpenBudyChatFormatter.Format( 25 | const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse; 26 | var 27 | LSystemMessage: string; 28 | LRoles: TDictionary; 29 | LMessages: TArray>; 30 | LSeparator: string; 31 | LPrompt: string; 32 | begin 33 | LSystemMessage := ''' 34 | You are a helpful, respectful and honest INTP-T AI Assistant named Buddy. You are talking to a human User. 35 | Always answer as helpfully and logically as possible, while being safe. Your answers should not include any harmful, political, religious, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. 36 | If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. 37 | You can speak fluently in many languages, for example: English, Chinese. 38 | You cannot access the internet, but you have vast knowledge, cutoff: 2021-09. 
39 | You are trained by OpenBuddy team, (https://openbuddy.ai, https://github.com/OpenBuddy/OpenBuddy), you are based on LLaMA and Falcon transformers model, not related to GPT or OpenAI. 40 | 41 | '''; 42 | 43 | LRoles := TDictionary.Create(); 44 | try 45 | LRoles.Add('user', 'User'); 46 | LRoles.Add('assistant', 'Assistant'); 47 | LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles); 48 | LMessages := LMessages + [ 49 | TPair.Create(LRoles['assistant'], '')]; 50 | finally 51 | LRoles.Free(); 52 | end; 53 | 54 | LSeparator := sLineBreak; 55 | 56 | LPrompt := TLlamaChatFormat.FormatAddColonSingle( 57 | LSystemMessage, LMessages, LSeparator); 58 | 59 | Result := TChatFormatterResponse.Create(LPrompt); 60 | end; 61 | 62 | end. 63 | -------------------------------------------------------------------------------- /src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OpenChat.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Common.Chat.Formatter.OpenChat; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | System.Generics.Collections, 8 | LlamaCpp.Common.Types, 9 | LlamaCpp.Common.Settings, 10 | LlamaCpp.Common.Chat.Types, 11 | LlamaCpp.Common.Chat.Format; 12 | 13 | type 14 | TOpenChatChatFormatter = class(TInterfacedObject, ILlamaChatFormater) 15 | private 16 | function Format(const ASettings: TLlamaChatCompletionSettings) 17 | : TChatFormatterResponse; 18 | end; 19 | 20 | implementation 21 | 22 | { TOpenChatChatFormatter } 23 | 24 | function TOpenChatChatFormatter.Format( 25 | const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse; 26 | var 27 | LRoles: TDictionary; 28 | LSystemMessage: string; 29 | LMessages: TArray>; 30 | LPrompt: string; 31 | LSeparator: string; 32 | begin 33 | LRoles := TDictionary.Create(); 34 | try 35 | LRoles.Add('user', 'GPT4 Correct User: '); 36 | LRoles.Add('assistant', '<|end_of_turn|>GPT4 Correct Assistant: '); 37 | 38 | LSystemMessage := 
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OpenOrca.pas

unit LlamaCpp.Common.Chat.Formatter.OpenOrca;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for Open-Orca style models: fixed reasoning system
  ///   prompt, "User:" / "Assistant:" turns terminated by <|end_of_turn|>,
  ///   generation stopped when the model starts a new "User" turn.
  /// </summary>
  TOpenOrcaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TOpenOrcaChatFormatter }

function TOpenOrcaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LStop: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // Fixed system prompt; caller-supplied system messages are not used here.
  LSystemMessage :=
    '''
You are a helpful assistant. Please answer truthfully and write out your
thinking step by step to be sure you get the right answer. If you make a mistake or encounter
an error in your thinking, say so out loud and attempt to correct it. If you don't know or
aren't sure about something, say so clearly. You will act as a professional logician, mathematician,
and physicist. You will also act as the most appropriate type of expert to answer any particular
question or solve the relevant problem; state which expert type your are, if so. Also think of
any particular named expert that would be ideal to answer the relevant question or solve the
relevant problem; name and act as them, if appropriate.
''';

  // Stop as soon as the model begins a new "User" turn.
  LStop := 'User';

  LRoles := TDictionary<string, string>.Create();
  try
    // BUGFIX: chat messages carry lowercase roles ('user'/'assistant'), but
    // this map was previously keyed 'User'/'Assistant'; MapRoles checks
    // ContainsKey against the message role, so every conversation turn was
    // silently dropped from the prompt. Key by the lowercase wire role and
    // map it to the capitalized display name.
    LRoles.Add('user', 'User');
    LRoles.Add('assistant', 'Assistant');

    LSeparator := '<|end_of_turn|>' + sLineBreak;

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Phind.pas

unit LlamaCpp.Common.Chat.Formatter.Phind;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for Phind-style prompts
  ///   ("### System Prompt" / "### User Message" / "### Assistant").
  /// </summary>
  TPhindChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TPhindChatFormatter }

function TPhindChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
const
  // Fixed system section; caller-supplied system messages are not used here.
  SYSTEM_PROMPT = '### System Prompt'#13#10'You are an intelligent programming assistant.';
var
  LRoleMap: TDictionary<string, string>;
  LTurns: TArray<TPair<string, string>>;
begin
  LRoleMap := TDictionary<string, string>.Create();
  try
    LRoleMap.Add('user', '### User Message');
    LRoleMap.Add('assistant', '### Assistant');

    // Open an empty assistant turn so the model continues from "### Assistant:".
    LTurns := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoleMap)
      + [TPair<string, string>.Create(LRoleMap['assistant'], '')];
  finally
    LRoleMap.Free();
  end;

  // Sections are separated by a blank line; no explicit stop sequence.
  Result := TChatFormatterResponse.Create(
    TLlamaChatFormat.FormatAddColonSingle(
      SYSTEM_PROMPT, LTurns, sLineBreak + sLineBreak));
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Pygmalion.pas

unit LlamaCpp.Common.Chat.Formatter.Pygmalion;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for Pygmalion models using <|system|> / <|user|> /
  ///   <|model|> turn tags; the line break doubles as the stop sequence.
  /// </summary>
  TPygmalionChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TPygmalionChatFormatter }

function TPygmalionChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoleMap: TDictionary<string, string>;
  LSystem: string;
  LTurns: TArray<TPair<string, string>>;
  LSep: string;
begin
  LSep := sLineBreak;

  LRoleMap := TDictionary<string, string>.Create();
  try
    LRoleMap.Add('user', '<|user|>');
    LRoleMap.Add('assistant', '<|model|>');

    // Prefix the caller-provided system message with the Pygmalion system tag.
    LSystem := String.Format('<|system|>%s',
      [TLlamaChatFormat.GetSystemMessage(ASettings.Messages)]);

    LTurns := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoleMap)
      + [TPair<string, string>.Create(LRoleMap['assistant'], '')];
  finally
    LRoleMap.Free();
  end;

  Result := TChatFormatterResponse.Create(
    TLlamaChatFormat.FormatChatml(LSystem, LTurns, LSep), [LSep]);
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Qwen.pas

unit LlamaCpp.Common.Chat.Formatter.Qwen;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for Qwen models (ChatML layout with <|im_start|> /
  ///   <|im_end|> tags and an <|endoftext|> stop token).
  /// </summary>
  TQwenChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TQwenChatFormatter }

function TQwenChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
const
  DEFAULT_SYSTEM_MESSAGE = 'You are a helpful assistant.';
var
  LRoleMap: TDictionary<string, string>;
  LSystem: string;
  LTurns: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoleMap := TDictionary<string, string>.Create();
  try
    LRoleMap.Add('user', '<|im_start|>user');
    LRoleMap.Add('assistant', '<|im_start|>assistant');

    // Use the caller's system message when present, otherwise Qwen's default.
    LSystem := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    if LSystem.IsEmpty() then
      LSystem := DEFAULT_SYSTEM_MESSAGE;
    LSystem := '<|im_start|>system' + sLineBreak + LSystem;

    LTurns := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoleMap)
      + [TPair<string, string>.Create(LRoleMap['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatChatml(LSystem, LTurns, '<|im_end|>');

    Result := TChatFormatterResponse.Create(LPrompt, ['<|endoftext|>']);
  finally
    LRoleMap.Free();
  end;
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.RedpajamaIncite.pas

unit LlamaCpp.Common.Chat.Formatter.RedpajamaIncite;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for RedPajama-INCITE chat models, which use
  ///   '<human>' / '<bot>' turn tags; generation stops on the next
  ///   '<human>' tag.
  /// </summary>
  TRedpajamaInciteChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TRedpajamaInciteChatFormatter }

function TRedpajamaInciteChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemMessage: string;
  LRoles: TDictionary<string, string>;
  LMessages: TArray<TPair<string, string>>;
  LSeparator: string;
  LStop: string;
  LPrompt: string;
begin
  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);

  LRoles := TDictionary<string, string>.Create();
  try
    // BUGFIX: the role tags and stop token were empty strings, producing
    // prompts with no speaker markers and a useless stop sequence. The
    // RedPajama-INCITE chat format uses '<human>' / '<bot>' turn tags and
    // stops on '<human>' (see the "redpajama-incite" chat format in
    // llama-cpp-python and the model card).
    LRoles.Add('user', '<human>');
    LRoles.Add('assistant', '<bot>');
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  // Plain 'string' suffices here; the previous WideString declaration served
  // no purpose.
  LSeparator := sLineBreak;
  LStop := '<human>';

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Registration.pas

unit LlamaCpp.Common.Chat.Formatter.Registration;

interface

type
  /// <summary>
  ///   Registers and unregisters every built-in chat formatter with the
  ///   global chat-completion collection.
  /// </summary>
  TChatFormatterRegistration = class
  public
    class procedure RegisterAll();
    class procedure UnregisterAll();
  end;

implementation

uses
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Chat.Completion.Collection,
  LlamaCpp.Common.Chat.Formatter.Adapter,
  LlamaCpp.Common.Chat.Formatter.Llama2,
  LlamaCpp.Common.Chat.Formatter.Llama3,
  LlamaCpp.Common.Chat.Formatter.Alpaca,
  LlamaCpp.Common.Chat.Formatter.Qwen,
  LlamaCpp.Common.Chat.Formatter.Vicuna,
  LlamaCpp.Common.Chat.Formatter.OasstLlama,
  LlamaCpp.Common.Chat.Formatter.Baichuan,
  LlamaCpp.Common.Chat.Formatter.Baichuan2,
  LlamaCpp.Common.Chat.Formatter.OpenBuddy,
  LlamaCpp.Common.Chat.Formatter.RedpajamaIncite,
  LlamaCpp.Common.Chat.Formatter.Snoozy,
  LlamaCpp.Common.Chat.Formatter.Phind,
  LlamaCpp.Common.Chat.Formatter.Intel,
  LlamaCpp.Common.Chat.Formatter.OpenOrca,
  LlamaCpp.Common.Chat.Formatter.MilstralLite,
  LlamaCpp.Common.Chat.Formatter.Zephyr,
  LlamaCpp.Common.Chat.Formatter.Pygmalion,
  LlamaCpp.Common.Chat.Formatter.Chatml,
  LlamaCpp.Common.Chat.Formatter.MistralInstruct,
  LlamaCpp.Common.Chat.Formatter.ChatGLM3,
  LlamaCpp.Common.Chat.Formatter.OpenChat,
  LlamaCpp.Common.Chat.Formatter.Saiga,
  LlamaCpp.Common.Chat.Formatter.Gemma;

const
  // Every built-in chat format name, in registration order.
  CChatFormatNames: array[0..22] of string = (
    'llama-2', 'llama-3', 'alpaca', 'qwen', 'vicuna', 'oasst_llama',
    'baichuan', 'baichuan-2', 'openbuddy', 'redpajama-incite', 'snoozy',
    'phind', 'intel', 'open-orca', 'mistrallite', 'zephyr', 'pygmalion',
    'chatml', 'mistral-instruct', 'chatglm3', 'openchat', 'saiga', 'gemma');

{ TChatFormatterRegistration }

class procedure TChatFormatterRegistration.RegisterAll;

  // Wraps the formatter in a completion handler and registers it under AName.
  procedure Reg(const AName: string; const AFormatter: ILlamaChatFormater);
  begin
    TLlamaChatCompletionCollection.Instance.RegisterChatCompletionHandler(
      AName, TChatFormaterAdapter.ToChatCompletionHandler(AFormatter));
  end;

begin
  Reg('llama-2', TLlama2ChatFormatter.Create());
  Reg('llama-3', TLlama3ChatFormatter.Create());
  Reg('alpaca', TAlpacaChatFormatter.Create());
  Reg('qwen', TQwenChatFormatter.Create());
  Reg('vicuna', TVicunaChatFormatter.Create());
  Reg('oasst_llama', TOasstLlamaChatFormatter.Create());
  Reg('baichuan', TBaichuanChatFormatter.Create());
  Reg('baichuan-2', TBaichuan2ChatFormatter.Create());
  Reg('openbuddy', TOpenBudyChatFormatter.Create());
  Reg('redpajama-incite', TRedpajamaInciteChatFormatter.Create());
  Reg('snoozy', TSnoozyChatFormatter.Create());
  Reg('phind', TPhindChatFormatter.Create());
  Reg('intel', TIntelChatFormatter.Create());
  Reg('open-orca', TOpenOrcaChatFormatter.Create());
  Reg('mistrallite', TMistralLiteChatFormatter.Create());
  Reg('zephyr', TZephyrChatFormatter.Create());
  Reg('pygmalion', TPygmalionChatFormatter.Create());
  Reg('chatml', TChatmlChatFormatter.Create());
  Reg('mistral-instruct', TMistralInstructChatFormatter.Create());
  Reg('chatglm3', TChatGLM3ChatFormatter.Create());
  Reg('openchat', TOpenChatChatFormatter.Create());
  Reg('saiga', TSaigaChatFormatter.Create());
  Reg('gemma', TGemmaChatFormatter.Create());
end;

class procedure TChatFormatterRegistration.UnregisterAll;
var
  I: Integer;
begin
  // Unregister in reverse registration order, matching the original code.
  for I := High(CChatFormatNames) downto Low(CChatFormatNames) do
    TLlamaChatCompletionCollection.Instance.UnregisterChatHandler(
      CChatFormatNames[I]);
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Snoozy.pas

unit LlamaCpp.Common.Chat.Formatter.Snoozy;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for the GPT4All "snoozy" prompt style
  ///   ("### Instruction:" / "### Prompt" / "### Response"); stops on '###'.
  /// </summary>
  TSnoozyChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TSnoozyChatFormatter }

function TSnoozyChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemMessage: string;
  LRoles: TDictionary<string, string>;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LStop: string;
begin
  // Fall back to the default instruction when no system message is supplied.
  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
  if LSystemMessage.IsEmpty() then
    LSystemMessage := 'The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.';

  LSystemMessage := String.Format('### Instruction:'#13#10'%s', [
    LSystemMessage]);

  LSeparator := sLineBreak;
  LStop := '###';

  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '### Prompt');
    LRoles.Add('assistant', '### Response');

    // BUGFIX: a stray second `LSystemMessage := GetSystemMessage(...)` here
    // used to overwrite the '### Instruction:' header built above (and
    // discard the default fallback); it has been removed.

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;

end.
// src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Zephyr.pas

unit LlamaCpp.Common.Chat.Formatter.Zephyr;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.Chat.Types,
  LlamaCpp.Common.Chat.Format;

type
  /// <summary>
  ///   Chat formatter for Zephyr models using <|system|> / <|user|> /
  ///   <|assistant|> tags with '</s>' as turn terminator and stop token.
  /// </summary>
  TZephyrChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
  private
    function Format(const ASettings: TLlamaChatCompletionSettings)
      : TChatFormatterResponse;
  end;

implementation

{ TZephyrChatFormatter }

function TZephyrChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|user|>'#13#10);
    LRoles.Add('assistant', '<|assistant|>'#13#10);

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|system|>'#13#10'%s', [LSystemMessage]);

    // BUGFIX: the separator/stop token was an empty string, so turns were not
    // terminated and the stop sequence was useless. Zephyr prompts end each
    // turn with the EOS tag '</s>' (see the "zephyr" chat format in
    // llama-cpp-python and the zephyr-7b model card).
    LSeparator := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn prompts the model to answer.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatml(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;

end.
// src/Common/Chat/LlamaCpp.Common.Chat.Format.pas

unit LlamaCpp.Common.Chat.Format;

interface

uses
  System.SysUtils,
  System.Rtti,
  System.Generics.Collections,
  LlamaCpp.Wrapper.LlamaModel,
  LLamaCpp.Common.Chat.Types;

type
  /// <summary>
  ///   Shared helpers for chat prompt construction: reference chat templates,
  ///   role mapping, and the prompt layout primitives used by the concrete
  ///   formatters.
  /// </summary>
  TLlamaChatFormat = class
  public const
    // Source: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/blob/main/tokenizer_config.json
    CHATML_CHAT_TEMPLATE = '{% for message in messages %}{{''<|im_start|>'' + message[''role''] + ''\n'' + message[''content''] + ''<|im_end|>'' + ''\n''}}{% endfor %}{% if add_generation_prompt %}{{ ''<|im_start|>assistant\n'' }}{% endif %}';
    CHATML_BOS_TOKEN = '';
    CHATML_EOS_TOKEN = '<|im_end|>';

    // Source: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json
    MISTRAL_INSTRUCT_CHAT_TEMPLATE = '{{ bos_token }}{% for message in messages %}{% if (message[''role''] == ''user'') != (loop.index0 % 2 == 0) %}{{ raise_exception(''Conversation roles must alternate user/assistant/user/assistant/...'') }}{% endif %}{% if message[''role''] == ''user'' %}{{ ''[INST] '' + message[''content''] + '' [/INST]'' }}{% elif message[''role''] == ''assistant'' %}{{ message[''content''] + eos_token + '' '' }}{% else %}{{ raise_exception(''Only user and assistant roles are supported!'') }}{% endif %}{% endfor %}';
    MISTRAL_INSTRUCT_BOS_TOKEN = '';
    MISTRAL_INSTRUCT_EOS_TOKEN = '';

    // Source: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json
    MIXTRAL_INSTRUCT_CHAT_TEMPLATE = '{{ bos_token }}{% for message in messages %}{% if (message[''role''] == ''user'') != (loop.index0 % 2 == 0) %}{{ raise_exception(''Conversation roles must alternate user/assistant/user/assistant/...'') }}{% endif %}{% if message[''role''] == ''user'' %}{{ ''[INST] '' + message[''content''] + '' [/INST]'' }}{% elif message[''role''] == ''assistant'' %}{{ message[''content''] + eos_token}}{% else %}{{ raise_exception(''Only user and assistant roles are supported!'') }}{% endif %}{% endfor %}';

    // Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
    LLAMA3_INSTRUCT_CHAT_TEMPLATE = '{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = ''<|start_header_id|>'' + message[''role''] + ''<|end_header_id|>\n\n''+ message[''content''] | trim + ''<|eot_id|>'' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ ''<|start_header_id|>assistant<|end_header_id|>\n\n'' }}{% endif %}';
  public
    // Maps a GGUF tokenizer.chat_template onto a registered format name,
    // or an empty string when the template is absent or unrecognized.
    class function GuessChatFormatFromGguf(const AMetadata: TMetadata)
      : string; static;

    // Content of the first 'system' message, or '' when there is none.
    class function GetSystemMessage(
      const AMessages: TArray<TChatCompletionRequestMessage>): string;
    // Maps message roles through ARoleMap; unmapped roles are dropped.
    class function MapRoles(
      const AMessages: TArray<TChatCompletionRequestMessage>;
      const ARoleMap: TDictionary<string, string>)
      : TArray<TPair<string, string>>;
    // "{system}{sep}{role}{content}{sep}..." layout (no colon after role).
    class function FormatNoColonSingle(
      const ASystemMessage: string;
      const AMessages: TArray<TPair<string, string>>;
      const ASeparator: string): string;
    // "{role}: {content}" layout alternating two separators by turn parity.
    class function FormatAddColonTwo(
      const ASystemMessage: string;
      const AMessages: TArray<TPair<string, string>>;
      const ASeparator, ASeparator2: string): string;
    // "{role}: {content}{sep}" layout with a single separator.
    class function FormatAddColonSingle(
      const ASystemMessage: string;
      const AMessages: TArray<TPair<string, string>>;
      const ASeparator: string): string;
    // ChatML layout: role on its own line, content terminated by ASeparator.
    class function FormatChatml(
      const ASystemMessage: string;
      const AMessages: TArray<TPair<string, string>>;
      const ASeparator: string): string;
    // ChatGLM3 layout: "{role}\n {content}" per turn.
    class function FormatChatGML3(
      const ASystemMessage: string;
      const AMessages: TArray<TPair<string, string>>): string;
  end;

implementation

uses
  System.Variants;

{ TLlamaChatFormat }

class function TLlamaChatFormat.GuessChatFormatFromGguf(
  const AMetadata: TMetadata): string;
var
  LTemplate: string;
begin
  Result := String.Empty;

  if not AMetadata.ContainsKey('tokenizer.chat_template') then
    Exit;

  LTemplate := AMetadata.Items['tokenizer.chat_template'];

  if LTemplate = CHATML_CHAT_TEMPLATE then
    Result := 'chatml'
  else if (LTemplate = MISTRAL_INSTRUCT_CHAT_TEMPLATE)
    or (LTemplate = MIXTRAL_INSTRUCT_CHAT_TEMPLATE) then
    Result := 'mistral-instruct'
  else if LTemplate = LLAMA3_INSTRUCT_CHAT_TEMPLATE then
    Result := 'llama-3';
end;

class function TLlamaChatFormat.GetSystemMessage(
  const AMessages: TArray<TChatCompletionRequestMessage>): string;
var
  I: Integer;
begin
  // First 'system' message wins.
  for I := Low(AMessages) to High(AMessages) do
    if AMessages[I].Role = 'system' then
      Exit(VarToStr(AMessages[I].Content));

  Result := String.Empty;
end;

class function TLlamaChatFormat.MapRoles(
  const AMessages: TArray<TChatCompletionRequestMessage>;
  const ARoleMap: TDictionary<string, string>): TArray<TPair<string, string>>;
var
  LMessage: TChatCompletionRequestMessage;
  LContent: string;
begin
  Result := nil;

  // Messages whose role has no mapping are dropped; non-string (e.g. null)
  // content is replaced by an empty string.
  for LMessage in AMessages do
  begin
    if not ARoleMap.ContainsKey(LMessage.Role) then
      Continue;

    if VarIsStr(LMessage.Content) then
      LContent := VarToStr(LMessage.Content)
    else
      LContent := String.Empty;

    Result := Result + [
      TPair<string, string>.Create(ARoleMap[LMessage.Role], LContent)];
  end;
end;

class function TLlamaChatFormat.FormatNoColonSingle(
  const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>;
  const ASeparator: string): string;
var
  I: Integer;
begin
  Result := ASystemMessage + ASeparator;

  // An empty-content turn emits only the role tag (open turn for generation).
  for I := Low(AMessages) to High(AMessages) do
    if AMessages[I].Value.IsEmpty() then
      Result := Result + AMessages[I].Key
    else
      Result := Result + AMessages[I].Key + AMessages[I].Value + ASeparator;
end;

class function TLlamaChatFormat.FormatAddColonTwo(
  const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>;
  const ASeparator, ASeparator2: string): string;
var
  I: Integer;
  LSep: string;
begin
  Result := ASystemMessage + ASeparator;

  for I := Low(AMessages) to High(AMessages) do
  begin
    // Even-indexed turns use ASeparator, odd-indexed turns ASeparator2.
    if Odd(I) then
      LSep := ASeparator2
    else
      LSep := ASeparator;

    if AMessages[I].Value.IsEmpty() then
      Result := Result + AMessages[I].Key + ':'
    else
      Result := Result + AMessages[I].Key + ': ' + AMessages[I].Value + LSep;
  end;
end;

class function TLlamaChatFormat.FormatAddColonSingle(
  const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>;
  const ASeparator: string): string;
var
  LTurn: TPair<string, string>;
begin
  Result := ASystemMessage + ASeparator;

  // An empty-content turn ends with a bare colon (open turn for generation).
  for LTurn in AMessages do
    if LTurn.Value.IsEmpty() then
      Result := Result + LTurn.Key + ':'
    else
      Result := Result + LTurn.Key + ': ' + LTurn.Value + ASeparator;
end;

class function TLlamaChatFormat.FormatChatml(
  const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>;
  const ASeparator: string): string;
var
  LTurn: TPair<string, string>;
begin
  // An empty system message contributes nothing to the prompt.
  if ASystemMessage.IsEmpty() then
    Result := String.Empty
  else
    Result := ASystemMessage + ASeparator + sLineBreak;

  for LTurn in AMessages do
    if LTurn.Value.IsEmpty() then
      Result := Result + LTurn.Key + sLineBreak
    else
      Result := Result + LTurn.Key + sLineBreak + LTurn.Value + ASeparator + sLineBreak;
end;

class function TLlamaChatFormat.FormatChatGML3(
  const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>): string;
var
  LTurn: TPair<string, string>;
begin
  if ASystemMessage.IsEmpty() then
    Result := String.Empty
  else
    Result := ASystemMessage;

  for LTurn in AMessages do
    if LTurn.Value.IsEmpty() then
      Result := Result + LTurn.Key
    else
      Result := Result + LTurn.Key + sLineBreak + ' ' + LTurn.Value;
end;

end.
"]" ws 54 | 55 | string ::= 56 | "\"" ( 57 | [^"\\\x7F\x00-\x1F] | 58 | "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes 59 | )* "\"" ws 60 | 61 | number ::= 62 | ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws 63 | 64 | # Optional space: by convention, applied in this grammar after literal chars when allowed 65 | ws ::= | " " | "\n" [ \t]{0,20} 66 | '''; 67 | 68 | implementation 69 | 70 | { TLlamaGrammar } 71 | 72 | constructor TLlamaGrammar.Create(const AGrammar: string); 73 | begin 74 | inherited Create; 75 | FGrammar := AGrammar; 76 | FRoot := LLAMA_GRAMMAR_DEFAULT_ROOT; 77 | end; 78 | 79 | function TLlamaGrammar.GetGrammar: string; 80 | begin 81 | Result := FGrammar; 82 | end; 83 | 84 | function TLlamaGrammar.GetRoot: string; 85 | begin 86 | Result := FRoot; 87 | end; 88 | 89 | procedure TLlamaGrammar.SetGrammar(const AGrammar: string); 90 | begin 91 | FGrammar := AGrammar; 92 | end; 93 | 94 | procedure TLlamaGrammar.SetRoot(const ARoot: string); 95 | begin 96 | FRoot := ARoot; 97 | end; 98 | 99 | class function TLlamaGrammar.FromString(const AGrammar: string): ILlamaGrammar; 100 | begin 101 | Result := TLlamaGrammar.Create(AGrammar); 102 | end; 103 | 104 | class function TLlamaGrammar.FromFile(const AFileName: string): ILlamaGrammar; 105 | var 106 | LGrammarFile: TStringList; 107 | begin 108 | LGrammarFile := TStringList.Create; 109 | try 110 | try 111 | LGrammarFile.LoadFromFile(AFileName); 112 | 113 | if LGrammarFile.Text.Trim.IsEmpty then 114 | raise Exception.Create('Error: Grammar file is empty'); 115 | 116 | Result := TLlamaGrammar.FromString(LGrammarFile.Text); 117 | except 118 | on E: Exception do 119 | raise Exception.CreateFmt('Error reading grammar file: %s', [E.Message]); 120 | end; 121 | finally 122 | LGrammarFile.Free; 123 | end; 124 | end; 125 | 126 | class function TLlamaGrammar.FromJsonSchema(const AJsonSchema: string): ILlamaGrammar; 127 | begin 128 | Result := 
TLlamaGrammar.FromString(JsonSchemaToGBNF(AJsonSchema)); 129 | end; 130 | 131 | class function TLlamaGrammar.JsonSchemaToGBNF(const ASchema: string; 132 | const APropOrder: TArray = nil): string; 133 | begin 134 | raise ENotImplemented.Create('Not implemented.'); 135 | end; 136 | 137 | procedure TLlamaGrammar.Reset; 138 | begin 139 | // 140 | end; 141 | 142 | end. 143 | -------------------------------------------------------------------------------- /src/Common/LlamaCpp.Common.State.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Common.State; 2 | 3 | interface 4 | 5 | uses 6 | System.Classes, 7 | System.SysUtils, 8 | System.JSON.Serializers; 9 | 10 | type 11 | TLlamaState = class 12 | private 13 | FInputIds: TArray; 14 | FScores: TArray>; 15 | FNTokens: Integer; 16 | FLlamaState: TArray; 17 | FLlamaStateSize: Integer; 18 | FSeed: UInt32; 19 | public 20 | constructor Create(); overload; 21 | constructor Create( 22 | const AInputIds: TArray; 23 | const AScores: TArray>; 24 | const ANTokens: Integer; 25 | const ALlamaState: TArray; 26 | const ALlamaStateSize: Integer; 27 | const ASeed: UInt32 28 | ); overload; 29 | 30 | function GetSize(): Int64; 31 | function Clone(): TLlamaState; 32 | 33 | procedure Serialize(const AStream: TStream); 34 | procedure Deserialize(const AStream: TStream); 35 | 36 | function ToJsonString(): string; 37 | class function FromJsonString(const AJsonString: string): TLlamaState; 38 | 39 | property InputIds: TArray read FInputIds write FInputIds; 40 | property Scores: TArray> read FScores write FScores; 41 | property NTokens: Integer read FNTokens write FNTokens; 42 | property LlamaState: TArray read FLlamaState write FLlamaState; 43 | property LlamaStateSize: Integer read FLlamaStateSize write FLlamaStateSize; 44 | property Seed: UInt32 read FSeed write FSeed; 45 | end; 46 | 47 | implementation 48 | 49 | { TLlamaState } 50 | 51 | constructor TLlamaState.Create; 52 | begin 53 | // 54 | 
end; 55 | 56 | constructor TLlamaState.Create( 57 | const AInputIds: TArray; 58 | const AScores: TArray>; 59 | const ANTokens: Integer; 60 | const ALlamaState: TArray; 61 | const ALlamaStateSize: Integer; 62 | const ASeed: UInt32); 63 | begin 64 | inherited Create; 65 | FInputIds := AInputIds; 66 | FScores := AScores; 67 | FNTokens := ANTokens; 68 | FLlamaState := ALlamaState; 69 | FLlamaStateSize := ALlamaStateSize; 70 | FSeed := ASeed; 71 | end; 72 | 73 | function TLlamaState.GetSize: Int64; 74 | var 75 | I: Integer; 76 | begin 77 | Result := (Length(FInputIds) * SizeOf(integer)) 78 | + (Length(FLlamaState) * SizeOf(ShortInt)) 79 | + SizeOf(FNTokens) 80 | + SizeOf(FLlamaStateSize) 81 | + SizeOf(FSeed); 82 | 83 | for I := Low(FScores) to High(FScores) do 84 | Result := Result + Length(FScores[I]) * SizeOf(Single); 85 | 86 | Result := Result + (Length(FScores) * SizeOf(TArray)); 87 | end; 88 | 89 | function TLlamaState.Clone: TLlamaState; 90 | begin 91 | Result := TLlamaState.Create( 92 | FInputIds, 93 | FScores, 94 | FNTokens, 95 | FLlamaState, 96 | FLlamaStateSize, 97 | FSeed 98 | ); 99 | end; 100 | 101 | function TLlamaState.ToJsonString: string; 102 | var 103 | LSerializer: TJsonSerializer; 104 | begin 105 | LSerializer := TJSonSerializer.Create(); 106 | try 107 | Result := LSerializer.Serialize(Self); 108 | finally 109 | LSerializer.Free(); 110 | end; 111 | end; 112 | 113 | class function TLlamaState.FromJsonString( 114 | const AJsonString: string): TLlamaState; 115 | var 116 | LSerializer: TJsonSerializer; 117 | begin 118 | LSerializer := TJSonSerializer.Create(); 119 | try 120 | Result := LSerializer.Deserialize(AJsonString); 121 | finally 122 | LSerializer.Free(); 123 | end; 124 | end; 125 | 126 | procedure TLlamaState.Serialize(const AStream: TStream); 127 | var 128 | I: Integer; 129 | begin 130 | AStream.WriteData(FNTokens); 131 | AStream.WriteData(FLlamaStateSize); 132 | AStream.WriteData(FSeed); 133 | 134 | AStream.WriteData(Length(FInputIds)); 135 | 
AStream.Write(FInputIds[0], Length(FInputIds) * SizeOf(integer)); 136 | 137 | AStream.WriteData(Length(FLlamaState)); 138 | AStream.Write(FLlamaState[0], Length(FLlamaState) * SizeOf(ShortInt)); 139 | 140 | AStream.WriteData(Length(FScores)); 141 | for I := Low(FScores) to High(FScores) do 142 | begin 143 | AStream.WriteData(Length(FScores[I])); 144 | AStream.Write(FScores[I][0], Length(FScores[I]) * SizeOf(Single)); 145 | end; 146 | end; 147 | 148 | procedure TLlamaState.Deserialize(const AStream: TStream); 149 | var 150 | LLength: Integer; 151 | I: Integer; 152 | begin 153 | AStream.ReadData(FNTokens); 154 | AStream.ReadData(FLlamaStateSize); 155 | AStream.ReadData(FSeed); 156 | 157 | AStream.ReadData(LLength); 158 | SetLength(FInputIds, LLength); 159 | AStream.Read(FInputIds[0], LLength * SizeOf(integer)); 160 | 161 | AStream.ReadData(LLength); 162 | SetLength(FLlamaState, LLength); 163 | AStream.Read(FLlamaState[0], LLength * SizeOf(ShortInt)); 164 | 165 | AStream.ReadData(LLength); 166 | SetLength(FScores, LLength); 167 | for I := Low(FScores) to High(FScores) do 168 | begin 169 | AStream.ReadData(LLength); 170 | SetLength(FScores[I], LLength); 171 | AStream.Read(FScores[I][0], LLength * SizeOf(single)); 172 | end; 173 | end; 174 | 175 | end. 
-------------------------------------------------------------------------------- /src/Common/LlamaCpp.Common.TokenArray.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.TokenArray;

interface

uses
  System.SysUtils,
  LlamaCpp.CType.Llama;

type
  /// <summary>
  /// Owns the candidate buffer backing a C-level llama token-data array and
  /// refreshes it from a logits vector before each sampling step.
  /// </summary>
  TLlamaTokenDataArray = class
  private
    FCandidatesData: TArray<TLlamaTokenData>;
    FCandidates: LlamaCpp.CType.Llama.TLlamaTokenDataArray;
    FDefaultCandidatesDataID: TArray<Int32>;
    FDefaultCandidatesDataP: TArray<Single>;
    FN_Vocab: Int32;
  public
    constructor Create(const ANVocab: Int32);

    procedure CopyLogits(const ALogits: TArray<Single>);
    property Candidates: LlamaCpp.CType.Llama.TLlamaTokenDataArray read FCandidates;
    property CandidatesData: TArray<TLlamaTokenData> read FCandidatesData;
  end;

implementation

{ TLlamaTokenDataArray }

constructor TLlamaTokenDataArray.Create(const ANVocab: Int32);
var
  LIndex: Int32;
begin
  FN_Vocab := ANVocab;

  // Pre-build the identity id map and zeroed probabilities once; CopyLogits
  // restores them on every call without reallocating.
  SetLength(FCandidatesData, FN_Vocab);
  SetLength(FDefaultCandidatesDataID, FN_Vocab);
  SetLength(FDefaultCandidatesDataP, FN_Vocab);

  for LIndex := 0 to FN_Vocab - 1 do
  begin
    FDefaultCandidatesDataID[LIndex] := LIndex;
    FDefaultCandidatesDataP[LIndex] := 0.0;
  end;

  // Point the C-visible struct at our managed buffer.
  FCandidates.Data := @FCandidatesData[0];
  FCandidates.Size := FN_Vocab;
  FCandidates.Sorted := False;
end;

procedure TLlamaTokenDataArray.CopyLogits(const ALogits: TArray<Single>);
var
  LIndex: Int32;
begin
  Assert(Length(ALogits) = FN_Vocab, 'Logits size must match vocabulary size.');

  for LIndex := 0 to FN_Vocab - 1 do
  begin
    FCandidatesData[LIndex].ID := FDefaultCandidatesDataID[LIndex];
    FCandidatesData[LIndex].Logit := ALogits[LIndex];
    FCandidatesData[LIndex].P := FDefaultCandidatesDataP[LIndex];
  end;

  // Fresh logits invalidate any prior sort; reset the view metadata.
  FCandidates.Sorted := False;
  FCandidates.Size := FN_Vocab;
end;

end.
-------------------------------------------------------------------------------- /src/Common/Processor/LlamaCpp.Common.Processor.LogitsScore.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.Processor.LogitsScore;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types;

type
  /// <summary>
  /// Ordered collection of logits processors; Execute applies each one to the
  /// score vector in registration order.
  /// </summary>
  TDefaultLogitsScoreList = class(TInterfacedObject, ILogitsProcessorList)
  private
    FProcessors: TList<TLogitsProcessor>;
  public
    constructor Create(); overload;
    constructor Create(const AProcessor: TLogitsProcessor); overload;
    destructor Destroy(); override;

    procedure Add(const AProcessor: TLogitsProcessor);
    procedure Execute(const InputIds: TArray<Integer>;
      [ref] const Scores: TArray<Single>);
  end;

implementation

{ TDefaultLogitsScoreList }

constructor TDefaultLogitsScoreList.Create;
begin
  FProcessors := TList<TLogitsProcessor>.Create();
end;

constructor TDefaultLogitsScoreList.Create(
  const AProcessor: TLogitsProcessor);
begin
  Create();
  Add(AProcessor);
end;

destructor TDefaultLogitsScoreList.Destroy;
begin
  FProcessors.Free();
  inherited;
end;

procedure TDefaultLogitsScoreList.Add(const AProcessor: TLogitsProcessor);
begin
  FProcessors.Add(AProcessor);
end;

procedure TDefaultLogitsScoreList.Execute(const InputIds: TArray<Integer>;
  [ref] const Scores: TArray<Single>);
var
  LProcessor: TLogitsProcessor;
  LWorkingScores: TArray<Single>;
begin
  // Local reference to the same underlying dynamic array: element writes by
  // the processors land in the caller-visible buffer.
  LWorkingScores := Scores;

  for LProcessor in FProcessors do
    LProcessor(InputIds, LWorkingScores);
end;

end.
-------------------------------------------------------------------------------- /src/Common/Processor/LlamaCpp.Common.Processor.StoppingCriteria.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.Processor.StoppingCriteria;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Common.Types;

type
  /// <summary>
  /// Collection of stopping criteria; Execute returns True as soon as any
  /// registered criterion signals that generation should stop.
  /// </summary>
  TDefaultStoppingCriteriaList = class(TInterfacedObject, IStoppingCriteriaList)
  private
    FCriterias: TList<TStoppingCriteria>;
  public
    constructor Create(); overload;
    constructor Create(const AProcessor: TStoppingCriteria); overload;
    destructor Destroy(); override;

    procedure Add(const AProcessor: TStoppingCriteria);
    function Execute(const AInputIds: TArray<Integer>;
      const ALogits: TArray<Single>): Boolean;
  end;

implementation

{ TDefaultStoppingCriteriaList }

constructor TDefaultStoppingCriteriaList.Create;
begin
  FCriterias := TList<TStoppingCriteria>.Create();
end;

constructor TDefaultStoppingCriteriaList.Create(
  const AProcessor: TStoppingCriteria);
begin
  Create();
  Add(AProcessor);
end;

destructor TDefaultStoppingCriteriaList.Destroy;
begin
  FCriterias.Free();
  inherited;
end;

procedure TDefaultStoppingCriteriaList.Add(const AProcessor: TStoppingCriteria);
begin
  FCriterias.Add(AProcessor);
end;

function TDefaultStoppingCriteriaList.Execute(
  const AInputIds: TArray<Integer>;
  const ALogits: TArray<Single>): Boolean;
var
  LCriteria: TStoppingCriteria;
begin
  Result := False;

  // Short-circuit on the first criterion that fires.
  for LCriteria in FCriterias do
    if LCriteria(AInputIds, ALogits) then
      Exit(True);
end;

end.
-------------------------------------------------------------------------------- /src/Common/Sampling/LlamaCpp.Common.Sampling.Context.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.Sampling.Context;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LLamaCpp.CType.Llama,
  LlamaCpp.Wrapper.LlamaContext,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Sampling.Params;

type
  /// <summary>
  /// Sampling state for one generation stream: parameters, mirostat mu,
  /// optional grammar, and the previously accepted / current candidate tokens.
  /// </summary>
  TLlamaSamplingContext = class
  private
    FParams: TLlamaSamplingParams;
    FMirostatMu: Single;
    FGrammar: ILlamaGrammar;
    FPrev: TList<Int32>;
    FCur: TList<Int32>;
  public
    constructor Create;
    destructor Destroy; override;

    // Properties
    property Params: TLlamaSamplingParams read FParams write FParams;
    property MirostatMu: Single read FMirostatMu write FMirostatMu;
    property Grammar: ILlamaGrammar read FGrammar write FGrammar;
    property Prev: TList<Int32> read FPrev write FPrev;
    property Cur: TList<Int32> read FCur write FCur;

    // Methods
    procedure Reset;
    function Copy: TLlamaSamplingContext;
    function Last: Int32;
    function PrevStr(CtxMain: TLlamaContext; N: Int32): string;
    function Sample(const ACtxMain: TLlamaContext; const AIdx: Int32 = 0;
      ALogitsArray: TArray<Single> = nil): Int32;
    procedure Accept(ACtxMain: TLlamaContext; AId: Int32; AApplyGrammar: Boolean);
  end;

implementation

uses
  System.Math,
  LlamaCpp.Helper, LlamaCpp.Common.TokenArray;

type
  TListHelper = class helper for TList<Int32>
    function Skip(Count: Integer): TArray<Int32>;
  end;

{ TListHelper }

// Returns the elements from index Count to the end as a new array
// (an empty array when Count >= list length).
function TListHelper.Skip(Count: Integer): TArray<Int32>;
var
  LList: TList<Int32>;
  I: Integer;
begin
  LList := TList<Int32>.Create;
  try
    for I := Count to Self.Count - 1 do
      LList.Add(Self[I]);

    Result := LList.ToArray();
  finally
    LList.Free;
  end;
end;

{ TLlamaSamplingContext }

constructor TLlamaSamplingContext.Create;
begin
  inherited Create;
  FParams := TLlamaSamplingParams.Create;
  FMirostatMu := 0.0;
  FGrammar := nil;
  FPrev := TList<Int32>.Create;
  FCur := TList<Int32>.Create;
end;

destructor TLlamaSamplingContext.Destroy;
begin
  FParams.Free;
  FPrev.Free;
  FCur.Free;
  inherited Destroy;
end;

procedure TLlamaSamplingContext.Reset;
begin
  FPrev.Clear;
  FCur.Clear;
  if Assigned(FGrammar) then
    FGrammar.Reset;
end;

// NOTE(review): the copy shares the SAME FParams instance as Self while both
// destructors free FParams — destroying both the original and the copy
// double-frees the params object. Confirm intended ownership with callers.
function TLlamaSamplingContext.Copy: TLlamaSamplingContext;
begin
  Result := TLlamaSamplingContext.Create;
  Result.Params := FParams;
  Result.MirostatMu := FMirostatMu;
  Result.Grammar := FGrammar;
  Result.Prev := TList<Int32>.Create(FPrev);
  Result.Cur := TList<Int32>.Create(FCur);
end;

// Last accepted token, or -1 when nothing has been accepted yet.
function TLlamaSamplingContext.Last: Int32;
begin
  if FPrev.Count > 0 then
    Result := FPrev.Last
  else
    Result := -1;
end;

// Detokenizes the last N accepted tokens into a UTF-8 string.
function TLlamaSamplingContext.PrevStr(CtxMain: TLlamaContext; N: Int32): string;
var
  Tokens: TArray<Int32>;
begin
  Tokens := FPrev.Skip(FPrev.Count - N);
  Result := TEncoding.UTF8.GetString(CtxMain.Model.Detokenize(Tokens));
end;

// Samples the next token id. When ALogitsArray is nil the logits are read
// from the context at row AIdx; logit biases, repetition penalties, the
// grammar, and the configured temperature/mirostat/top-k/p pipeline are
// then applied in that order.
function TLlamaSamplingContext.Sample(const ACtxMain: TLlamaContext;
  const AIdx: Int32 = 0; ALogitsArray: TArray<Single> = nil): Int32;
var
  I: Integer;
  LNVocab: Integer;
  LLogits: PLogitArray;
  LLogitsArray: TArray<Single>;
  LLogitPair: TPair<Integer, Single>;
  LTokenDataArray: TLlamaTokenDataArray;
  LNlToken: Integer;
  LNlLogit: Single;
  LLastTokens: TArray<Int32>;
  LLastTokensSize: Integer;
  LMirostatM: Integer;
  LMinKeep: Integer;
begin
  LNVocab := ACtxMain.Model.NVocab();

  if not Assigned(ALogitsArray) then
  begin
    LLogits := ACtxMain.GetLogitsIth(AIdx);
    // FIX: allocate one Single per vocabulary entry. The previous
    // SetLength(LLogitsArray, SizeOf(single) * LNVocab) allocated 4x the
    // needed elements and tripped CopyLogits's size assertion.
    SetLength(LLogitsArray, LNVocab);
    for I := Low(LLogitsArray) to High(LLogitsArray) do
      {$R-}
      LLogitsArray[I] := LLogits[I];
      {$R+}
  end;

  // Apply per-token logit biases.
  for LLogitPair in FParams.LogitBias do
  begin
    LLogitsArray[LLogitPair.Key] := LLogitsArray[LLogitPair.Key]
      + LLogitPair.Value;
  end;

  LTokenDataArray := TLlamaTokenDataArray.Create(LNVocab);
  try
    LTokenDataArray.CopyLogits(LLogitsArray);

    if FPrev.Count > 0 then
    begin
      LNlToken := ACtxMain.Model.TokenNL();
      LNlLogit := LLogitsArray[LNlToken];
      LLastTokens := TArrayHelper.Slice(FPrev.ToArray(), - FParams.PenaltyLastN);
      LLastTokensSize := Min(Length(LLastTokens), FParams.PenaltyLastN);

      if LLastTokensSize > 0 then
        // NOTE(review): casts the FIRST element, relying on the wrapper
        // treating it as the array base address — confirm against
        // SampleRepetitionPenalties' declaration.
        ACtxMain.SampleRepetitionPenalties(
          LTokenDataArray,
          TLlamaTokenArray(LLastTokens[0]),
          LLastTokensSize,
          FParams.PenaltyRepeat,
          FParams.PenaltyFreq,
          FParams.PenaltyPresent
        );

      // Restore the newline logit when newline penalization is disabled.
      if not FParams.PenalizeNL then
        LTokenDataArray.CandidatesData[LNlToken].Logit := LNlLogit;
    end;

    if Assigned(FGrammar) then
      ACtxMain.SampleGrammar(LTokenDataArray, FGrammar);

    if FParams.Temp < 0 then
    begin
      // Negative temperature: return the most probable token's id.
      ACtxMain.SampleSoftmax(LTokenDataArray);
      Result := LTokenDataArray.CandidatesData[0].Id;
    end
    else if FParams.Temp = 0 then
      Result := ACtxMain.SampleTokenGreedy(LTokenDataArray)
    else
    begin
      if FParams.Mirostat = 1 then
      begin
        LMirostatM := 100;
        ACtxMain.SampleTemp(LTokenDataArray, FParams.Temp);
        Result := ACtxMain.SampleTokenMirostat(
          LTokenDataArray,
          FParams.MirostatTau,
          FParams.MirostatEta,
          LMirostatM,
          @FMirostatMu
        );
      end
      else
      begin
        // Standard pipeline: top-k -> typical -> top-p -> min-p -> temp.
        LMinKeep := Max(1, FParams.NProbs);
        ACtxMain.SampleTopK(LTokenDataArray, FParams.TopK, LMinKeep);
        ACtxMain.SampleTypical(LTokenDataArray, FParams.TypicalP, LMinKeep);
        ACtxMain.SampleTopP(LTokenDataArray, FParams.TopP, LMinKeep);
        ACtxMain.SampleMinP(LTokenDataArray, FParams.MinP, LMinKeep);
        ACtxMain.SampleTemp(LTokenDataArray, FParams.Temp);
        Result := ACtxMain.SampleToken(LTokenDataArray);
      end;
    end;
  finally
    LTokenDataArray.Free();
  end;
end;

// Records an accepted token and optionally advances the grammar state.
procedure TLlamaSamplingContext.Accept(ACtxMain: TLlamaContext; AId: Int32;
  AApplyGrammar: Boolean);
begin
  if AApplyGrammar and Assigned(FGrammar) then
    ACtxMain.GrammarAcceptToken(FGrammar, AId);
  FPrev.Add(AId);
end;

end.
-------------------------------------------------------------------------------- /src/Common/Sampling/LlamaCpp.Common.Sampling.CustomSampler.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.Sampling.CustomSampler;

interface

uses
  System.SysUtils,
  Generics.Collections,
  LlamaCpp.CType.Llama;

type
  TApplyFunc = reference to procedure(const ATokenDataArray: PLlamaTokenDataArray);

  /// <summary>
  /// Adapts a Delphi anonymous procedure into a llama.cpp sampler: the C
  /// callback's ctx pointer carries the object, and only the Apply slot of
  /// the sampler interface is populated.
  /// </summary>
  TCustomSampler = class
  private
    FSampler: TLlamaSampler;
    FSamplerI: TLlamaSamplerI;
    FApplyFunc: TApplyFunc;
  private
    class procedure Apply(ASmpl: PLlamaSampler; ACurrProb: PLlamaTokenDataArray); cdecl; static;
  public
    constructor Create(const AApplyFunc: TApplyFunc);
    destructor Destroy; override;

    function GetSampler: PLlamaSampler;
  end;

implementation

uses
  LlamaCpp.Api.Llama;

{ TCustomSampler }

constructor TCustomSampler.Create(const AApplyFunc: TApplyFunc);
begin
  inherited Create;
  FSampler := Default(TLlamaSampler);
  FSamplerI := Default(TLlamaSamplerI);
  FApplyFunc := AApplyFunc;

  // Only Apply is implemented; the remaining callbacks are left nil so
  // llama.cpp skips them.
  FSamplerI.Apply := @TCustomSampler.Apply;
  FSamplerI.name := nil;
  FSamplerI.accept := nil;
  FSamplerI.reset := nil;
  FSamplerI.clone := nil;
  FSamplerI.free := nil;

  FSampler.iface := @FSamplerI;
  FSampler.ctx := Self; // round-tripped back in Apply
end;

destructor TCustomSampler.Destroy;
begin
  inherited Destroy;
end;

// C-side trampoline: recovers the owning object from ctx and forwards the
// token-data array to the user-supplied closure.
class procedure TCustomSampler.Apply(ASmpl: PLlamaSampler;
  ACurrProb: PLlamaTokenDataArray);
var
  LCustomSampler: TCustomSampler;
begin
  LCustomSampler := TCustomSampler(ASmpl.Ctx);

  if Assigned(LCustomSampler.FApplyFunc) then
    LCustomSampler.FApplyFunc(ACurrProb);
end;

function TCustomSampler.GetSampler: PLlamaSampler;
begin
  Result := @FSampler;
end;

end.
-------------------------------------------------------------------------------- /src/Common/Sampling/LlamaCpp.Common.Sampling.Params.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.Sampling.Params;

interface

uses
  System.Generics.Collections;

type
  /// <summary>
  /// Sampling hyper-parameters with llama.cpp-compatible defaults
  /// (top-k 40, top-p 0.95, temp 0.8, repeat penalty window 64, ...).
  /// Owns the LogitBias dictionary.
  /// </summary>
  TLlamaSamplingParams = class
  private
    FNPrev: Int32;
    FNProbs: Int32;
    FTopK: Int32;
    FTopP: Single;
    FMinP: Single;
    FTFS_Z: Single;
    FTypicalP: Single;
    FTemp: Single;
    FPenaltyLastN: Int32;
    FPenaltyRepeat: Single;
    FPenaltyFreq: Single;
    FPenaltyPresent: Single;
    FMirostat: Int32;
    FMirostatTau: Single;
    FMirostatEta: Single;
    FPenalizeNL: Boolean;
    FGrammar: string;
    FCFGNegativePrompt: string;
    FCFGScale: Single;
    FLogitBias: TDictionary<Integer, Single>;
  public
    constructor Create();
    destructor Destroy(); override;

    property NPrev: Int32 read FNPrev write FNPrev;
    property NProbs: Int32 read FNProbs write FNProbs;
    property TopK: Int32 read FTopK write FTopK;
    property TopP: Single read FTopP write FTopP;
    property MinP: Single read FMinP write FMinP;
    property TFS_Z: Single read FTFS_Z write FTFS_Z;
    property TypicalP: Single read FTypicalP write FTypicalP;
    property Temp: Single read FTemp write FTemp;
    property PenaltyLastN: Int32 read FPenaltyLastN write FPenaltyLastN;
    property PenaltyRepeat: Single read FPenaltyRepeat write FPenaltyRepeat;
    property PenaltyFreq: Single read FPenaltyFreq write FPenaltyFreq;
    property PenaltyPresent: Single read FPenaltyPresent write FPenaltyPresent;
    property Mirostat: Int32 read FMirostat write FMirostat;
    property MirostatTau: Single read FMirostatTau write FMirostatTau;
    property MirostatEta: Single read FMirostatEta write FMirostatEta;
    property PenalizeNL: Boolean read FPenalizeNL write FPenalizeNL;
    property Grammar: string read FGrammar write FGrammar;
    property CFGNegativePrompt: string read FCFGNegativePrompt write FCFGNegativePrompt;
    property CFGScale: Single read FCFGScale write FCFGScale;
    property LogitBias: TDictionary<Integer, Single> read FLogitBias write FLogitBias;
  end;

implementation

{ TLlamaSamplingParams }

constructor TLlamaSamplingParams.Create;
begin
  FNPrev := 64;
  FNProbs := 0;
  FTopK := 40;
  FTopP := 0.95;
  FMinP := 0.05;
  FTFS_Z := 1.00;
  FTypicalP := 1.00;
  FTemp := 0.80;
  FPenaltyLastN := 64;
  FPenaltyRepeat := 1.0;
  FPenaltyFreq := 0.00;
  FPenaltyPresent := 0.00;
  FMirostat := 0;
  FMirostatTau := 5.00;
  FMirostatEta := 0.10;
  FPenalizeNL := True;
  FGrammar := '';
  FCFGNegativePrompt := '';
  FCFGScale := 1.00;
  FLogitBias := TDictionary<Integer, Single>.Create;
end;

destructor TLlamaSamplingParams.Destroy;
begin
  FLogitBias.Free();
  inherited;
end;

end.
-------------------------------------------------------------------------------- /src/Common/Sampling/LlamaCpp.Common.Sampling.Sampler.pas: --------------------------------------------------------------------------------
unit LlamaCpp.Common.Sampling.Sampler;

interface

uses
  System.SysUtils,
  System.Classes,
  System.Generics.Collections,
  LlamaCpp.CType.Llama,
  LLamaCpp.Wrapper.LlamaModel,
  LLamaCpp.Wrapper.LlamaContext,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Sampling.CustomSampler;

type
  /// <summary>
  /// Thin object wrapper around a llama.cpp sampler chain. Each Add* method
  /// creates the corresponding native sampler and appends it to the chain;
  /// Close releases the chain and any Delphi-side custom samplers.
  /// </summary>
  TLlamaSampler = class
  private
    FSampler: PLlamaSampler; // Pointer to the llama sampler chain
    FSamplers: TList<PLlamaSampler>;
    FCustomSamplers: TList<TPair<Integer, TCustomSampler>>;

    procedure AddSampler(Sampler: PLlamaSampler);
  public
    constructor Create;
    destructor Destroy; override;

    procedure AddGreedy;
    procedure AddDist(Seed: UInt32);
    procedure AddSoftmax;
    procedure AddTopK(K: Integer);
    procedure AddTopP(P: Single; MinKeep: Integer);
    procedure AddMinP(P: Single; MinKeep: Integer);
    procedure AddTypical(P: Single; MinKeep: Integer);
    procedure AddTemp(Temp: Single);
    procedure AddTempExt(T, Delta, Exponent: Single);
    procedure AddMirostat(NVocab, Seed: Integer; Tau, Eta: Single; M: Integer);
    procedure AddMirostatV2(Seed: Integer; Tau, Eta: Single);
    procedure AddGrammar(Model: TLlamaModel; Grammar: ILlamaGrammar);
    procedure AddPenalties(
      NVocab, SpecialEOSID, LinefeedID, PenaltyLastN: Integer;
      PenaltyRepeat, PenaltyFreq, PenaltyPresent: Single;
      PenalizeNL, IgnoreEOS: Boolean);
    procedure InitLogitBias(
      NVocab, NLogitBias: Integer; LogitBias: PLlamaLogitBias);
    procedure AddCustom(ApplyFunc: TApplyFunc);

    function GetSeed: Integer;
    function Sample(ACtx: TLlamaContext; AIdx: Integer): Integer;
    procedure Close;
  end;

implementation

uses
  LlamaCpp.Api.Llama;

constructor TLlamaSampler.Create;
var
  LParams: TLlamaSamplerChainParams;
begin
  inherited Create;
  LParams := Default(TLlamaSamplerChainParams);
  FSampler := TLlamaApi.Instance.llama_sampler_chain_init(@LParams);
  FSamplers := TList<PLlamaSampler>.Create;
  FCustomSamplers := TList<TPair<Integer, TCustomSampler>>.Create;
end;

destructor TLlamaSampler.Destroy;
begin
  Close;
  FSamplers.Free;
  FCustomSamplers.Free;
  inherited Destroy;
end;

procedure TLlamaSampler.AddGreedy;
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_greedy());
end;

procedure TLlamaSampler.AddDist(Seed: UInt32);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_dist(Seed));
end;

procedure TLlamaSampler.AddSoftmax;
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_softmax());
end;

procedure TLlamaSampler.AddTopK(K: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_top_k(K));
end;

procedure TLlamaSampler.AddTopP(P: Single; MinKeep: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_top_p(P, MinKeep));
end;

procedure TLlamaSampler.AddMinP(P: Single; MinKeep: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_min_p(P, MinKeep));
end;

procedure TLlamaSampler.AddTypical(P: Single; MinKeep: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_typical(P, MinKeep));
end;

procedure TLlamaSampler.AddTemp(Temp: Single);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_temp(Temp));
end;

procedure TLlamaSampler.AddTempExt(T, Delta, Exponent: Single);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_temp_ext(T, Delta, Exponent));
end;

procedure TLlamaSampler.AddMirostat(NVocab, Seed: Integer; Tau, Eta: Single; M: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_mirostat(
    NVocab, Seed, Tau, Eta, M));
end;

procedure TLlamaSampler.AddMirostatV2(Seed: Integer; Tau, Eta: Single);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_mirostat_v2(Seed, Tau, Eta));
end;

// Grammar text and root rule are marshalled as UTF-8 C strings.
procedure TLlamaSampler.AddGrammar(Model: TLlamaModel; Grammar: ILlamaGrammar);
var
  LNative: PLlamaSampler;
begin
  LNative := TLlamaApi.Instance.llama_sampler_init_grammar(
    Model.Model,
    PAnsiChar(UTF8Encode(Grammar.Grammar)),
    PAnsiChar(UTF8Encode(Grammar.Root))
  );
  AddSampler(LNative);
end;

procedure TLlamaSampler.AddPenalties(
  NVocab, SpecialEOSID, LinefeedID, PenaltyLastN: Integer;
  PenaltyRepeat, PenaltyFreq, PenaltyPresent: Single;
  PenalizeNL, IgnoreEOS: Boolean);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_penalties(
    NVocab, SpecialEOSID, LinefeedID, PenaltyLastN,
    PenaltyRepeat, PenaltyFreq, PenaltyPresent,
    PenalizeNL, IgnoreEOS));
end;

procedure TLlamaSampler.InitLogitBias(
  NVocab, NLogitBias: Integer; LogitBias: PLlamaLogitBias);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_logit_bias(
    NVocab, NLogitBias, LogitBias));
end;

// Appends a Delphi-implemented sampler and remembers its chain position so
// Close can detach it before the chain is freed (the chain must not free
// Delphi-owned memory).
procedure TLlamaSampler.AddCustom(ApplyFunc: TApplyFunc);
var
  LCustomSampler: TCustomSampler;
begin
  LCustomSampler := TCustomSampler.Create(ApplyFunc);
  try
    AddSampler(LCustomSampler.GetSampler());
    FCustomSamplers.Add(TPair<Integer, TCustomSampler>.Create(
      TLlamaApi.Instance.llama_sampler_chain_n(FSampler) - 1, LCustomSampler));
  except
    on E: Exception do
    begin
      LCustomSampler.Free();
      raise;
    end;
  end;
end;

procedure TLlamaSampler.AddSampler(Sampler: PLlamaSampler);
begin
  Assert(FSampler <> nil);
  TLlamaApi.Instance.llama_sampler_chain_add(FSampler, Sampler);
  FSamplers.Add(Sampler);
end;

function TLlamaSampler.GetSeed: Integer;
begin
  Assert(FSampler <> nil);
  Result := TLlamaApi.Instance.llama_sampler_get_seed(FSampler);
end;

function TLlamaSampler.Sample(ACtx: TLlamaContext; AIdx: Integer): Integer;
begin
  Assert(FSampler <> nil);
  Result := TLlamaApi.Instance.llama_sampler_sample(FSampler, ACtx.Context, AIdx);
end;

// Detaches and frees custom samplers, then frees the native chain.
// NOTE(review): stored chain indices are positions at insertion time;
// removing more than one custom sampler shifts later indices — confirm
// behavior when multiple AddCustom calls are mixed with Close.
procedure TLlamaSampler.Close;
var
  LEntry: TPair<Integer, TCustomSampler>;
begin
  if FSampler <> nil then
  begin
    for LEntry in FCustomSamplers do
    begin
      TLlamaApi.Instance.llama_sampler_chain_remove(FSampler, LEntry.Key);
      LEntry.Value.Free();
    end;

    TLlamaApi.Instance.llama_sampler_free(FSampler);
    FSampler := nil;
  end;
  FSamplers.Clear;
  FCustomSamplers.Clear;
end;

end.
--------------------------------------------------------------------------------
/src/Common/Speculative/LlamaCpp.Common.Speculative.LookupDecoding.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Speculative.LookupDecoding;

interface

uses
  System.SysUtils,
  System.Classes,
  System.Generics.Collections,
  System.Math,
  LlamaCpp.Common.Types;

type
  /// <summary>
  /// Draft model for speculative decoding based on prompt n-gram lookup:
  /// takes the trailing n-gram of the input, looks for earlier occurrences
  /// of it in the input, and proposes the tokens that followed the first
  /// such occurrence (up to ANumPredTokens of them).
  /// </summary>
  TLlamaPromptLookupDecoding = class(TInterfacedObject, ILlamaDraftModel)
  private
    FMaxNGramSize: Integer;   // largest n-gram length to try (tried first)
    FNumPredTokens: Integer;  // maximum number of draft tokens to return

    // Equivalent of numpy's "np.lib.stride_tricks.sliding_window_view":
    // window I holds AInputIds[I .. I + ANgramSize - 1].
    function CreateSlidingWindow(const AInputIds: TArray<Integer>;
      const ANgramSize: Integer): TArray<TArray<Integer>>;

    // The last ANgramSize tokens of the input.
    function GetNgramArray(const AInputIds: TArray<Integer>;
      const ANgramSize: Integer): TArray<Integer>;
    // One flag per window: True where the window equals the given n-gram.
    function CompareWindowsWithNgram(
      const SlidingWindows: TArray<TArray<Integer>>;
      const ANgramSize: TArray<Integer>): TArray<Boolean>;
    // Indices of the True entries. The caller owns the returned list.
    function FindMatchIndices(const AMatches: TArray<Boolean>): TList<Integer>;
    // Tokens following the first match that has any tokens after it,
    // capped at FNumPredTokens; nil when no match qualifies.
    function GetPredictedTokens(const AInputIds: TArray<Integer>;
      const AMatchIndices: TList<Integer>;
      const ANgramSize: Integer): TArray<Integer>;

    function FindCandidatePredTokens(
      const AInputIds: TArray<Integer>): TArray<Integer>;
  public
    constructor Create(const AMaxNGramSize: Integer = 2;
      const ANumPredTokens: Integer = 10);

    // ILlamaDraftModel entry point; may return nil when nothing matches.
    function Execute(const AInputIds: TArray<Integer>): TArray<Integer>;
  end;

implementation

{ TLlamaPromptLookupDecoding }

constructor TLlamaPromptLookupDecoding.Create(
  const AMaxNGramSize: Integer; const ANumPredTokens: Integer);
begin
  inherited Create;
  FMaxNGramSize := AMaxNGramSize;
  FNumPredTokens := ANumPredTokens;
end;

function TLlamaPromptLookupDecoding.CreateSlidingWindow(
  const AInputIds: TArray<Integer>;
  const ANgramSize: Integer): TArray<TArray<Integer>>;
var
  I: integer;
  J: integer;
begin
  if Length(AInputIds) < ANgramSize then
    raise Exception.Create('Ngram size is larger than the input length.');

  SetLength(Result, Length(AInputIds) - ANgramSize + 1);

  // Materialize each window as its own sub-array.
  for I := Low(Result) to High(Result) do
  begin
    SetLength(Result[I], ANgramSize);
    for J := 0 to ANgramSize - 1 do
      Result[I][J] := AInputIds[I + J];
  end;
end;

function TLlamaPromptLookupDecoding.GetNgramArray(
  const AInputIds: TArray<Integer>;
  const ANgramSize: Integer): TArray<Integer>;
var
  I: Integer;
begin
  SetLength(Result, ANgramSize);
  for I := 0 to ANgramSize - 1 do
    Result[I] := AInputIds[Length(AInputIds) - ANgramSize + I];
end;

function TLlamaPromptLookupDecoding.CompareWindowsWithNgram(
  const SlidingWindows: TArray<TArray<Integer>>;
  const ANgramSize: TArray<Integer>): TArray<Boolean>;
var
  I: Integer;
  J: Integer;
begin
  SetLength(Result, Length(SlidingWindows));
  for I := Low(SlidingWindows) to High(SlidingWindows) do
  begin
    Result[I] := True;
    for J := Low(ANgramSize) to High(ANgramSize) do
      if SlidingWindows[I][J] <> ANgramSize[J] then
      begin
        Result[I] := False;
        Break;
      end;
  end;
end;

function TLlamaPromptLookupDecoding.FindMatchIndices(
  const AMatches: TArray<Boolean>): TList<Integer>;
var
  I: Integer;
begin
  Result := TList<Integer>.Create;
  for I := 0 to High(AMatches) do
    if AMatches[I] then
      Result.Add(I);
end;

function TLlamaPromptLookupDecoding.GetPredictedTokens(
  const AInputIds: TArray<Integer>;
  const AMatchIndices: TList<Integer>;
  const ANgramSize: Integer): TArray<Integer>;
var
  LStartIdx: integer;
  LEndIdx: integer;
  I: integer;
  J: integer;
begin
  // Explicitly initialize: function Results are not guaranteed to start
  // empty when assigned into a previously populated variable.
  Result := nil;

  // Use the first match that still has tokens after it.
  for I := 0 to AMatchIndices.Count - 1 do
  begin
    LStartIdx := AMatchIndices[I] + ANgramSize;
    LEndIdx := Min(LStartIdx + FNumPredTokens, Length(AInputIds));

    if LStartIdx < LEndIdx then
    begin
      SetLength(Result, LEndIdx - LStartIdx);
      for J := 0 to LEndIdx - LStartIdx - 1 do
        Result[J] := AInputIds[LStartIdx + J];
      Exit;
    end;
  end;
end;

function TLlamaPromptLookupDecoding.FindCandidatePredTokens(
  const AInputIds: TArray<Integer>): TArray<Integer>;
var
  LNgramSize: integer;
  LSlidingWindows: TArray<TArray<Integer>>;
  LNgramArray: TArray<Integer>;
  LMatches: TArray<Boolean>;
  LMatchIndices: TList<Integer>;
begin
  // Try the longest n-gram first, falling back to shorter ones.
  for LNgramSize := Min(FMaxNGramSize, Length(AInputIds) - 1) downto 1 do
  begin
    LSlidingWindows := CreateSlidingWindow(AInputIds, LNgramSize);
    LNgramArray := GetNgramArray(AInputIds, LNgramSize);
    LMatches := CompareWindowsWithNgram(LSlidingWindows, LNgramArray);

    // FindMatchIndices allocates a fresh list on every call, so it must be
    // freed once per iteration. (The previous version pre-created one list,
    // reassigned the variable inside the loop, and freed only the last
    // instance - leaking every other list.)
    LMatchIndices := FindMatchIndices(LMatches);
    try
      Result := GetPredictedTokens(AInputIds, LMatchIndices, LNgramSize);
    finally
      LMatchIndices.Free;
    end;

    if Length(Result) > 0 then
      Exit;
  end;

  Result := nil;
end;

function TLlamaPromptLookupDecoding.Execute(
  const AInputIds: TArray<Integer>): TArray<Integer>;
begin
  Result := FindCandidatePredTokens(AInputIds);
end;

end.
--------------------------------------------------------------------------------
/src/Common/Tokenizer/LlamaCpp.Common.Tokenizer.Base.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Tokenizer.Base;

interface

uses
  System.SysUtils,
  LlamaCpp.Common.Types;

type
  /// <summary>
  /// Abstract base implementation of ILlamaTokenizer. Concrete tokenizers
  /// override the byte-level pair (Tokenize/Detokenize) and the string-level
  /// pair (Encode/Decode) to convert between text and llama token ids.
  /// </summary>
  /// <remarks>
  /// The generic element types (stripped to bare "TArray" by the source
  /// extraction) are restored here as TArray&lt;Integer&gt; token-id arrays,
  /// matching the concrete TLlamaTokenizer implementation.
  /// </remarks>
  TBaseLlamaTokenizer = class(TInterfacedObject, ILlamaTokenizer)
  public
    // Converts UTF-8 encoded bytes into token ids.
    function Tokenize(
      const AText: TBytes;
      const AAddSpecial: boolean = true;
      const AParseSpecial: boolean = false)
      : TArray<Integer>; virtual; abstract;
    // Converts token ids back into UTF-8 encoded bytes.
    function Detokenize(
      const ATokens: TArray<Integer>;
      const APrevTokens: TArray<Integer> = nil;
      const ASpecial: boolean = false)
      : TBytes; virtual; abstract;

    // Converts a string into token ids.
    function Encode(
      const AText: string;
      const AAddSpecial: boolean = true;
      const AParseSpecial: boolean = false)
      : TArray<Integer>; virtual; abstract;
    // Converts token ids back into a string.
    function Decode(
      const ATokens: TArray<Integer>;
      const APrevTokens: TArray<Integer> = nil;
      const ASpecial: boolean = false)
      : string; virtual; abstract;
  end;

implementation

end.
38 | -------------------------------------------------------------------------------- /src/Common/Tokenizer/LlamaCpp.Common.Tokenizer.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Common.Tokenizer; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | LlamaCpp.Wrapper.LlamaModel, 8 | LlamaCpp.Common.Tokenizer.Base; 9 | 10 | type 11 | TLlamaTokenizer = class(TBaseLlamaTokenizer) 12 | private 13 | FModel: TLlamaModel; // Interface or class representing the llama model 14 | public 15 | constructor Create(AModel: TLlamaModel); 16 | 17 | function Tokenize( 18 | const AText: TBytes; 19 | const AAddSpecial: boolean; 20 | const AParseSpecial: boolean) 21 | : TArray; override; 22 | function Detokenize( 23 | const ATokens: TArray; 24 | const APrevTokens: TArray = nil; 25 | const ASpecial: boolean = false) 26 | : TBytes; override; 27 | 28 | function Encode( 29 | const AText: string; 30 | const AAddSpecial: boolean = true; 31 | const AParseSpecial: boolean = false): TArray; override; 32 | function Decode( 33 | const ATokens: TArray; 34 | const APrevTokens: TArray = nil; 35 | const ASpecial: boolean = false) 36 | : string; override; 37 | end; 38 | 39 | implementation 40 | 41 | { TLlamaTokenizer } 42 | 43 | constructor TLlamaTokenizer.Create(AModel: TLlamaModel); 44 | begin 45 | inherited Create; 46 | FModel := AModel; 47 | end; 48 | 49 | function TLlamaTokenizer.Tokenize(const AText: TBytes; 50 | const AAddSpecial, AParseSpecial: boolean): TArray; 51 | begin 52 | Result := FModel.Tokenize(AText, AAddSpecial, AParseSpecial); 53 | end; 54 | 55 | function TLlamaTokenizer.Detokenize(const ATokens: TArray; 56 | const APrevTokens: TArray; const ASpecial: boolean): TBytes; 57 | begin 58 | Result := FModel.Detokenize(ATokens, ASpecial); 59 | end; 60 | 61 | function TLlamaTokenizer.Encode(const AText: string; const AAddSpecial, 62 | AParseSpecial: boolean): TArray; 63 | begin 64 | Result := Tokenize( 65 | 
TEncoding.UTF8.Convert( 66 | TEncoding.Unicode, 67 | TEncoding.UTF8, 68 | TEncoding.Unicode.GetBytes(AText)), 69 | AAddSpecial, 70 | AParseSpecial); 71 | end; 72 | 73 | function TLlamaTokenizer.Decode(const ATokens: TArray; 74 | const APrevTokens: TArray = nil; 75 | const ASpecial: boolean = false): string; 76 | begin 77 | try 78 | Result := TEncoding.UTF8.GetString( 79 | Detokenize(ATokens, APrevTokens, ASpecial)); 80 | except 81 | on E: EEncodingError do 82 | Result := String.Empty; 83 | end; 84 | end; 85 | 86 | end. 87 | -------------------------------------------------------------------------------- /src/LlamaCpp.ChatCompletion.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.ChatCompletion; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | System.Generics.Collections, 8 | LlamaCpp.Common.Types, 9 | LlamaCpp.Common.Settings, 10 | LlamaCpp.Common.Chat.Types, 11 | LlamaCpp.Types; 12 | 13 | type 14 | TLlamaChatCompletion = class(TInterfacedObject, ILlamaChatCompletion) 15 | private 16 | FSettings: TLlamaSettings; 17 | FTokenization: ILlamaTokenization; 18 | FCompletion: ILlamaCompletion; 19 | FChatHandler: ILlamaChatCompletionHandler; 20 | FChatHandlers: TDictionary; 21 | private 22 | function TokenizationTask( 23 | const AText: string; 24 | const AAddSpecial: boolean = true; 25 | const AParseSpecial: boolean = false) 26 | : TArray; 27 | function CreateCompletionTask( 28 | const ATokens: TArray; 29 | ASettings: TLlamaCompletionSettings; 30 | const AStoppingCriteria: IStoppingCriteriaList = nil; 31 | const ALogitsProcessor: ILogitsProcessorList = nil; 32 | const AGrammar: ILlamaGrammar = nil) 33 | : TCreateCompletionResponse; 34 | procedure CreateCompletionTaskAsync( 35 | const ATokens: TArray; 36 | ASettings: TLlamaCompletionSettings; 37 | const ACallback: TCompletionCallback; 38 | const AStoppingCriteria: IStoppingCriteriaList = nil; 39 | const ALogitsProcessor: ILogitsProcessorList = nil; 40 | const 
AGrammar: ILlamaGrammar = nil); 41 | 42 | function GetChatCompletionHandler(): ILlamaChatCompletionHandler; 43 | public 44 | constructor Create(const ALlama: ILlama); 45 | 46 | function CreateChatCompletion( 47 | const ASettings: TLlamaChatCompletionSettings; 48 | const AStoppingCriteria: IStoppingCriteriaList = nil; 49 | const ALogitsProcessor: ILogitsProcessorList = nil; 50 | const AGrammar: ILlamaGrammar = nil) 51 | : TCreateChatCompletionResponse; overload; 52 | procedure CreateChatCompletion( 53 | const ASettings: TLlamaChatCompletionSettings; 54 | const ACallback: TChatCompletionCallback; 55 | const AStoppingCriteria: IStoppingCriteriaList = nil; 56 | const ALogitsProcessor: ILogitsProcessorList = nil; 57 | const AGrammar: ILlamaGrammar = nil); overload; 58 | end; 59 | 60 | implementation 61 | 62 | uses 63 | LlamaCpp.Common.Chat.Completion.Collection; 64 | 65 | { TLlamaChatCompletion } 66 | 67 | constructor TLlamaChatCompletion.Create(const ALlama: ILlama); 68 | begin 69 | FSettings := ALlama.Settings; 70 | FChatHandler := ALlama.ChatHandler; 71 | FChatHandlers := ALlama.ChatHandlers; 72 | FTokenization := (ALlama as ILlamaTokenization); 73 | FCompletion := (ALlama as ILlamaCompletion); 74 | end; 75 | 76 | function TLlamaChatCompletion.TokenizationTask(const AText: string; 77 | const AAddSpecial, AParseSpecial: boolean): TArray; 78 | begin 79 | Result := FTokenization.Encode(AText, AAddSpecial, AParseSpecial); 80 | end; 81 | 82 | function TLlamaChatCompletion.CreateCompletionTask( 83 | const ATokens: TArray; 84 | ASettings: TLlamaCompletionSettings; 85 | const AStoppingCriteria: IStoppingCriteriaList = nil; 86 | const ALogitsProcessor: ILogitsProcessorList = nil; 87 | const AGrammar: ILlamaGrammar = nil): TCreateCompletionResponse; 88 | begin 89 | Result := FCompletion.CreateCompletion(ATokens, ASettings, 90 | AStoppingCriteria, ALogitsProcessor, AGrammar); 91 | end; 92 | 93 | procedure TLlamaChatCompletion.CreateCompletionTaskAsync( 94 | const ATokens: 
TArray; ASettings: TLlamaCompletionSettings; 95 | const ACallback: TCompletionCallback; 96 | const AStoppingCriteria: IStoppingCriteriaList = nil; 97 | const ALogitsProcessor: ILogitsProcessorList = nil; 98 | const AGrammar: ILlamaGrammar = nil); 99 | begin 100 | FCompletion.CreateCompletion(ATokens, ASettings, ACallback, 101 | AStoppingCriteria, ALogitsProcessor, AGrammar); 102 | end; 103 | 104 | function TLlamaChatCompletion.GetChatCompletionHandler: ILlamaChatCompletionHandler; 105 | begin 106 | if Assigned(FChatHandler) then 107 | Result := FChatHandler 108 | else if FChatHandlers.ContainsKey(FSettings.ChatFormat) then 109 | Result := FChatHandlers[FSettings.ChatFormat] 110 | else 111 | Result := TLlamaChatCompletionCollection 112 | .Instance.GetChatCompletionHandler(FSettings.ChatFormat); 113 | end; 114 | 115 | function TLlamaChatCompletion.CreateChatCompletion( 116 | const ASettings: TLlamaChatCompletionSettings; 117 | const AStoppingCriteria: IStoppingCriteriaList = nil; 118 | const ALogitsProcessor: ILogitsProcessorList = nil; 119 | const AGrammar: ILlamaGrammar = nil): TCreateChatCompletionResponse; 120 | var 121 | LHandler: ILlamaChatCompletionHandler; 122 | begin 123 | LHandler := GetChatCompletionHandler(); 124 | 125 | Result := LHandler.Handle( 126 | ASettings, TokenizationTask, CreateCompletionTask, 127 | AStoppingCriteria, ALogitsProcessor, AGrammar); 128 | end; 129 | 130 | procedure TLlamaChatCompletion.CreateChatCompletion( 131 | const ASettings: TLlamaChatCompletionSettings; 132 | const ACallback: TChatCompletionCallback; 133 | const AStoppingCriteria: IStoppingCriteriaList = nil; 134 | const ALogitsProcessor: ILogitsProcessorList = nil; 135 | const AGrammar: ILlamaGrammar = nil); 136 | var 137 | LHandler: ILlamaChatCompletionHandler; 138 | begin 139 | LHandler := GetChatCompletionHandler(); 140 | 141 | LHandler.Handle( 142 | ASettings, TokenizationTask, CreateCompletionTaskAsync, ACallback, 143 | AStoppingCriteria, ALogitsProcessor, AGrammar); 
144 | end; 145 | 146 | end. 147 | -------------------------------------------------------------------------------- /src/LlamaCpp.Embedding.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Embedding; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | System.Generics.Collections, 8 | LlamaCpp.Api.Llama, 9 | LlamaCpp.CType.Llama, 10 | LlamaCpp.Wrapper.LlamaModel, 11 | LlamaCpp.Wrapper.LlamaContext, 12 | LlamaCpp.Wrapper.LlamaBatch, 13 | LlamaCpp.Common.Settings, 14 | LlamaCpp.Common.Chat.Types, 15 | LlamaCpp.Types; 16 | 17 | type 18 | TLlamaEmbedding = class(TInterfacedObject, ILlamaEmbedding) 19 | private 20 | FModel: TLlamaModel; 21 | FContext: TLlamaContext; 22 | FBatch: TLlamaBatch; 23 | FSettings: TLlamaSettings; 24 | FModelPath: string; 25 | FTokenization: ILlamaTokenization; 26 | [weak] 27 | FLlama: ILlama; 28 | private 29 | procedure DecodeBatch(const ASeqSizes: TArray; 30 | const ANormalize: boolean; const AData: TList>); 31 | public 32 | constructor Create(const ALlama: ILlama); 33 | 34 | function Embed( 35 | const AInput: TArray; 36 | out AReturnCount: integer; 37 | const ANormalize: boolean = false; 38 | const ATruncate: boolean = true) 39 | : TArray>; 40 | function CreateEmbedding(const AInput: TArray; 41 | AModelName: string = '') 42 | : TCreateEmbeddingResponse; 43 | end; 44 | 45 | implementation 46 | 47 | uses 48 | LlamaCpp.Helper; 49 | 50 | { TLlamaEmbedding } 51 | 52 | constructor TLlamaEmbedding.Create(const ALlama: ILlama); 53 | begin 54 | FModel := ALlama.Model; 55 | FContext := ALlama.Context; 56 | FBatch := ALlama.Batch; 57 | FSettings := ALlama.Settings; 58 | FModelPath := ALlama.ModelPath; 59 | FTokenization := ALlama as ILlamaTokenization; 60 | FLlama := ALlama; 61 | end; 62 | 63 | procedure TLlamaEmbedding.DecodeBatch(const ASeqSizes: TArray; 64 | const ANormalize: boolean; const AData: TList>); 65 | var 66 | LPos: integer; 67 | LSize: integer; 68 | I: integer; 69 | J: integer; 70 | 
K: integer; 71 | LPtr: PEmbdArray; 72 | LEmbeddingsList: TList>; 73 | LEmbeddings: TList; 74 | begin 75 | TLlamaApi.Instance.llama_kv_cache_clear(FContext.Context); 76 | FContext.decode(FBatch); 77 | FBatch.Reset(); 78 | 79 | LEmbeddingsList := TList>.Create(); 80 | try 81 | LEmbeddings := TList.Create(); 82 | try 83 | 84 | if FContext.PoolingType() = TLlamaPoolingType.LLAMA_POOLING_TYPE_NONE then 85 | begin 86 | LPos := 0; 87 | 88 | for I := Low(ASeqSizes) to High(ASeqSizes) do 89 | begin 90 | LSize := ASeqSizes[I]; 91 | LPtr := TLlamaApi.Instance.llama_get_embeddings(FContext.Context); 92 | 93 | for J := 0 to LSize - 1 do 94 | begin 95 | 96 | {$R-} 97 | for K := LPos + J * FModel.NEmb to LPos + (j + 1) * FModel.NEmb do 98 | LEmbeddings.Add(LPtr^[K]); 99 | {$R+} 100 | 101 | if ANormalize then 102 | LEmbeddingsList.Add(TEmbedding.Normalize(LEmbeddings.ToArray())) 103 | else 104 | LEmbeddingsList.Add(LEmbeddings.ToArray()); 105 | 106 | LEmbeddings.Clear(); 107 | end; 108 | 109 | AData.AddRange(LEmbeddingsList.ToArray()); 110 | LEmbeddingsList.Clear(); 111 | 112 | LPos := LPos + LSize; 113 | end; 114 | end 115 | else 116 | begin 117 | for I := Low(ASeqSizes) to High(ASeqSizes) do 118 | begin 119 | LPtr := TLlamaApi.Instance.llama_get_embeddings_seq( 120 | FContext.Context, I); 121 | 122 | {$R-} 123 | for J := 0 to FModel.NEmb do 124 | LEmbeddings.Add(LPtr^[J]); 125 | {$R+} 126 | 127 | if ANormalize then 128 | AData.Add(TEmbedding.Normalize(LEmbeddings.ToArray())) 129 | else 130 | AData.Add(LEmbeddings.ToArray()); 131 | 132 | LEmbeddings.Clear(); 133 | end; 134 | end; 135 | finally 136 | LEmbeddings.Free(); 137 | end; 138 | finally 139 | LEmbeddingsList.Free(); 140 | end; 141 | end; 142 | 143 | function TLlamaEmbedding.Embed(const AInput: TArray; 144 | out AReturnCount: integer; const ANormalize, 145 | ATruncate: boolean): TArray>; 146 | var 147 | LNBatch: integer; 148 | LTokens: TArray; 149 | LLogitsAll: boolean; 150 | LData: TList>; 151 | LNTokens: integer; 152 | 
LSBatch: TArray; 153 | LTBatch: integer; 154 | LPBatch: integer; 155 | I: integer; 156 | begin 157 | LNBatch := FSettings.NBatch; 158 | LLogitsAll := FContext.PoolingType() = TLlamaPoolingType.LLAMA_POOLING_TYPE_NONE; 159 | 160 | if not FSettings.Embeddings then 161 | raise Exception.Create 162 | ('Llama model must be created with embedding=True to call this method'); 163 | 164 | if FSettings.Verbose then 165 | TLlamaApi.Instance.llama_perf_context_reset(FContext.Context); 166 | 167 | FBatch.Reset(); 168 | AReturnCount := 0; 169 | SetLength(LSBatch, 0); 170 | LTBatch := 0; 171 | LPBatch := 0; 172 | 173 | LData := TList >.Create(); 174 | try 175 | for I := Low(AInput) to High(AInput) do 176 | begin 177 | LTokens := FTokenization.Encode(AInput[I], true, false); 178 | if ATruncate and (Length(LTokens) > LNBatch) then 179 | SetLength(LTokens, LNBatch); 180 | 181 | LNTokens := Length(LTokens); 182 | AReturnCount := AReturnCount + LNTokens; 183 | 184 | if LNTokens > LNBatch then 185 | raise Exception.CreateFmt 186 | ('Requested tokens (%d) exceed batch size of %d', 187 | [LNTokens, LNBatch]); 188 | 189 | if LTBatch + LNTokens > LNBatch then 190 | begin 191 | DecodeBatch(LSBatch, ANormalize, LData); 192 | SetLength(LSBatch, 0); 193 | LTBatch := 0; 194 | LPBatch := 0; 195 | end; 196 | 197 | FBatch.AddSequence(LTokens, LPBatch, LLogitsAll); 198 | 199 | SetLength(LSBatch, Length(LSBatch) + 1); 200 | LSBatch[High(LSBatch)] := LNTokens; 201 | LTBatch := LTBatch + LNTokens; 202 | LPBatch := LPBatch + 1; 203 | end; 204 | 205 | DecodeBatch(LSBatch, ANormalize, LData); 206 | 207 | if FSettings.Verbose then 208 | TLlamaApi.Instance.llama_perf_context_print(FContext.Context); 209 | 210 | Result := LData.ToArray(); 211 | 212 | TLlamaApi.Instance.llama_kv_cache_clear(FContext.Context); 213 | 214 | FLlama.Reset(); 215 | finally 216 | LData.Free(); 217 | end; 218 | end; 219 | 220 | function TLlamaEmbedding.CreateEmbedding(const AInput: TArray; 221 | AModelName: string): 
TCreateEmbeddingResponse; 222 | var 223 | LTotalTokens: integer; 224 | LEmbeddings: TArray>; 225 | begin 226 | if AModelName.IsEmpty() then 227 | AModelName := FModelPath; 228 | 229 | LEmbeddings := Embed(AInput, LTotalTokens); 230 | 231 | Result := TCreateEmbeddingResponse.Create( 232 | AModelName, LEmbeddings, LTotalTokens); 233 | end; 234 | 235 | end. 236 | -------------------------------------------------------------------------------- /src/LlamaCpp.Evaluator.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Evaluator; 2 | 3 | interface 4 | 5 | uses 6 | LlamaCpp.Types, 7 | LlamaCpp.CType.Llama, 8 | LlamaCpp.Wrapper.LlamaContext, 9 | LlamaCpp.Wrapper.LlamaBatch, 10 | LlamaCpp.Common.Types; 11 | 12 | type 13 | TLlamaEvaluator = class(TInterfacedObject, ILlamaEvaluator) 14 | private 15 | FContext: TLlamaContext; 16 | FBatch: TLlamaBatch; 17 | FContextParams: TLlamaContextParams; 18 | [weak] 19 | FLlama: ILlama; 20 | public 21 | constructor Create(const ALlama: ILlama); 22 | 23 | procedure Eval(const ATokens: TArray); 24 | end; 25 | 26 | implementation 27 | 28 | uses 29 | System.Math; 30 | 31 | { TLlamaEvaluator } 32 | 33 | constructor TLlamaEvaluator.Create(const ALlama: ILlama); 34 | begin 35 | FContext := ALlama.Context; 36 | FBatch := ALlama.Batch; 37 | FContextParams := ALlama.ContextParams; 38 | FLlama := ALlama; 39 | end; 40 | 41 | procedure TLlamaEvaluator.Eval(const ATokens: TArray); 42 | var 43 | I: integer; 44 | J: integer; 45 | K: integer; 46 | LIndex: integer; 47 | LNPast: integer; 48 | LNTokens: integer; 49 | //LRows: integer; 50 | //LCols: integer; 51 | LBatch: TArray; 52 | LLogits: PLogitArray; 53 | begin 54 | FContext.KvCacheSeqRm(-1, FLlama.NumberOfTokens, -1); 55 | 56 | for I := 0 to High(ATokens) div FLlama.NumberOfBatches do 57 | begin 58 | LBatch := Copy(ATokens, I * FLlama.NumberOfBatches, 59 | Min(Length(ATokens) - I * FLlama.NumberOfBatches, FLlama.NumberOfBatches)); 60 | LNPast := 
FLlama.NumberOfTokens; 61 | LNTokens := Length(LBatch); 62 | 63 | FBatch.SetBatch(LBatch, LNPast, FContextParams.LogitsAll); 64 | FContext.Decode(FBatch); 65 | 66 | Move(LBatch[0], FLlama.InputIds[LNPast], LNTokens * SizeOf(integer)); 67 | 68 | if FContextParams.LogitsAll then 69 | begin 70 | //LRows := LNTokens; 71 | //LCols := NVocab; 72 | LLogits := FContext.GetLogits(); 73 | 74 | LIndex := 0; 75 | { TODO : SLOW! Make it better. } 76 | for J := LNPast to LNPast + LNTokens - 1 do 77 | for K := Low(FLlama.Scores[J]) to High(FLlama.Scores[J]) do 78 | begin 79 | {$R-} 80 | FLlama.Scores[J][K] := LLogits^[LIndex]; 81 | {$R+} 82 | Inc(LIndex); 83 | end; 84 | end 85 | else 86 | begin 87 | // Handle case where logits_all is False 88 | // This section is commented out in Python but should be considered here as needed 89 | // For this case, you would handle updating just the last row or other logic. 90 | end; 91 | 92 | FLlama.NumberOfTokens := FLlama.NumberOfTokens + LNTokens; 93 | end; 94 | end; 95 | 96 | end. 97 | -------------------------------------------------------------------------------- /src/LlamaCpp.Exception.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Exception; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils; 7 | 8 | type 9 | ELlama = class(Exception); 10 | 11 | ETensorSplitExceed = class(ELlama); 12 | 13 | ELoraAdapterInitFailure = class(ELlama); 14 | 15 | ELoraAdapterSetFailure = class(ELlama); 16 | 17 | EUnknownValueForKVOverrides = class(ELlama); 18 | 19 | ESaveStateCopy = class(ELlama); 20 | 21 | ESaveStateSet = class(ELlama); 22 | 23 | implementation 24 | 25 | end. 
--------------------------------------------------------------------------------
/src/LlamaCpp.Generator.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Generator;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.Wrapper.LlamaContext,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Types;

type
  // Token generation loop: evaluates tokens through ILlamaEvaluator, samples
  // follow-up tokens through ILlamaSampler, and streams them to a callback.
  // Optionally consults an ILlamaDraftModel for speculative decoding.
  // NOTE(review): generic type parameters (e.g. TArray<Integer>) appear to
  // have been stripped from this dump by the extraction - confirm against
  // the repository sources.
  TLlamaGenerator = class(TInterfacedObject, ILlamaGenerator)
  private
    FContext: TLlamaContext;
    FDraftModel: ILlamaDraftModel;
    FSampler: ILlamaSampler;
    FEvaluator: ILlamaEvaluator;
    [weak]
    FLlama: ILlama;  // weak to avoid a reference cycle with the owning ILlama
  public
    constructor Create(const ALlama: ILlama);

    // Generates tokens starting from ATokens until ACallback sets its
    // continue-flag to false or AStoppingCriteria fires. When AReset is true
    // the context is reset unless a common prefix with the previous run can
    // be reused.
    procedure Generate(
      ATokens: TArray;
      const ASettings: TLlamaSamplerSettings;
      const ACallback: TGeneratorCallback;
      const AReset: boolean = true;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil);
  end;

implementation

uses
  System.Math,
  LlamaCpp.Common.Sampling.Sampler,
  LlamaCpp.Helper;

{ TLlamaGenerator }

// Caches the collaborating interfaces/wrappers from the owning ILlama.
constructor TLlamaGenerator.Create(const ALlama: ILlama);
begin
  FContext := ALlama.Context;
  FDraftModel := ALlama.DraftModel;
  FSampler := ALlama as ILlamaSampler;
  FEvaluator := ALlama as ILlamaEvaluator;
  FLlama := ALlama;
end;

// Main generation loop. The outer "while true" only terminates via Exit
// (callback requested stop, or a stopping criterion matched).
procedure TLlamaGenerator.Generate(ATokens: TArray;
  const ASettings: TLlamaSamplerSettings; const ACallback: TGeneratorCallback;
  const AReset: boolean; const AStoppingCriteria: IStoppingCriteriaList;
  const ALogitsProcessor: ILogitsProcessorList; const AGrammar: ILlamaGrammar);
var
  I: integer;
  J: integer;
  LReset: boolean;
  LLongestPrefix: integer;
  LSampleIdx: integer;
  LTokens: TList;
  LToken: integer;
  LContinue: boolean;
  LInputIds: TArray;
  LScores: TArray>;
  LTokensOrNone: TArray;
  LDraftTokens: TArray;
  LSampler: TLlamaSampler;
begin
  Assert(Assigned(ACallback), 'Param "ACallback" not assigned.');

  //FMirostatMu := 2.0 * ASettings.MirostatTau;
  LReset := AReset;
  LContinue := true;
  // Snapshot of the ids/scores accumulated so far in the owning ILlama.
  LInputIds := TInputIdHelper.InputId(FLlama.InputIds, FLlama.NumberOfTokens);
  LScores := TScoresHelper.Scores(FLlama.Scores, FLlama.NumberOfTokens);

  LSampler := TLlamaSampler.Create();
  try
    FSampler.InitSampler(
      LInputIds, ASettings, LSampler, ALogitsProcessor, AGrammar);

    // Prefix reuse: when the new prompt shares a leading run of tokens with
    // the previous evaluation, skip resetting and only evaluate the suffix.
    if LReset and (FLlama.NumberOfTokens > 0) then
    begin
      LLongestPrefix := 0;
      for I := 0 to Min(High(LInputIds), High(ATokens) - 1) do
      begin
        if LInputIds[I] = ATokens[I] then
          Inc(LLongestPrefix)
        else
          Break;
      end;

      if LLongestPrefix > 0 then
      begin
        LReset := false;
        ATokens := TArrayHelper.Slice(ATokens, LLongestPrefix);
        FLlama.NumberOfTokens := LLongestPrefix;
      end;
    end;

    if LReset then
      FLlama.Reset();

    // Index of the position whose logits will be sampled next.
    LSampleIdx := FLlama.NumberOfTokens + Length(ATokens) - 1;
    LTokens := TList.Create(ATokens);
    try
      while true do
      begin
        // Evaluate the pending tokens (prompt suffix, then each sampled /
        // drafted batch) so their logits become available.
        FEvaluator.Eval(LTokens.ToArray());

        while LSampleIdx < FLlama.NumberOfTokens do
        begin
          LToken := FSampler.Sample(FLlama.NumberOfTokens, ASettings, LSampler, LSampleIdx);
          Inc(LSampleIdx);

          if Assigned(AStoppingCriteria) then
          begin
            LInputIds := TInputIdHelper.InputId(FLlama.InputIds, FLlama.NumberOfTokens);
            if AStoppingCriteria.Execute(
              TArrayHelper.Slice(LInputIds, Low(LInputIds), LSampleIdx),
              LScores[LSampleIdx - FLlama.NumberOfTokens]
            ) then
              Exit;
          end;

          // Stream the token out; the callback may push extra tokens back
          // and clears LContinue to stop generation.
          LTokensOrNone := ACallback(LToken, LContinue);

          if not LContinue then
            Exit;

          //SetLength(LTokensOrNone, 0);
          LTokens.Clear;
          LTokens.Add(LToken);

          if Assigned(LTokensOrNone) then
            LTokens.AddRange(LTokensOrNone);

          // Speculative-decoding rollback: a sampled token disagreeing with
          // a drafted one invalidates everything after it in the KV cache.
          if (LSampleIdx < FLlama.NumberOfTokens) and (LToken <> LInputIds[LSampleIdx]) then
          begin
            FLlama.NumberOfTokens := LSampleIdx;
            FContext.KvCacheSeqRm(-1, FLlama.NumberOfTokens, -1);
            Break;
          end;
        end;

        // Ask the draft model for candidate continuations and queue them
        // (clipped so the total stays within the context window).
        if Assigned(FDraftModel) then
        begin
          J := 0;
          for I := FLlama.NumberOfTokens to FLlama.NumberOfTokens + LTokens.Count - 1 do
          begin
            FLlama.InputIds[I] := LTokens[J];
            Inc(J);
          end;

          LDraftTokens := FDraftModel.Execute(
            TArrayHelper.Slice(
              FLlama.InputIds, Low(FLlama.InputIds), FLlama.NumberOfTokens + LTokens.Count)
          );

          if Assigned(LDraftTokens) then
            LTokens.AddRange(
              TArrayHelper.Slice(
                LDraftTokens,
                Low(LDraftTokens),
                FContext.NCtx() - FLlama.NumberOfTokens - LTokens.Count
              )
            );
        end;
      end;
    finally
      LTokens.Free;
    end;
  finally
    LSampler.Free();
  end;
end;

end.
--------------------------------------------------------------------------------
/src/LlamaCpp.Registration.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Registration;

interface

uses
  System.Classes,
  LlamaCpp.Llama,
  LlamaCpp.Download;

// IDE component registration entry point.
procedure Register();

implementation

// Registers the TLlama and TLlamaDownload components on the "LlamaCpp"
// palette page of the Delphi IDE.
procedure Register();
begin
  RegisterComponents('LlamaCpp', [TLlama, TLlamaDownload]);
end;

end.
20 | -------------------------------------------------------------------------------- /src/LlamaCpp.Sampler.pas: -------------------------------------------------------------------------------- 1 | unit LlamaCpp.Sampler; 2 | 3 | interface 4 | 5 | uses 6 | System.SysUtils, 7 | System.Generics.Collections, 8 | LlamaCpp.Types, 9 | LlamaCpp.Wrapper.LlamaModel, 10 | LlamaCpp.Wrapper.LlamaContext, 11 | LlamaCpp.Common.Types, 12 | LlamaCpp.Common.Settings, 13 | LlamaCpp.Common.Sampling.Sampler; 14 | 15 | type 16 | TLlamaSampler = class(TInterfacedObject, ILlamaSampler) 17 | private 18 | FSettings: TLlamaSettings; 19 | FModel: TLlamaModel; 20 | FContext: TLlamaContext; 21 | public 22 | constructor Create(const ALlama: ILlama); 23 | 24 | procedure InitSampler( 25 | const AInputIds: TArray; 26 | const ASettings: TLlamaSamplerSettings; 27 | const ASampler: LlamaCpp.Common.Sampling.Sampler.TLlamaSampler; 28 | const ALogitsProcessor: ILogitsProcessorList; 29 | const AGrammar: ILlamaGrammar); 30 | 31 | function Sample( 32 | const ANumberOfTokens: integer; 33 | const ASettings: TLlamaSamplerSettings; 34 | const ASampler: LlamaCpp.Common.Sampling.Sampler.TLlamaSampler; 35 | const AIdx: integer = -1): integer; 36 | end; 37 | 38 | implementation 39 | 40 | uses 41 | System.Math, 42 | LlamaCpp.CType.Llama; 43 | 44 | { TLlamaSampler } 45 | 46 | constructor TLlamaSampler.Create(const ALlama: ILlama); 47 | begin 48 | FModel := ALlama.Model; 49 | FContext := ALlama.Context; 50 | FSettings := ALlama.Settings; 51 | end; 52 | 53 | procedure TLlamaSampler.InitSampler(const AInputIds: TArray; 54 | const ASettings: TLlamaSamplerSettings; 55 | const ASampler: LlamaCpp.Common.Sampling.Sampler.TLlamaSampler; 56 | const ALogitsProcessor: ILogitsProcessorList; const AGrammar: ILlamaGrammar); 57 | var 58 | NProbs: integer; 59 | MinKeep: integer; 60 | begin 61 | if Assigned(ALogitsProcessor) then 62 | ASampler.AddCustom( 63 | procedure(const ATokenDataArray: PLlamaTokenDataArray) 64 | var 65 | I: 
Integer; 66 | LSize: integer; 67 | LDataSOA: PLlamaTokenData; 68 | LLogits: TList; 69 | LCustomLogits: TArray; 70 | begin 71 | LSize := ATokenDataArray.Size; 72 | 73 | LLogits := TList.Create(); 74 | try 75 | LDataSOA := ATokenDataArray.Data; 76 | for I := 0 to LSize - 1 do 77 | begin 78 | LLogits.Add(LDataSOA^.logit); 79 | Inc(LDataSOA); 80 | end; 81 | 82 | LCustomLogits := LLogits.ToArray(); 83 | finally 84 | LLogits.Free(); 85 | end; 86 | 87 | ALogitsProcessor.Execute(AInputIds, LCustomLogits); 88 | 89 | LDataSOA := ATokenDataArray.Data; 90 | for I := 0 to Min(LSize, Length(LCustomLogits)) - 1 do 91 | begin 92 | LDataSOA^.logit := LCustomLogits[I]; 93 | Inc(LDataSOA); 94 | end; 95 | end); 96 | 97 | ASampler.AddPenalties( 98 | FModel.NVocab, FModel.TokenEOS, FModel.TokenNL, FSettings.LastNTokensSize, 99 | ASettings.RepeatPenalty, ASettings.FrequencyPenalty, 100 | ASettings.PresencePenalty, ASettings.PenalizeNL, false); 101 | 102 | if Assigned(AGrammar) then 103 | ASampler.AddGrammar(FModel, AGrammar); 104 | 105 | if ASettings.Temp < 0.0 then 106 | begin 107 | ASampler.AddSoftmax; 108 | ASampler.AddDist(FSettings.Seed); 109 | end 110 | else if ASettings.Temp = 0.0 then 111 | begin 112 | ASampler.AddGreedy; 113 | end 114 | else 115 | begin 116 | if ASettings.MirostatMode = 1 then 117 | begin 118 | ASampler.AddMirostat(FModel.NVocab, FSettings.Seed, ASettings.MirostatTau, 119 | ASettings.MirostatEta, 100); 120 | end 121 | else if ASettings.MirostatMode = 2 then 122 | begin 123 | ASampler.AddMirostatV2(FSettings.Seed, ASettings.MirostatTau, 124 | ASettings.MirostatEta); 125 | end 126 | else 127 | begin 128 | NProbs := 0; 129 | MinKeep := Max(1, NProbs); 130 | ASampler.AddTopK(ASettings.TopK); 131 | ASampler.AddTypical(ASettings.TypicalP, MinKeep); 132 | ASampler.AddTopP(ASettings.TopP, MinKeep); 133 | ASampler.AddMinP(ASettings.MinP, MinKeep); 134 | ASampler.AddTemp(ASettings.Temp); 135 | ASampler.AddDist(FSettings.Seed); 136 | end; 137 | end; 138 | end; 139 | 140 | 
function TLlamaSampler.Sample(const ANumberOfTokens: integer;
  const ASettings: TLlamaSamplerSettings;
  const ASampler: LlamaCpp.Common.Sampling.Sampler.TLlamaSampler;
  const AIdx: integer): integer;
var
  LIdx: integer;
begin
  // Draws one token from the context's logits using the configured sampler
  // chain. AIdx >= 0 is translated into an offset relative to the end of the
  // evaluated batch (llama.cpp expects a negative "from the back" index);
  // AIdx < 0 means "sample from the last evaluated position".
  Assert(ANumberOfTokens > 0, 'No tokens available for sampling.');

  if AIdx >= 0 then
    LIdx := AIdx - ANumberOfTokens
  else
    LIdx := -1;

  Assert(Assigned(FContext), 'Context is not initialized.');

  Result := ASampler.Sample(FContext, LIdx);
end;

end.
--------------------------------------------------------------------------------
/src/LlamaCpp.Tokenization.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Tokenization;

interface

uses
  System.SysUtils,
  LlamaCpp.Types,
  LlamaCpp.Wrapper.LlamaModel,
  LlamaCpp.Common.Types;

type
  /// <summary>
  /// Thin facade implementing ILlamaTokenization by delegating every call to
  /// an ILlamaTokenizer - either the one already attached to the ILlama
  /// instance or a freshly created model-backed tokenizer.
  /// </summary>
  /// NOTE(review): generic type parameters in this unit were reconstructed
  /// from usage (the extraction stripped angle brackets) - verify against the
  /// repository.
  TLlamaTokenization = class(TInterfacedObject, ILlamaTokenization)
  private
    // Tokenizer all methods delegate to; owned via interface reference.
    FTokenizer: ILlamaTokenizer;
  public
    constructor Create(const ALlama: ILlama);

    /// <summary>Converts UTF-8 encoded bytes into model token ids.</summary>
    function Tokenize(
      const AText: TBytes;
      const AAddSpecial: boolean = true;
      const AParseSpecial: boolean = false): TArray<Integer>;
    /// <summary>Converts token ids back into raw UTF-8 bytes.</summary>
    function Detokenize(
      const ATokens: TArray<Integer>;
      const APrevTokens: TArray<Integer> = nil;
      const ASpecial: boolean = false): TBytes; overload;

    /// <summary>String-level convenience wrapper around Tokenize.</summary>
    function Encode(
      const AText: string;
      const AAddSpecial: boolean = true;
      const AParseSpecial: boolean = false)
      : TArray<Integer>;
    /// <summary>String-level convenience wrapper around Detokenize.</summary>
    function Decode(
      const ATokens: TArray<Integer>;
      const APrevTokens: TArray<Integer> = nil;
      const ASpecial: boolean = false)
      : string;
  end;

implementation

uses
  LlamaCpp.Common.Tokenizer;

{ TLlamaTokenization }

constructor TLlamaTokenization.Create(const ALlama: ILlama);
begin
  // Reuse the llama instance's tokenizer when available; otherwise build a
  // default tokenizer straight from the loaded model.
  if not Assigned(ALlama.Tokenizer) then
    FTokenizer := TLlamaTokenizer.Create(ALlama.Model)
  else
    FTokenizer := ALlama.Tokenizer;
end;

function TLlamaTokenization.Tokenize(const AText: TBytes;
  const AAddSpecial: boolean; const AParseSpecial: boolean): TArray<Integer>;
begin
  Result := FTokenizer.Tokenize(AText, AAddSpecial, AParseSpecial);
end;

function TLlamaTokenization.Detokenize(const ATokens,
  APrevTokens: TArray<Integer>; const ASpecial: boolean): TBytes;
begin
  Result := FTokenizer.Detokenize(ATokens, APrevTokens, ASpecial);
end;

function TLlamaTokenization.Encode(const AText: string; const AAddSpecial,
  AParseSpecial: boolean): TArray<Integer>;
begin
  Result := FTokenizer.Encode(AText, AAddSpecial, AParseSpecial);
end;

function TLlamaTokenization.Decode(const ATokens: TArray<Integer>;
  const APrevTokens: TArray<Integer>;
  const ASpecial: boolean): string;
begin
  // NOTE(review): APrevTokens and ASpecial are accepted but never forwarded,
  // unlike Detokenize() which passes all three arguments through. Confirm
  // whether ILlamaTokenizer.Decode can take them and forward them if so;
  // callers currently get the same result regardless of these arguments.
  Result := FTokenizer.Decode(ATokens);
end;

end.
--------------------------------------------------------------------------------
/src/LlamaCpp.Types.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Types;

interface

uses
  System.SysUtils,
  System.Generics.Collections,
  LlamaCpp.CType.Llama,
  LlamaCpp.Wrapper.LlamaModel,
  LlamaCpp.Wrapper.LlamaContext,
  LlamaCpp.Wrapper.LlamaBatch,
  LlamaCpp.Common.Types,
  LlamaCpp.Common.Settings,
  LlamaCpp.Common.State,
  LlamaCpp.Common.Sampling.Sampler,
  LlamaCpp.Common.Chat.Types;

// NOTE(review): the generic type parameters below were reconstructed from
// usage elsewhere in this file (the extraction stripped angle brackets).
// Verify element types - in particular the dictionary value types and the
// Embed input element type - against the repository.

type
  /// <summary>Tokenization services: bytes/string to token ids and back.</summary>
  ILlamaTokenization = interface
    ['{370B06E0-073B-4305-A35F-60B32DD93FA0}']
    function Tokenize(
      const AText: TBytes;
      const AAddSpecial: boolean = true;
      const AParseSpecial: boolean = false)
      : TArray<Integer>;
    function Detokenize(
      const ATokens: TArray<Integer>;
      const APrevTokens: TArray<Integer> = nil;
      const ASpecial: boolean = false)
      : TBytes;

    function Encode(
      const AText: string;
      const AAddSpecial: boolean = true;
      const AParseSpecial: boolean = false)
      : TArray<Integer>;
    function Decode(
      const ATokens: TArray<Integer>;
      const APrevTokens: TArray<Integer> = nil;
      const ASpecial: boolean = false)
      : string;
  end;

  /// <summary>Feeds a token batch through the model (forward pass).</summary>
  ILlamaEvaluator = interface
    ['{5B29A173-C2E2-42A1-9C38-78882BE84FF2}']
    procedure Eval(const ATokens: TArray<Integer>);
  end;

  /// <summary>Builds the sampler chain and draws tokens from logits.</summary>
  ILlamaSampler = interface
    ['{2E2E3727-B735-485F-A049-736B2CEB4DE5}']
    procedure InitSampler(
      const AInputIds: TArray<Integer>;
      const ASettings: TLlamaSamplerSettings;
      const ASampler: TLlamaSampler;
      const ALogitsProcessor: ILogitsProcessorList;
      const AGrammar: ILlamaGrammar);

    function Sample(
      const ANumberOfTokens: integer;
      const ASettings: TLlamaSamplerSettings;
      const ASampler: LlamaCpp.Common.Sampling.Sampler.TLlamaSampler;
      const AIdx: integer = -1): integer;
  end;

  // Called once per generated token; the callback may set AContinue to False
  // to stop generation and may return replacement tokens to inject.
  TGeneratorCallback = reference to function(const AToken: integer;
    var AContinue: boolean): TArray<Integer>;

  /// <summary>Token-by-token generation loop driver.</summary>
  ILlamaGenerator = interface
    ['{9939F90B-A942-4FB2-8C6B-11ED34A1A549}']
    procedure Generate(
      ATokens: TArray<Integer>;
      const ASettings: TLlamaSamplerSettings;
      const ACallback: TGeneratorCallback;
      const AReset: boolean = true;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil);
  end;

  /// <summary>Embedding extraction for one or more input strings.</summary>
  ILlamaEmbedding = interface
    ['{5592ACB9-A0AC-4211-A0F7-4FA348E084F8}']
    function Embed(
      const AInput: TArray<string>;
      out AReturnCount: integer;
      const ANormalize: boolean = false;
      const ATruncate: boolean = true)
      : TArray<TArray<Single>>;
    function CreateEmbedding(const AInput: TArray<string>;
      AModelName: string = ''): TCreateEmbeddingResponse;
  end;

  /// <summary>Text completion API (blocking and streaming overloads).</summary>
  ILlamaCompletion = interface
    ['{E945EA10-AD4D-4545-9457-E9FB0804D3E5}']
    function CreateCompletion(
      const APrompt: string;
      ASettings: TLlamaCompletionSettings;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil)
      : TCreateCompletionResponse; overload;
    procedure CreateCompletion(
      const APrompt: string;
      ASettings: TLlamaCompletionSettings;
      const ACallback: TCompletionCallback;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil); overload;
    function CreateCompletion(
      const APrompt: TArray<Integer>;
      ASettings: TLlamaCompletionSettings;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil)
      : TCreateCompletionResponse; overload;
    procedure CreateCompletion(
      const APrompt: TArray<Integer>;
      ASettings: TLlamaCompletionSettings;
      const ACallback: TCompletionCallback;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil); overload;
  end;

  /// <summary>Chat completion API (blocking and streaming overloads).</summary>
  ILlamaChatCompletion = interface
    ['{352133DB-4AA1-4425-B7AA-239199AC5E6E}']
    function CreateChatCompletion(
      const ASettings: TLlamaChatCompletionSettings;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil)
      : TCreateChatCompletionResponse; overload;
    procedure CreateChatCompletion(
      const ASettings: TLlamaChatCompletionSettings;
      const ACallback: TChatCompletionCallback;
      const AStoppingCriteria: IStoppingCriteriaList = nil;
      const ALogitsProcessor: ILogitsProcessorList = nil;
      const AGrammar: ILlamaGrammar = nil); overload;
  end;

  /// <summary>
  /// Root abstraction over a loaded llama.cpp model + context: exposes model
  /// metadata, the evaluation state (tokens, logit scores), wrapper objects,
  /// and the pluggable tokenizer/chat/cache collaborators.
  /// </summary>
  ILlama = interface
    ['{EF8CB1BF-678D-498E-BEA6-6CAFFC84DDE5}']
    function GetModelPath(): string;
    function GetMetadata(): TMetadata;
    function GetBOSToken(): string;
    function GetEOSToken(): string;

    function GetNumberOfTokens(): integer;
    procedure SetNumberOfTokens(const ANumberOfTokens: integer);
    function GetNumberOfBatches(): integer;

    function GetInputIds(): TArray<Integer>;
    procedure SetInputIds(const AInputIds: TArray<Integer>);

    function GetScores(): TArray<TArray<Single>>;
    procedure SetScores(const AScores: TArray<TArray<Single>>);

    function GetModelParams(): TLlamaModelParams;
    function GetModel(): TLlamaModel;

    function GetContextParams(): TLlamaContextParams;
    function GetContext(): TLlamaContext;

    function GetBatch(): TLlamaBatch;

    function GetSettings(): TLlamaSettings;

    function GetTokenizer(): ILlamaTokenizer;
    function GetChatHandler(): ILlamaChatCompletionHandler;
    function GetDraftModel(): ILlamaDraftModel;
    function GetCache(): ILlamaCache;
    function GetTemplateChoices(): TDictionary<string, string>;
    function GetChatHandlers(): TDictionary<string, ILlamaChatCompletionHandler>;

    function SaveState(): TLlamaState;
    procedure LoadState(const AState: TLlamaState);

    procedure Reset();

    property ModelPath: string read GetModelPath;
    property Metadata: TMetadata read GetMetadata;
    property BOSToken: string read GetBOSToken;
    property EOSToken: string read GetEOSToken;
    property NumberOfTokens: integer read GetNumberOfTokens write SetNumberOfTokens;
    property NumberOfBatches: integer read GetNumberOfBatches;
    property InputIds: TArray<Integer> read GetInputIds write SetInputIds;
    property Scores: TArray<TArray<Single>> read GetScores write SetScores;
    property ModelParams: TLlamaModelParams read GetModelParams;
    property Model: TLlamaModel read GetModel;
    property ContextParams: TLlamaContextParams read GetContextParams;
    property Context: TLlamaContext read GetContext;
    property Batch: TLlamaBatch read GetBatch;
    property Settings: TLlamaSettings read GetSettings;
    property Tokenizer: ILlamaTokenizer read GetTokenizer;
    property ChatHandler: ILlamaChatCompletionHandler read GetChatHandler;
    property DraftModel: ILlamaDraftModel read GetDraftModel;
    property Cache: ILlamaCache read GetCache;
    property TemplateChoices: TDictionary<string, string> read GetTemplateChoices;
    property ChatHandlers: TDictionary<string, ILlamaChatCompletionHandler> read GetChatHandlers;
  end;

implementation

end.
--------------------------------------------------------------------------------
/src/Wrapper/LlamaCpp.Wrapper.LlamaBatch.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Wrapper.LlamaBatch;

interface

uses
  System.SysUtils,
  LlamaCpp.CType.Llama;

type
  /// <summary>
  /// Owns a native llama_batch allocation (llama_batch_init/llama_batch_free)
  /// and provides helpers to fill it with token sequences before evaluation.
  /// </summary>
  TLlamaBatch = class
  private
    FBatch: LlamaCpp.CType.Llama.TLlamaBatch; // native batch struct
    FNTokens: Integer;    // capacity passed to llama_batch_init
    FEmbeddings: Integer; // embd parameter of llama_batch_init
    FNSeqMax: Integer;    // max sequence ids per token
    function HasBatch(): boolean;
  public
    constructor Create(const ANTokens, AEmbeddings, ANSeqMax: Integer);
    destructor Destroy(); override;

    procedure LoadBatch();
    procedure UnloadBatch();

    function NTokens: Integer;
    procedure Reset;
    procedure SetBatch(const ABatch: TArray<Integer>; ANPast: Integer; ALogitsAll: Boolean);
    procedure AddSequence(const ABatch: TArray<Integer>; ASeqID: Integer; ALogitsAll: Boolean);
  public
    property Batch: LlamaCpp.CType.Llama.TLlamaBatch read FBatch;
  end;

implementation

uses
  System.IOUtils,
  LlamaCpp.Api.Llama;

{ TLlamaBatch }

constructor TLlamaBatch.Create(const ANTokens, AEmbeddings, ANSeqMax: Integer);
begin
  // Only records the dimensions; the native allocation happens in LoadBatch.
  FNTokens := ANTokens;
  FEmbeddings := AEmbeddings;
  FNSeqMax := ANSeqMax;
end;

destructor TLlamaBatch.Destroy;
begin
  // BUGFIX: release the native batch if the owner forgot to call UnloadBatch;
  // previously the destructor leaked the llama_batch allocation.
  // UnloadBatch is a no-op when nothing was loaded.
  UnloadBatch();
  inherited;
end;

function TLlamaBatch.HasBatch: boolean;
begin
  // n_seq_id is only non-nil after a successful llama_batch_init.
  Result := Assigned(FBatch.n_seq_id);
end;

procedure TLlamaBatch.LoadBatch;
begin
  FBatch := TLlamaApi.Instance.llama_batch_init(FNTokens, FEmbeddings, FNSeqMax);

  if not HasBatch() then
    raise Exception.Create('Failed to create llama_batch');
end;

procedure TLlamaBatch.UnloadBatch;
begin
  if HasBatch() then
    TLlamaApi.Instance.llama_batch_free(FBatch);
end;

function TLlamaBatch.NTokens: Integer;
begin
  Result := FBatch.n_tokens;
end;

procedure TLlamaBatch.Reset;
begin
  // Marks the batch as empty; the native buffers stay allocated for reuse.
  FBatch.n_tokens := 0;
end;

procedure TLlamaBatch.SetBatch(const ABatch: TArray<Integer>; ANPast: Integer; ALogitsAll: Boolean);
var
  i, n_tokens: Integer;
begin
  // Replaces the batch content with ABatch as sequence 0, positioned after
  // ANPast already-evaluated tokens.
  n_tokens := Length(ABatch);
  FBatch.n_tokens := n_tokens;

  // Guard: with an empty input the "last logit" write below would hit
  // index -1 (and range checks are disabled here).
  if n_tokens = 0 then
    Exit;

  {$R-}
  for i := 0 to n_tokens - 1 do
  begin
    FBatch.token^[i] := ABatch[i];
    FBatch.pos^[i] := ANPast + i;
    FBatch.seq_id^[i][0] := 0;
    FBatch.n_seq_id^[i] := 1;
    FBatch.logits^[i] := ShortInt(ALogitsAll);
  end;
  // Always request logits for the final token so sampling has something
  // to work with even when ALogitsAll is False.
  FBatch.logits^[n_tokens - 1] := ShortInt(true);
  {$R+}
end;

procedure TLlamaBatch.AddSequence(const ABatch: TArray<Integer>; ASeqID: Integer; ALogitsAll: Boolean);
var
  i, j, n_tokens, n_tokens0: Integer;
begin
  // Appends ABatch as sequence ASeqID after the tokens already in the batch.
  n_tokens := Length(ABatch);
  n_tokens0 := FBatch.n_tokens;
  FBatch.n_tokens := FBatch.n_tokens + n_tokens;

  // Nothing to append; also avoids the out-of-range last-logit write below.
  if n_tokens = 0 then
    Exit;

  {$R-}
  for i := 0 to n_tokens - 1 do
  begin
    j := n_tokens0 + i;
    FBatch.token^[j] := ABatch[i];
    FBatch.pos^[j] := i;
    FBatch.seq_id^[j][0] := ASeqID;
    FBatch.n_seq_id^[j] := 1;
    FBatch.logits^[j] := ShortInt(ALogitsAll);
  end;
  // BUGFIX: force logits on the LAST APPENDED token. The previous code wrote
  // index [n_tokens - 1], ignoring the n_tokens0 offset, so whenever the
  // batch already held tokens it flagged the wrong slot (and left the real
  // last token without logits when ALogitsAll was False).
  FBatch.logits^[n_tokens0 + n_tokens - 1] := ShortInt(true);
  {$R+}
end;

end.
--------------------------------------------------------------------------------
/test/ChatFormatters.pas:
--------------------------------------------------------------------------------
unit ChatFormatters;

interface

uses
  System.Sysutils,
  System.Classes,
  System.Generics.Collections,
  TestFramework,
  DUnitX.TestFramework,
  LlamaCpp.Api,
  LlamaCpp.Types,
  LlamaCpp.Llama,
  LlamaCpp.Common.Chat.Types;

type
  { **************************************************************************

    !!!! WARNING !!!!

    >>>>>>>> THIS IS HUGE!!!!!! <<<<<<<<<

    This test will download many BIG models

    ************************************************************************** }
  // Integration tests: for each supported chat format, download the matching
  // model, run one system+user exchange through CreateChatCompletion, and
  // write the transcript to a per-format log file for manual inspection.
  TChatFormattersTest = class(TTestCase)
  private
    // Folder that receives one "<format>.log" transcript per test; wiped and
    // recreated once per run by the class constructor.
    class var FLogDirectory: string;
    class procedure Log(
      const AModel: string;
      const AIdentifier: string;
      const AChat: string);
    class procedure ClearLogs();
  private
    // Shared driver: loads AModelPath with AChatFormat, runs the exchange,
    // asserts a non-empty answer and logs the full conversation.
    procedure TestChat(const AModelPath, AChatFormat: string);
  public
    class constructor Create();
    class destructor Destroy();
  published
    procedure TestLlama2();
    procedure TestLlama3();
    procedure TestAlpaca();
    procedure TestQwen();
    procedure TestVicuna();
    procedure TestMistrallite();
    procedure TestZephyr();
    procedure TestSaiga();
    procedure TestGemma();
  end;

implementation

uses
  System.Variants,
  System.IOUtils,
  LlamaCpp.Download,
  Utils;

{ TChatFormattersTest }

class constructor TChatFormattersTest.Create;
begin
  FLogDirectory := TPath.Combine(TTestUtils.GetLogsFolder(), 'ChatFormatters');

  // Native llama.cpp libraries must be loaded before any test constructs
  // a TLlamaBase instance.
  TLlamaCppApis.LoadAll(TTestUtils.GetLibPath());

  ClearLogs();

  if not TDirectory.Exists(FLogDirectory) then
    TDirectory.CreateDirectory(FLogDirectory);
end;

class destructor TChatFormattersTest.Destroy;
begin
  TLlamaCppApis.UnloadAll();
end;

class procedure TChatFormattersTest.ClearLogs;
begin
  // Recursive delete; the class constructor recreates the folder afterwards.
  if TDirectory.Exists(FLogDirectory) then
    TDirectory.Delete(FLogDirectory, true);
end;

class procedure TChatFormattersTest.Log(const AModel, AIdentifier, AChat: string);
begin
  var LLogFile := TPath.Combine(FLogDirectory, AIdentifier) + '.log';

  var LHeader := '-> Execution date/time: ' + DateTimeToStr(Now()) + sLineBreak;
  LHeader := LHeader + '-> Model: ' + AModel + sLineBreak + sLineBreak;

  // FIX: TFile.WriteAllText already creates (or truncates) the file, so the
  // previous Delete + TFile.Create(...).Free() dance was redundant and
  // briefly held an open handle to the log file.
  TFile.WriteAllText(
    LLogFile,
    LHeader + AChat);
end;

procedure TChatFormattersTest.TestChat(const AModelPath, AChatFormat: string);
begin
  var LSettings := TLlamaSettings.Create();
  try
    // Random seed so repeated runs produce different completions.
    LSettings.Seed := Random(High(Integer));
    LSettings.ChatFormat := AChatFormat;

    var Llama: ILlama := TLlamaBase.Create(AModelPath, LSettings);
    var LChatCompletion := LLama as ILlamaChatCompletion;

    var LMessages: TArray<TChatCompletionRequestMessage> := [
      TChatCompletionRequestMessage.System(
        'You are a master in the Delphi programming language.'),
      TChatCompletionRequestMessage.User(
        'What is Delphi?')
    ];

    var LCompletion := LChatCompletion.CreateChatCompletion(
      TLlamaChatCompletionSettings.Create(LMessages));

    Assert.IsNotNull(LCompletion.Choices);
    Assert.IsNotEmpty(VarToStr(LCompletion.Choices[0].Message.Content));

    // Append the assistant's reply so the log shows the full conversation.
    LMessages := LMessages + [
      TChatCompletionRequestMessage.Assistant(
        VarToStr(LCompletion.Choices[0].Message.Content))];

    Log(AModelPath, AChatFormat, TChatCompletionRequestMessage.ToString(LMessages));
  finally
    LSettings.Free();
  end;
end;

procedure TChatFormattersTest.TestLlama2;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadLlama2_Chat_7B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'llama-2');
end;

procedure TChatFormattersTest.TestLlama3;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadLlama3_Chat_30B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'llama-3');
end;

procedure TChatFormattersTest.TestAlpaca;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadAlpaca_Chat_7B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'alpaca');
end;

procedure TChatFormattersTest.TestQwen;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadQwen_Chat_7B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'qwen');
end;

procedure TChatFormattersTest.TestVicuna;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadVicuna_Chat_13B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'vicuna');
end;

procedure TChatFormattersTest.TestMistrallite;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadMistrallite_7B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'mistrallite');
end;

procedure TChatFormattersTest.TestZephyr;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadZephyr_Chat();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'zephyr');
end;

procedure TChatFormattersTest.TestSaiga;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadSaiga_7B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'saiga');
end;

procedure TChatFormattersTest.TestGemma;
begin
  var LModelPaths := TLlamaDownload.Default.DownloadGemma_9B();
  Assert.IsNotNull(LModelPaths);

  TestChat(LModelPaths[0], 'gemma');
end;

initialization
  RegisterTest(TChatFormattersTest.Suite);

end.
--------------------------------------------------------------------------------
/test/HighLevelAPI.pas:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/test/HighLevelAPI.pas
--------------------------------------------------------------------------------
/test/LlamaCppTests.dpr:
--------------------------------------------------------------------------------
program LlamaCppTests;
{

  Delphi DUnit Test Project
  -------------------------
  This project contains the DUnit test framework and the GUI/Console test runners.
  Add "CONSOLE_TESTRUNNER" to the conditional defines entry in the project options
  to use the console test runner.  Otherwise the GUI test runner will be used by
  default.

}

{$IFDEF CONSOLE_TESTRUNNER}
{$APPTYPE CONSOLE}
{$ENDIF}

uses
  DUnitTestRunner,
  HighLevelAPI in 'HighLevelAPI.pas',
  ChatFormatters in 'ChatFormatters.pas',
  Utils in 'Utils.pas';

{$R *.RES}

begin
  // Runs every suite registered via RegisterTest in the units above.
  DUnitTestRunner.RunRegisteredTests;
end.
--------------------------------------------------------------------------------
/test/Utils.pas:
--------------------------------------------------------------------------------
unit Utils;

interface

uses
  System.SysUtils;

type
  /// <summary>Shared helpers for the test projects: locating the native
  /// llama.cpp libraries and the log output folder.</summary>
  TTestUtils = class
  public
    // Returns the directory containing the native llama.cpp libraries.
    // Honors the LLAMA_CPP_LIB_PATH environment variable; otherwise falls
    // back to the historical per-platform defaults.
    class function GetLibPath(): string;
    // Returns <Documents>\LlamaCppDelphi\Tests\Logs.
    class function GetLogsFolder(): string;
  end;

implementation

uses
  System.IOUtils;

{ TTestUtils }

class function TTestUtils.GetLibPath: string;
begin
  // Generalization: the defaults below are machine-specific developer paths.
  // Allow overriding via an environment variable so the tests run on any
  // machine without editing this unit; behavior is unchanged when the
  // variable is not set.
  Result := GetEnvironmentVariable('LLAMA_CPP_LIB_PATH');
  if Result <> '' then
    Exit;

  {$IFDEF MSWINDOWS}
  Result := 'C:\Users\lmbelo\Documents\Embarcadero\Studio\Projects\testllamacpp\Win64\Debug\llamacpp';
  {$ELSEIF DEFINED(LINUX)}
  Result := '/home/lmbelo/Documents/llama.cpp/lib';
  {$ELSEIF DEFINED(MACOS)}
  Result := '/Users/lmbelo/Documents/llamacpptest/.conda/lib/python3.11/site-packages/llama_cpp/lib';
  {$ENDIF MSWINDOWS}
end;

class function TTestUtils.GetLogsFolder: string;
begin
  Result := TPath.Combine(
    TPath.GetDocumentsPath(), 'LlamaCppDelphi', 'Tests', 'Logs');
end;

end.
--------------------------------------------------------------------------------