├── .github
└── workflows
│ └── build_and_release.yml
├── .gitignore
├── .gitmodules
├── README.md
├── images
├── bmp
│ ├── 128x128
│ │ └── llama_cpp.bmp
│ ├── 16x16
│ │ └── llama_cpp.bmp
│ ├── 24x24
│ │ └── llama_cpp.bmp
│ └── 32x32
│ │ └── llama_cpp.bmp
└── png
│ ├── 128x128
│ └── llama_cpp.png
│ ├── 16x16
│ └── llama_cpp.png
│ ├── 24x24
│ └── llama_cpp.png
│ └── 32x32
│ └── llama_cpp.png
├── packages
├── LlamaCpp.dpk
├── LlamaCpp.dproj
├── LlamaCppBindings.groupproj
├── dclLlamaCpp.dpk
└── dclLlamaCpp.dproj
├── samples
└── SimpleChatWithDownload
│ ├── DownloadForm.fmx
│ ├── DownloadForm.pas
│ ├── Entitlement.TemplateOSX.xml
│ ├── MainForm.fmx
│ ├── MainForm.pas
│ ├── README.md
│ ├── SimpleChatWithDownload.deployproj
│ ├── SimpleChatWithDownload.dpr
│ ├── SimpleChatWithDownload.dproj
│ └── lib
│ ├── macos_arm64
│ ├── libggml-base.dylib
│ ├── libggml-blas.dylib
│ ├── libggml-cpu.dylib
│ ├── libggml-metal.dylib
│ ├── libggml-rpc.dylib
│ ├── libggml.dylib
│ ├── libllama.dylib
│ └── libllava_shared.dylib
│ └── windows_x64
│ ├── ggml-base.dll
│ ├── ggml-cpu.dll
│ ├── ggml-rpc.dll
│ ├── ggml.dll
│ ├── llama.dll
│ └── llava_shared.dll
├── src
├── Api
│ ├── LlamaCpp.Api.Ggml.pas
│ ├── LlamaCpp.Api.Llama.pas
│ ├── LlamaCpp.Api.Llava.pas
│ └── LlamaCpp.Api.pas
├── CType
│ ├── Ggml
│ │ ├── LlamaCpp.CType.Ggml.Backend.pas
│ │ ├── LlamaCpp.CType.Ggml.Cpu.pas
│ │ └── LlamaCpp.CType.Ggml.pas
│ ├── Llama
│ │ └── LlamaCpp.CType.Llama.pas
│ └── Llava
│ │ └── LlamaCpp.CType.Llava.pas
├── Common
│ ├── Cache
│ │ ├── LlamaCpp.Common.Cache.Base.pas
│ │ ├── LlamaCpp.Common.Cache.Disk.pas
│ │ └── LlamaCpp.Common.Cache.Ram.pas
│ ├── Chat
│ │ ├── Completion
│ │ │ └── LlamaCpp.Common.Chat.Completion.Collection.pas
│ │ ├── Formatter
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Adapter.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Alpaca.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Baichuan.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Baichuan2.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.ChatGLM3.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Chatml.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Gemma.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Intel.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Jinja2.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Llama2.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Llama3.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.MilstralLite.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.MistralInstruct.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.OasstLlama.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.OpenBuddy.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.OpenChat.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.OpenOrca.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Phind.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Pygmalion.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Qwen.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.RedpajamaIncite.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Registration.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Saiga.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Snoozy.pas
│ │ │ ├── LlamaCpp.Common.Chat.Formatter.Vicuna.pas
│ │ │ └── LlamaCpp.Common.Chat.Formatter.Zephyr.pas
│ │ ├── LlamaCpp.Common.Chat.Format.pas
│ │ └── LlamaCpp.Common.Chat.Types.pas
│ ├── Grammar
│ │ └── LlamaCpp.Common.Grammar.pas
│ ├── LlamaCpp.Common.Settings.pas
│ ├── LlamaCpp.Common.State.pas
│ ├── LlamaCpp.Common.TokenArray.pas
│ ├── LlamaCpp.Common.Types.pas
│ ├── Processor
│ │ ├── LlamaCpp.Common.Processor.LogitsScore.pas
│ │ └── LlamaCpp.Common.Processor.StoppingCriteria.pas
│ ├── Sampling
│ │ ├── LlamaCpp.Common.Sampling.Context.pas
│ │ ├── LlamaCpp.Common.Sampling.CustomSampler.pas
│ │ ├── LlamaCpp.Common.Sampling.Params.pas
│ │ └── LlamaCpp.Common.Sampling.Sampler.pas
│ ├── Speculative
│ │ └── LlamaCpp.Common.Speculative.LookupDecoding.pas
│ └── Tokenizer
│ │ ├── LlamaCpp.Common.Tokenizer.Base.pas
│ │ └── LlamaCpp.Common.Tokenizer.pas
├── LlamaCpp.ChatCompletion.pas
├── LlamaCpp.Completion.pas
├── LlamaCpp.Download.pas
├── LlamaCpp.Embedding.pas
├── LlamaCpp.Evaluator.pas
├── LlamaCpp.Exception.pas
├── LlamaCpp.Generator.pas
├── LlamaCpp.Helper.pas
├── LlamaCpp.Llama.pas
├── LlamaCpp.Registration.pas
├── LlamaCpp.Sampler.pas
├── LlamaCpp.Tokenization.pas
├── LlamaCpp.Types.pas
└── Wrapper
│ ├── LlamaCpp.Wrapper.LlamaBatch.pas
│ ├── LlamaCpp.Wrapper.LlamaContext.pas
│ └── LlamaCpp.Wrapper.LlamaModel.pas
└── test
├── ChatFormatters.pas
├── HighLevelAPI.pas
├── LlamaCppTests.dpr
├── LlamaCppTests.dproj
└── Utils.pas
/.gitignore:
--------------------------------------------------------------------------------
1 | # Uncomment these types if you want even more clean repository. But be careful.
2 | # It can harm an existing project's source files. Read the explanations below.
3 | #
4 | # Resource files are binaries containing manifest, project icon and version info.
5 | # They cannot be viewed as text or compared by diff tools. Consider replacing them with .rc files.
6 | #*.res
7 | #
8 | # Type library file (binary). In old Delphi versions it should be stored.
9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored.
10 | #*.tlb
11 | #
12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7.
13 | # Uncomment this if you are not using diagrams or use newer Delphi version.
14 | #*.ddp
15 | #
16 | # Visual LiveBindings file. Added in Delphi XE2.
17 | # Uncomment this if you are not using LiveBindings Designer.
18 | #*.vlb
19 | #
20 | # Deployment Manager configuration file for your project. Added in Delphi XE2.
21 | # Uncomment this if it is not mobile development and you do not use remote debug feature.
22 | #*.deployproj
23 | #
24 | # C++ object files produced when C/C++ Output file generation is configured.
25 | # Uncomment this if you are not using external objects (zlib library for example).
26 | #*.obj
27 | #
28 |
29 | # Default Delphi compiler directories
30 | # Content of this directories are generated with each Compile/Construct of a project.
31 | # Most of the time, files here have no place in a code repository.
32 | #Win32/
33 | #Win64/
34 | #OSX64/
35 | #OSXARM64/
36 | #Android/
37 | #Android64/
38 | #iOSDevice64/
39 | #Linux64/
40 |
41 | # Delphi compiler-generated binaries (safe to delete)
42 | *.exe
43 | *.dll
44 | *.bpl
45 | *.bpi
46 | *.dcp
47 | *.so
48 | *.apk
49 | *.drc
50 | *.map
51 | *.dres
52 | *.rsm
53 | *.tds
54 | *.dcu
55 | *.lib
56 | *.a
57 | *.o
58 | *.ocx
59 |
60 | # Delphi autogenerated files (duplicated info)
61 | *.cfg
62 | *.hpp
63 | *Resource.rc
64 |
65 | # Delphi local files (user-specific info)
66 | *.local
67 | *.identcache
68 | *.projdata
69 | *.tvsconfig
70 | *.dsk
71 |
72 | # Delphi history and backups
73 | __history/
74 | __recovery/
75 | *.~*
76 |
77 | # Castalia statistics file (since XE7 Castalia is distributed with Delphi)
78 | *.stat
79 |
80 | # Boss dependency manager vendor folder https://github.com/HashLoad/boss
81 | modules/
82 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vendor/llama.cpp"]
2 | path = vendor/llama.cpp
3 | url = https://github.com/ggerganov/llama.cpp.git
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🐫 llama-cpp-delphi
2 |
3 | Welcome to **llama-cpp-delphi**, the Delphi bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp)! This project allows you to integrate the power of Llama-based Large Language Models (LLMs) into your Delphi applications, enabling efficient and versatile local inference.
4 |
5 | ## 🚀 Features
6 |
7 | - **Delphi Integration**: Harness Llama models directly in your Delphi projects.
8 | - **Local Inference**: No external servers or APIs required—your data stays local.
9 | - **Cross-Platform Support**: Compatible with Windows, Linux, and Mac.
10 | - 🖥️ **Mac Silicon**: GPU (MPS) and CPU inference supported.
11 | - 💻 **Windows**: CPU inference supported, with options for CUDA, Vulkan, Kompute, and OpenBLAS.
12 | - 🌏 **Linux**: CPU inference supported, with options for CUDA, Vulkan, Kompute, and OpenBLAS.
13 | - 🚀 **Android and iOS support coming soon!**
14 | - **Pre-Built Libraries**: Simplified setup with pre-compiled libraries.
15 | - **Customizable Sampling**: Fine-tune your AI’s behavior with easy-to-configure samplers.
16 |
17 | ## 🔧 Getting Started
18 |
19 | ### Prerequisites
20 |
21 | 1. **Delphi IDE** installed.
22 | 2. **Git** installed (required for cloning model repositories).
23 | 3. A basic understanding of Delphi development.
24 |
25 | ### Installation
26 |
27 | 1. Clone the **llama-cpp-delphi** repository:
28 | ```bash
29 | git clone https://github.com/Embarcadero/llama-cpp-delphi.git
30 | ```
31 | 2. Open the project in Delphi IDE.
32 | 3. Build the project for your desired platform(s):
33 | - Windows
34 | - Linux
35 | - Mac Silicon
36 |
37 | ### Libraries
38 |
39 | The necessary **llama.cpp** libraries are distributed as part of the releases of this repository. You can find them under the "Release" section in the repository. Here's an explanation of the libraries available:
40 |
41 | #### CPU Build
42 |
43 | CPU-only builds for Windows, Linux, and macOS. Inference runs slowly on the CPU—consider using a GPU-based library.
44 |
45 | #### BLAS Build
46 |
47 | Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Using BLAS doesn't affect the generation performance. There are several different BLAS implementations available for build and use:
48 |
49 | - **Accelerate Framework**: Available on macOS, enabled by default.
50 | - **OpenBLAS**: Provides CPU-based BLAS acceleration. Ensure OpenBLAS is installed on your machine.
51 | - **BLIS**: A high-performance portable BLAS framework. [Learn more](https://github.com/flame/blis).
52 | - **Intel oneMKL**: Optimized for Intel processors, supporting advanced instruction sets like avx\_vnni.
53 |
54 | #### SYCL
55 |
56 | SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators.
57 |
58 | llama.cpp based on SYCL is used to **support Intel GPU** (Data Center Max series, Flex series, Arc series, Built-in GPU and iGPU).
59 |
60 | For detailed info, please refer to [llama.cpp for SYCL](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md).
61 |
62 | #### Metal Build
63 |
64 | On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
65 |
66 | When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers 0` option in the Llama settings.
67 |
68 | #### CUDA
69 |
70 | Provides GPU acceleration using an NVIDIA GPU. [Refer to the CUDA guide](https://github.com/ggerganov/llama.cpp/blob/master/docs/cuda-fedora.md) for Fedora setup.
71 |
72 | #### Vulkan
73 |
74 | Vulkan provides GPU acceleration through a modern, low-overhead API. To use Vulkan:
75 |
76 | * Ensure Vulkan is installed and supported by your GPU drivers.
77 |
78 | Learn more at the [official Vulkan site](https://vulkan.org).
79 |
80 | #### Kompute
81 |
82 | Kompute offers efficient compute operations for GPU workloads. It's designed for AI inference tasks and works seamlessly with Vulkan.
83 |
84 | #### CANN
85 |
86 | Provides NPU acceleration using the AI cores of Ascend NPUs. [Learn more about CANN](https://www.hiascend.com/en/software/cann).
87 |
88 | #### SYCL
89 |
90 | SYCL enables GPU acceleration on Intel GPUs. Refer to the [SYCL documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md) for setup details.
91 |
92 | #### HIP
93 |
94 | Supports GPU acceleration on AMD GPUs compatible with HIP.
95 |
96 | #### MUSA
97 |
98 | Provides GPU acceleration using the MUSA cores of Moore Threads MTT GPUs.
99 |
100 | ## 🌟 Using llama-cpp-delphi
101 |
102 | ### Key Components
103 |
104 | - **Llama**: Delphi-friendly IDE component.
105 |
106 | ### Running Samples
107 |
108 | 1. Explore the `samples` directory for available examples, like **SimpleChatWithDownload**.
109 | 2. Follow the README provided in each sample folder for detailed instructions.
110 |
111 | ## 🔧 Configuration
112 |
113 | ### Models
114 |
115 | You can use any model compatible with **llama.cpp** (e.g., GGUF format). Popular options include:
116 | - **Llama-2**: A robust and general-purpose model.
117 | - **Llama-3**: A lightweight alternative with excellent performance.
118 | - **Mistral**: A compact and efficient model.
119 | - **DeepSeek**: An innovative model designed for exploratory tasks.
120 |
121 | ### Hardware Support
122 |
123 | - **Mac Silicon**:
124 | - GPU inference (via MPS) is recommended for optimal performance.
125 | - CPU inference is available but slower.
126 | - **Windows**:
127 | - CPU inference supported, with additional support for CUDA, Vulkan, Kompute, HIP, and OpenBLAS.
128 | - **Linux**:
129 | - CPU inference supported, with additional support for CUDA, Vulkan, HIP, and MUSA.
130 |
131 | ## 🤝 Contributions
132 |
133 | We welcome contributions to improve **llama-cpp-delphi**! Feel free to:
134 | - Report issues.
135 | - Submit pull requests.
136 | - Suggest enhancements.
137 |
138 | ## 📝 License
139 |
140 | This project is licensed under the MIT License—see the `LICENSE` file for details.
141 |
142 | ## 🌟 Final Notes
143 |
144 | Get started with **llama-cpp-delphi** and bring advanced AI capabilities to your Delphi projects. If you encounter any issues or have suggestions, let us know—we’re here to help! Happy coding! 🎉
145 |
146 |
--------------------------------------------------------------------------------
/images/bmp/128x128/llama_cpp.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/128x128/llama_cpp.bmp
--------------------------------------------------------------------------------
/images/bmp/16x16/llama_cpp.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/16x16/llama_cpp.bmp
--------------------------------------------------------------------------------
/images/bmp/24x24/llama_cpp.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/24x24/llama_cpp.bmp
--------------------------------------------------------------------------------
/images/bmp/32x32/llama_cpp.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/bmp/32x32/llama_cpp.bmp
--------------------------------------------------------------------------------
/images/png/128x128/llama_cpp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/128x128/llama_cpp.png
--------------------------------------------------------------------------------
/images/png/16x16/llama_cpp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/16x16/llama_cpp.png
--------------------------------------------------------------------------------
/images/png/24x24/llama_cpp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/24x24/llama_cpp.png
--------------------------------------------------------------------------------
/images/png/32x32/llama_cpp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/images/png/32x32/llama_cpp.png
--------------------------------------------------------------------------------
/packages/LlamaCpp.dpk:
--------------------------------------------------------------------------------
// Run-time package for the llama.cpp Delphi bindings ($RUNONLY: no design-time
// registration here; that lives in dclLlamaCpp, which requires this package).
// Groups the flat API imports (Api), C-type declarations (CType), object
// wrappers (Wrapper) and the high-level components (LlamaCpp.*).
package LlamaCpp;

{$R *.res}
{$IFDEF IMPLICITBUILDING This IFDEF should not be used by users}
{$ALIGN 8}
{$ASSERTIONS ON}
{$BOOLEVAL OFF}
{$DEBUGINFO OFF}
{$EXTENDEDSYNTAX ON}
{$IMPORTEDDATA ON}
{$IOCHECKS ON}
{$LOCALSYMBOLS ON}
{$LONGSTRINGS ON}
{$OPENSTRINGS ON}
{$OPTIMIZATION OFF}
{$OVERFLOWCHECKS ON}
{$RANGECHECKS ON}
{$REFERENCEINFO ON}
{$SAFEDIVIDE OFF}
{$STACKFRAMES ON}
{$TYPEDADDRESS OFF}
{$VARSTRINGCHECKS ON}
{$WRITEABLECONST OFF}
{$MINENUMSIZE 1}
{$IMAGEBASE $400000}
{$DEFINE DEBUG}
{$ENDIF IMPLICITBUILDING}
{$LIBSUFFIX AUTO}
{$RUNONLY}
{$IMPLICITBUILD ON}

// NOTE(review): FireDAC/fmxFireDAC requirements pull FMX into this run-time
// package; confirm they are all actually needed by the contained units.
requires
  rtl,
  dbrtl,
  FireDAC,
  FireDACCommonDriver,
  FireDACCommon,
  FireDACSqliteDriver,
  fmxFireDAC;

contains
  LlamaCpp.Api.Llama in '..\src\Api\LlamaCpp.Api.Llama.pas',
  LlamaCpp.Api in '..\src\Api\LlamaCpp.Api.pas',
  LlamaCpp.Api.Ggml in '..\src\Api\LlamaCpp.Api.Ggml.pas',
  LlamaCpp.Api.Llava in '..\src\Api\LlamaCpp.Api.Llava.pas',
  LlamaCpp.Wrapper.LlamaModel in '..\src\Wrapper\LlamaCpp.Wrapper.LlamaModel.pas',
  LlamaCpp.CType.Ggml.Backend in '..\src\CType\Ggml\LlamaCpp.CType.Ggml.Backend.pas',
  LlamaCpp.CType.Ggml.Cpu in '..\src\CType\Ggml\LlamaCpp.CType.Ggml.Cpu.pas',
  LlamaCpp.CType.Ggml in '..\src\CType\Ggml\LlamaCpp.CType.Ggml.pas',
  LlamaCpp.CType.Llama in '..\src\CType\Llama\LlamaCpp.CType.Llama.pas',
  LlamaCpp.CType.Llava in '..\src\CType\Llava\LlamaCpp.CType.Llava.pas',
  LlamaCpp.Wrapper.LlamaContext in '..\src\Wrapper\LlamaCpp.Wrapper.LlamaContext.pas',
  LlamaCpp.Wrapper.LlamaBatch in '..\src\Wrapper\LlamaCpp.Wrapper.LlamaBatch.pas',
  LlamaCpp.Common.Sampling.Params in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.Params.pas',
  LlamaCpp.Common.Sampling.Context in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.Context.pas',
  LlamaCpp.Helper in '..\src\LlamaCpp.Helper.pas',
  LlamaCpp.Common.Sampling.CustomSampler in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.CustomSampler.pas',
  LlamaCpp.Common.Sampling.Sampler in '..\src\Common\Sampling\LlamaCpp.Common.Sampling.Sampler.pas',
  LlamaCpp.Common.Tokenizer.Base in '..\src\Common\Tokenizer\LlamaCpp.Common.Tokenizer.Base.pas',
  LlamaCpp.Common.Tokenizer in '..\src\Common\Tokenizer\LlamaCpp.Common.Tokenizer.pas',
  LlamaCpp.Common.Cache.Base in '..\src\Common\Cache\LlamaCpp.Common.Cache.Base.pas',
  LlamaCpp.Common.State in '..\src\Common\LlamaCpp.Common.State.pas',
  LlamaCpp.Common.Cache.Ram in '..\src\Common\Cache\LlamaCpp.Common.Cache.Ram.pas',
  LlamaCpp.Common.Cache.Disk in '..\src\Common\Cache\LlamaCpp.Common.Cache.Disk.pas',
  LlamaCpp.Llama in '..\src\LlamaCpp.Llama.pas',
  LlamaCpp.Common.TokenArray in '..\src\Common\LlamaCpp.Common.TokenArray.pas',
  LlamaCpp.Common.Chat.Format in '..\src\Common\Chat\LlamaCpp.Common.Chat.Format.pas',
  LlamaCpp.Common.Chat.Types in '..\src\Common\Chat\LlamaCpp.Common.Chat.Types.pas',
  LlamaCpp.Common.Types in '..\src\Common\LlamaCpp.Common.Types.pas',
  LlamaCpp.Common.Grammar in '..\src\Common\Grammar\LlamaCpp.Common.Grammar.pas',
  LlamaCpp.Common.Processor.LogitsScore in '..\src\Common\Processor\LlamaCpp.Common.Processor.LogitsScore.pas',
  LlamaCpp.Common.Processor.StoppingCriteria in '..\src\Common\Processor\LlamaCpp.Common.Processor.StoppingCriteria.pas',
  LlamaCpp.Common.Speculative.LookupDecoding in '..\src\Common\Speculative\LlamaCpp.Common.Speculative.LookupDecoding.pas',
  LlamaCpp.Common.Chat.Completion.Collection in '..\src\Common\Chat\Completion\LlamaCpp.Common.Chat.Completion.Collection.pas',
  LlamaCpp.Common.Chat.Formatter.Adapter in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Adapter.pas',
  LlamaCpp.Common.Settings in '..\src\Common\LlamaCpp.Common.Settings.pas',
  LlamaCpp.Common.Chat.Formatter.Llama2 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Llama2.pas',
  LlamaCpp.Common.Chat.Formatter.Registration in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Registration.pas',
  LlamaCpp.Tokenization in '..\src\LlamaCpp.Tokenization.pas',
  LlamaCpp.Evaluator in '..\src\LlamaCpp.Evaluator.pas',
  LlamaCpp.Sampler in '..\src\LlamaCpp.Sampler.pas',
  LlamaCpp.Types in '..\src\LlamaCpp.Types.pas',
  LlamaCpp.Generator in '..\src\LlamaCpp.Generator.pas',
  LlamaCpp.Embedding in '..\src\LlamaCpp.Embedding.pas',
  LlamaCpp.Completion in '..\src\LlamaCpp.Completion.pas',
  LlamaCpp.ChatCompletion in '..\src\LlamaCpp.ChatCompletion.pas',
  LlamaCpp.Exception in '..\src\LlamaCpp.Exception.pas',
  LlamaCpp.Common.Chat.Formatter.Llama3 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Llama3.pas',
  LlamaCpp.Common.Chat.Formatter.Alpaca in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Alpaca.pas',
  LlamaCpp.Common.Chat.Formatter.Qwen in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Qwen.pas',
  LlamaCpp.Common.Chat.Formatter.Vicuna in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Vicuna.pas',
  LlamaCpp.Common.Chat.Formatter.OasstLlama in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OasstLlama.pas',
  LlamaCpp.Common.Chat.Formatter.Baichuan2 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Baichuan2.pas',
  LlamaCpp.Common.Chat.Formatter.Baichuan in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Baichuan.pas',
  LlamaCpp.Common.Chat.Formatter.OpenBuddy in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OpenBuddy.pas',
  LlamaCpp.Common.Chat.Formatter.RedpajamaIncite in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.RedpajamaIncite.pas',
  LlamaCpp.Common.Chat.Formatter.Snoozy in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Snoozy.pas',
  LlamaCpp.Common.Chat.Formatter.Phind in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Phind.pas',
  LlamaCpp.Common.Chat.Formatter.Intel in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Intel.pas',
  LlamaCpp.Common.Chat.Formatter.OpenOrca in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OpenOrca.pas',
  LlamaCpp.Common.Chat.Formatter.MilstralLite in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.MilstralLite.pas',
  LlamaCpp.Common.Chat.Formatter.Zephyr in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Zephyr.pas',
  LlamaCpp.Common.Chat.Formatter.Pygmalion in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Pygmalion.pas',
  LlamaCpp.Common.Chat.Formatter.Chatml in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Chatml.pas',
  LlamaCpp.Common.Chat.Formatter.MistralInstruct in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.MistralInstruct.pas',
  LlamaCpp.Common.Chat.Formatter.ChatGLM3 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.ChatGLM3.pas',
  LlamaCpp.Common.Chat.Formatter.OpenChat in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.OpenChat.pas',
  LlamaCpp.Common.Chat.Formatter.Saiga in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Saiga.pas',
  LlamaCpp.Common.Chat.Formatter.Gemma in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Gemma.pas',
  LlamaCpp.Download in '..\src\LlamaCpp.Download.pas',
  LlamaCpp.Common.Chat.Formatter.Jinja2 in '..\src\Common\Chat\Formatter\LlamaCpp.Common.Chat.Formatter.Jinja2.pas';

end.
114 |
115 |
116 |
--------------------------------------------------------------------------------
/packages/LlamaCppBindings.groupproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | {4393AAEC-1CA8-4DFA-82FE-C3984AE5D33A}
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | Default.Personality.12
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/packages/dclLlamaCpp.dpk:
--------------------------------------------------------------------------------
// Design-time package: registers the LlamaCpp components with the Delphi IDE
// ($DESIGNONLY). All run-time code lives in the LlamaCpp run-time package,
// which this package requires; only the IDE registration unit is contained.
package dclLlamaCpp;

{$R *.res}
{$R *.dres}
{$IFDEF IMPLICITBUILDING This IFDEF should not be used by users}
{$ALIGN 8}
{$ASSERTIONS ON}
{$BOOLEVAL OFF}
{$DEBUGINFO OFF}
{$EXTENDEDSYNTAX ON}
{$IMPORTEDDATA ON}
{$IOCHECKS ON}
{$LOCALSYMBOLS ON}
{$LONGSTRINGS ON}
{$OPENSTRINGS ON}
{$OPTIMIZATION OFF}
{$OVERFLOWCHECKS ON}
{$RANGECHECKS ON}
{$REFERENCEINFO ON}
{$SAFEDIVIDE OFF}
{$STACKFRAMES ON}
{$TYPEDADDRESS OFF}
{$VARSTRINGCHECKS ON}
{$WRITEABLECONST OFF}
{$MINENUMSIZE 1}
{$IMAGEBASE $400000}
{$DEFINE DEBUG}
{$ENDIF IMPLICITBUILDING}
{$DESCRIPTION 'Delphi bindings for Llama.cpp'}
{$LIBSUFFIX AUTO}
{$DESIGNONLY}
{$IMPLICITBUILD ON}

requires
  rtl,
  llamacpp;

contains
  LlamaCpp.Registration in '..\src\LlamaCpp.Registration.pas';

end.
42 |
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/DownloadForm.fmx:
--------------------------------------------------------------------------------
1 | object FormDownload: TFormDownload
2 | Left = 0
3 | Top = 0
4 | Caption = 'Clone and Load Model'
5 | ClientHeight = 280
6 | ClientWidth = 400
7 | Position = MainFormCenter
8 | Constraints.MaxHeight = 280.000000000000000000
9 | Constraints.MaxWidth = 400.000000000000000000
10 | Constraints.MinHeight = 280.000000000000000000
11 | Constraints.MinWidth = 400.000000000000000000
12 | FormFactor.Width = 320
13 | FormFactor.Height = 480
14 | FormFactor.Devices = [Desktop]
15 | DesignerMasterStyle = 0
16 | object memoDownload: TMemo
17 | Touch.InteractiveGestures = [Pan, LongTap, DoubleTap]
18 | DataDetectorTypes = []
19 | ReadOnly = True
20 | Align = Client
21 | Size.Width = 400.000000000000000000
22 | Size.Height = 280.000000000000000000
23 | Size.PlatformDefault = False
24 | TabOrder = 0
25 | Viewport.Width = 400.000000000000000000
26 | Viewport.Height = 280.000000000000000000
27 | end
28 | object LlamaDownload1: TLlamaDownload
29 | OnWriteData = LlamaDownload1WriteData
30 | Left = 184
31 | Top = 120
32 | end
33 | end
34 |
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/DownloadForm.pas:
--------------------------------------------------------------------------------
unit DownloadForm;

interface

uses
  System.SysUtils, System.Types, System.UITypes, System.Classes, System.Variants,
  FMX.Types, FMX.Controls, FMX.Forms, FMX.Graphics, FMX.Dialogs, FMX.Memo.Types,
  FMX.Controls.Presentation, FMX.ScrollBox, FMX.Memo, LlamaCpp.Llama, LlamaCpp.Download;

type
  /// <summary>
  ///   Modal progress form. Downloads (clones) a model repository on a
  ///   background task via TLlamaDownload, streams progress text into the
  ///   memo, loads the resulting model into a TLlama instance and then
  ///   closes itself.
  /// </summary>
  TFormDownload = class(TForm)
    memoDownload: TMemo;
    LlamaDownload1: TLlamaDownload;
    // Progress callback from TLlamaDownload; marshals text to the UI thread.
    procedure LlamaDownload1WriteData(Sender: TObject; const AText: string);

    // Runs ATask on a background task; ATask must return the local model
    // path. The result is assigned to ALlama.ModelPath, the model is loaded
    // (ALlama.Init) and the form closes itself. Shows this form modally so
    // the caller is blocked while the work runs.
    // Fix: the TFunc type parameter (<string>) was missing, which does not
    // compile - all callers pass "function(): string" anonymous methods.
    procedure Download(const ALlama: TLlama; const ATask: TFunc<string>);
  public
    // HF Auth
    procedure HFAuth(const AUserName, AToken: string);

    procedure DownloadAndPrepareLlama2(const ALlama: TLlama);
    procedure DownloadAndPrepareLlama3(const ALlama: TLlama);
    procedure DownloadAndPrepareMistralLite(const ALlama: TLlama);
    procedure DownloadAndPrepareTinyLlama(const ALlama: TLlama);
  end;

var
  FormDownload: TFormDownload;

implementation

uses
  System.Threading;

{$R *.fmx}

{ TFormDownload }

procedure TFormDownload.Download(const ALlama: TLlama;
  const ATask: TFunc<string>);
begin
  memoDownload.Lines.Add(
    'Checking your local copy. It may take a while...'
    + sLineBreak + sLineBreak);

  TTask.Run(procedure() begin
    try
      // Invoking ATask performs the download/clone; Delphi calls the
      // parameterless function reference implicitly on assignment.
      ALlama.ModelPath := ATask;

      TThread.Queue(nil, procedure() begin
        memoDownload.Lines.Add('Loading...');
      end);

      // Heavy model load stays on the background thread.
      ALlama.Init();

      TThread.Queue(nil, procedure() begin
        memoDownload.Lines.Add(String.Empty);
        memoDownload.Lines.Add('All done!');
      end);

      // Delay closing slightly so "All done!" is visible.
      // NOTE(review): if the task completes before ShowModal below is
      // reached, Close runs against a form that is not modal yet - confirm
      // the intended ordering for very fast (cached) downloads.
      TThread.ForceQueue(nil, procedure() begin
        Self.Close();
      end, 500);
    except
      on E: Exception do
        Application.ShowException(E);
    end;
  end);

  Self.ShowModal();
end;

procedure TFormDownload.DownloadAndPrepareLlama2(const ALlama: TLlama);
begin
  ALlama.Settings.ChatFormat := 'llama-2';

  Download(ALlama, function(): string begin
    // First element of the returned file list is the model path.
    Result := LlamaDownload1.DownloadLlama2_Chat_7B()[0];
  end);
end;

procedure TFormDownload.DownloadAndPrepareLlama3(const ALlama: TLlama);
begin
  ALlama.Settings.ChatFormat := 'llama-3';

  Download(ALlama, function(): string begin
    Result := LlamaDownload1.DownloadLlama3_Chat_30B()[0];
  end);
end;

procedure TFormDownload.DownloadAndPrepareMistralLite(
  const ALlama: TLlama);
begin
  ALlama.Settings.ChatFormat := 'mistrallite';

  Download(ALlama, function(): string begin
    Result := LlamaDownload1.DownloadMistrallite_7B()[0];
  end);
end;

procedure TFormDownload.LlamaDownload1WriteData(Sender: TObject;
  const AText: string);
begin
  // Called from the download thread; append asynchronously on the UI thread.
  TThread.Queue(nil, procedure() begin
    MemoDownload.Lines.Text := MemoDownload.Lines.Text + AText;
  end);
end;

procedure TFormDownload.DownloadAndPrepareTinyLlama(const ALlama: TLlama);
begin
  ALlama.Settings.ChatFormat := 'zephyr';

  Download(ALlama, function(): string begin
    Result := LlamaDownload1.DownloadTinyLlama_1_1B()[0];
  end);
end;

procedure TFormDownload.HFAuth(const AUserName, AToken: string);
begin
  // Credentials used by TLlamaDownload for gated Hugging Face repositories.
  LLamaDownload1.HuggingFace.UserName := AUserName;
  LLamaDownload1.HuggingFace.Token := AToken;
end;

end.
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/Entitlement.TemplateOSX.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | <%appSandboxKeys%>
6 |
7 |
8 |
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/README.md:
--------------------------------------------------------------------------------
1 | # 🚀 SimpleChatWithDownload
2 |
3 | Experience the power of local inference! This app runs a Large Language Model (LLM) entirely on your machine, meaning no internet or external API calls are needed for predictions. By leveraging GPU (on Mac) or CPU (on Windows) for computation, you get a secure and self-contained AI experience tailored to your hardware setup. 🎉
4 |
5 | **SimpleChatWithDownload** is an exciting sample project from the **llama-cpp-delphi** bindings. This app provides a streamlined way to interact with a local LLM (Large Language Model) in a sleek chat interface, featuring automatic model downloads. Whether you’re using Mac Silicon for blazing-fast GPU inference or Windows for **slow** CPU-only inference, this sample is a great way to get started! 🎉
6 |
7 |
8 |
9 | https://github.com/user-attachments/assets/16582374-4c12-43bd-aff8-6c4ad4f41339
10 |
11 |
12 |
13 | ## 🌟 Features
14 |
15 | - **Interactive Chat Window**: Start chatting with your local LLM in seconds!
16 | - **Automatic Model Downloads**: Download models like **Llama-2**, **Llama-3**, and **Mistral Lite** effortlessly. 🚀
17 | - Models are cloned via Git and downloaded to your system’s default download folder.
18 | - **Platform Support**:
  - 🖥️ **Mac Silicon**: GPU (Metal) and CPU inference supported.
20 | - 💻 **Windows**: CPU inference only. Feel free to extend it and test CUDA.
21 | - ⚡ GPU inference is recommended for Mac to avoid slower CPU performance.
22 | - **Pre-Bundled Llama.cpp Libraries**: No extra setup! All required libraries are included in the `lib` folder for easy deployment.
23 | - **Customizable Settings**:
24 | - Choose your model.
25 | - Switch between GPU and CPU inference on Mac.
26 | - Enable/disable seed settings to control response variability.
27 |
28 | ## 🛠️ Getting Started
29 |
30 | ### Note
31 |
32 | You must have Git installed on your machine to clone model repositories.
33 |
34 | ### Prerequisites
35 |
36 | 1. Ensure you have the **llama-cpp-delphi** project ready. If not, grab it from the repository.
37 | 2. A **Delphi IDE** installation.
38 | 3. For Mac deployment, make sure **PAServer** is running on your Mac.
39 |
40 | ### Steps to Run
41 |
42 | 1. **Build llama-cpp-delphi**:
43 | - Open the llama-cpp-delphi project in Delphi IDE.
44 | - Build it for **Windows** and **Mac Silicon**.
45 |
46 | 2. **Open and Build the Sample**:
47 | - Open the `SimpleChatWithDownload` sample in Delphi IDE.
48 | - Build it for your target platform:
49 | - **Mac Silicon**: Recommended for GPU inference.
50 | - **Windows**: CPU inference only.
51 |
52 | 3. **Deploy to Mac**:
53 | - Connect to your Mac using **PAServer**.
54 | - Deploy the app to your Mac. 🎉
55 |
56 | 4. **Run the App**:
57 | - The app will launch with a "Settings" menu where you can:
58 | - Select your model (Llama-2, Llama-3, Mistral Lite).
59 | - Choose GPU or CPU inference (Mac only).
60 | - Enable/disable seed randomness.
61 |
62 | ### Download and Use Models
63 |
64 | - Click the **hamburger menu** to start downloading the selected model.
65 | - Supported Models:
66 | - **Llama-2**: ~4 GB (7B.Q4_K_M).
  - **Llama-3**: ~5 GB (8B.Q4_K_M).
68 | - **Mistral Lite**: ~7 GB (7B.Q4_K_M).
69 | - 🔧 You can also use any GGUF-compatible models with Llama.cpp.
70 | - 💡 Feel free to test **DeepSeek** locally for additional insights and functionality!
71 |
72 | - After the model download is complete, the chat window will activate.
73 |
74 | ## 💡 Usage Tips
75 |
76 | - **Start Chatting**:
77 | - Type your message in the chat box and press **Enter** or click the **Play** button.
78 | - Use the **Stop** button to pause responses.
79 |
80 | - **Customize Inference**:
81 | - Mac users: Switch between GPU (fast) and CPU (fallback) modes via the "Settings" menu.
82 | - Windows users: For better performance, explore CUDA builds in the llama-cpp-delphi "Release" section. 💪
83 |
84 | - **Seed Option**:
85 | - Prevent repetitive responses for the same questions by enabling the seed setting.
86 |
87 | ## 📁 Libraries
88 |
89 | All required libraries are bundled in the `lib` folder of the sample’s root directory:
90 | - **Mac**: Deployment is pre-configured. Deploy via PAServer, and you’re good to go!
91 | - **Windows**: The app automatically loads libraries from the `lib` folder.
92 |
93 | For additional builds (e.g., CUDA versions), visit the llama-cpp-delphi "Release" section.
94 |
95 | ## 🌟 Final Notes
96 |
97 | Enjoy chatting with cutting-edge LLMs in your own app! If you run into any issues or have feedback, feel free to contribute or reach out. Happy coding! 🚀
98 |
99 |
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/SimpleChatWithDownload.dpr:
--------------------------------------------------------------------------------
program SimpleChatWithDownload;

uses
  System.StartUpCopy,
  FMX.Forms,
  MainForm in 'MainForm.pas' {FormMain},
  DownloadForm in 'DownloadForm.pas' {FormDownload},

{$R *.res}

begin
  Application.Initialize;
  // Only the main form is auto-created here; TFormDownload is presumably
  // instantiated on demand by the main form — TODO confirm.
  Application.CreateForm(TFormMain, FormMain);
  Application.Run;
end.
16 |
17 |
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-base.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-base.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-blas.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-blas.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-cpu.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-cpu.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-metal.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-metal.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-rpc.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml-rpc.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libggml.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libggml.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libllama.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libllama.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/macos_arm64/libllava_shared.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/macos_arm64/libllava_shared.dylib
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/windows_x64/ggml-base.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml-base.dll
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/windows_x64/ggml-cpu.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml-cpu.dll
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/windows_x64/ggml-rpc.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml-rpc.dll
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/windows_x64/ggml.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/ggml.dll
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/windows_x64/llama.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/llama.dll
--------------------------------------------------------------------------------
/samples/SimpleChatWithDownload/lib/windows_x64/llava_shared.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Embarcadero/llama-cpp-delphi/9a25fe52d89ec0e4e2a68a32681ac3b6d4b760cf/samples/SimpleChatWithDownload/lib/windows_x64/llava_shared.dll
--------------------------------------------------------------------------------
/src/Api/LlamaCpp.Api.Ggml.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Api.Ggml;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | LlamaCpp.Api;
8 |
type
  // Loader for the ggml shared library. ggml currently exposes no entry
  // points that this binding consumes, so DoLoadLibrary binds nothing.
  TGgmlApiAccess = class(TLlamaCppLibraryLoader)
  protected
    procedure DoLoadLibrary(const ALibAddr: THandle); override;
  end;

  // Process-wide singleton accessor for the ggml API; created and destroyed
  // with the unit via class constructor/destructor.
  TGgmlApi = class(TGgmlApiAccess)
  private
    class var FInstance: TGgmlApi;
  public
    class constructor Create();
    class destructor Destroy();

    // Shared instance used by TLlamaCppApis.LoadAll/UnloadAll.
    class property Instance: TGgmlApi read FInstance;
  end;
24 |
25 | implementation
26 |
27 | { TGgmlApiAccess }
28 |
procedure TGgmlApiAccess.DoLoadLibrary(const ALibAddr: THandle);
begin
  // NOTE(review): the ancestor's DoLoadLibrary is declared abstract —
  // confirm this bare 'inherited;' is intentional (there is no ancestor
  // implementation to invoke).
  inherited;
  // No ggml symbols are bound yet.
end;
34 |
35 | { TGgmlApi }
36 |
class constructor TGgmlApi.Create;
begin
  // Eagerly create the singleton when the unit initializes.
  FInstance := TGgmlApi.Create();
end;
41 |
class destructor TGgmlApi.Destroy;
begin
  // Release the singleton and clear the class reference so any late caller
  // observes nil instead of a dangling pointer.
  FreeAndNil(FInstance);
end;
46 |
47 | end.
48 |
--------------------------------------------------------------------------------
/src/Api/LlamaCpp.Api.Llava.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Api.Llava;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | LlamaCpp.Api,
8 | LlamaCpp.CType.Llava,
9 | LlamaCpp.CType.Llama;
10 |
type
  // Loader/binding surface for the llava shared library. The nested types
  // are function-pointer signatures mirroring the exports of llava.h and
  // clip.h; the public fields hold the resolved entry points.
  TLlavaApiAccess = class(TLlamaCppLibraryLoader)
  public type
    // llava.h: llava_validate_embed_size
    TLLavaValidateEmbedSize = function(const ALlamaContext: PLlamaContext;
      const AClipContext: PClipCtx): Boolean; cdecl;
    // llava.h: llava_image_embed_make_with_clip_img
    TLLavaImageEmbedMakeWithClipImg = function(AClipContext: PClipCtx;
      AThreadCount: Integer; const AImage: PClipImageU8;
      var AImageEmbedOut: PSingle; var AImagePosOut: Integer): Boolean; cdecl;
    // llava.h: llava_image_embed_make_with_bytes
    TLLavaImageEmbedMakeWithBytes = function(AClipContext: PClipCtx;
      AThreadCount: Integer; const AImageBytes: PByte;
      AImageBytesLength: Integer): PLlavaImageEmbed; cdecl;
    // llava.h: llava_image_embed_make_with_filename
    TLLavaImageEmbedMakeWithFilename = function(AClipContext: PClipCtx;
      AThreadCount: Integer; const AImagePath: PAnsiChar)
      : PLlavaImageEmbed; cdecl;
    // llava.h: llava_image_embed_free
    TLLavaImageEmbedFree = procedure(AImageEmbed: PLlavaImageEmbed); cdecl;
    // llava.h: llava_eval_image_embed
    TLLavaEvalImageEmbed = function(ALlamaContext: PLlamaContext;
      const AImageEmbed: PLlavaImageEmbed; ABatchSize: Integer;
      var APastPos: Integer): Boolean; cdecl;
    // clip.h: clip_model_load
    TClipModelLoad = function(const AFileName: PAnsiChar; AVerbosity: Integer)
      : PClipCtx; cdecl;
    // clip.h: clip_free
    TClipFree = procedure(AClipContext: PClipCtx); cdecl;
  protected
    procedure DoLoadLibrary(const ALibAddr: THandle); override;
  public
    // Resolved entry points; nil if the symbol was missing at load time.
    llava_validate_embed_size: TLLavaValidateEmbedSize;
    llava_image_embed_make_with_clip_img: TLLavaImageEmbedMakeWithClipImg;
    llava_image_embed_make_with_bytes: TLLavaImageEmbedMakeWithBytes;
    llava_image_embed_make_with_filename: TLLavaImageEmbedMakeWithFilename;
    llava_image_embed_free: TLLavaImageEmbedFree;
    llava_eval_image_embed: TLLavaEvalImageEmbed;
    clip_model_load: TClipModelLoad;
    clip_free: TClipFree;
  end;

  // Process-wide singleton accessor for the llava API.
  TLlavaApi = class(TLlavaApiAccess)
  private
    class var FInstance: TLlavaApi;
  public
    class constructor Create();
    class destructor Destroy();

    class property Instance: TLlavaApi read FInstance;
  end;
54 |
55 | implementation
56 |
57 | { TLlavaApiAccess }
58 |
procedure TLlavaApiAccess.DoLoadLibrary(const ALibAddr: THandle);
begin
  inherited;
  // Resolve every llava/clip export. A missing symbol leaves the field nil,
  // which fails on first call rather than at load time.
  @llava_validate_embed_size :=
    GetProcAddress(ALibAddr, 'llava_validate_embed_size');
  @llava_image_embed_make_with_clip_img :=
    GetProcAddress(ALibAddr, 'llava_image_embed_make_with_clip_img');
  @llava_image_embed_make_with_bytes :=
    GetProcAddress(ALibAddr, 'llava_image_embed_make_with_bytes');
  @llava_image_embed_make_with_filename :=
    GetProcAddress(ALibAddr, 'llava_image_embed_make_with_filename');
  @llava_image_embed_free :=
    GetProcAddress(ALibAddr, 'llava_image_embed_free');
  @llava_eval_image_embed :=
    GetProcAddress(ALibAddr, 'llava_eval_image_embed');
  @clip_model_load :=
    GetProcAddress(ALibAddr, 'clip_model_load');
  @clip_free :=
    GetProcAddress(ALibAddr, 'clip_free');
end;
75 |
76 | { TLlavaApi }
77 |
class constructor TLlavaApi.Create;
begin
  // Eagerly create the singleton when the unit initializes.
  FInstance := TLlavaApi.Create();
end;
82 |
class destructor TLlavaApi.Destroy;
begin
  // Release the singleton and clear the class reference so any late caller
  // observes nil instead of a dangling pointer.
  FreeAndNil(FInstance);
end;
87 |
88 | end.
89 |
--------------------------------------------------------------------------------
/src/Api/LlamaCpp.Api.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Api;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils
7 | {$IFDEF MSWINDOWS}
8 | , Winapi.Windows
9 | {$ENDIF MSWINDOWS};
10 |
type
  // Base class for the per-library API binders. Load() opens the shared
  // library and hands the handle to the concrete class's DoLoadLibrary,
  // which resolves its entry points.
  TLlamaCppLibraryLoader = class
  strict private
    FLibAddr: THandle; // OS handle of the loaded library; 0 when unloaded.
  protected
    // Cross-platform wrapper over the OS symbol lookup.
    function GetProcAddress(const AHandle: THandle;
      const AProcName: string): pointer;
    // Implemented by subclasses to bind their exported symbols.
    procedure DoLoadLibrary(const ALibAddr: THandle); virtual; abstract;
  public
    // Raises if the file does not exist or the OS loader fails.
    procedure Load(const ALibraryPath: string);
    procedure Unload();
  end;

  // Convenience facade: loads/unloads the llama, ggml and llava libraries
  // from a single directory (default: "llamacpp" beside the executable).
  TLlamaCppApis = class
  public
    class procedure LoadAll(ALibDir: string = '');
    class procedure UnloadAll();
  end;
29 |
30 | implementation
31 |
32 | uses
33 | System.IOUtils,
34 | LlamaCpp.Api.Ggml,
35 | LlamaCpp.Api.Llava,
36 | LlamaCpp.Api.Llama;
37 |
38 | { TLlamaCppLibraryLoader }
39 |
// Loads the shared library at ALibraryPath and lets the subclass bind its
// symbols. Raises Exception when the file is missing or the OS loader fails.
procedure TLlamaCppLibraryLoader.Load(const ALibraryPath: string);
begin
  // Fail early with a clear message instead of a cryptic loader error.
  if not TFile.Exists(ALibraryPath) then
    raise Exception.CreateFmt('Library "%s" not found.', [ALibraryPath]);

  {$IFDEF MSWINDOWS}
  FLibAddr := Winapi.Windows.LoadLibrary(PWideChar(WideString(ALibraryPath)));
  {$ELSE}
  FLibAddr := System.SysUtils.LoadLibrary(PWideChar(WideString(ALibraryPath)));
  {$ENDIF MSWINDOWS}
  if FLibAddr = 0 then
    raise Exception.CreateFmt('Unable to load llama library. %s', [SysErrorMessage(GetLastError)]);
  // Let the concrete API class resolve its entry points.
  DoLoadLibrary(FLibAddr);
end;
54 |
// Releases the loaded library. Safe to call when nothing was loaded and
// safe to call twice: a zero handle is skipped and the handle is reset.
procedure TLlamaCppLibraryLoader.Unload;
begin
  // Guard against unload-before-load / double-unload: the original passed
  // a zero handle straight to FreeLibrary.
  if FLibAddr = 0 then
    Exit;

  {$IFDEF MSWINDOWS}
  Winapi.Windows.FreeLibrary(FLibAddr);
  {$ELSE}
  System.SysUtils.FreeLibrary(FLibAddr);
  {$ENDIF MSWINDOWS}

  FLibAddr := 0;
end;
63 |
// Cross-platform symbol lookup; returns nil when the symbol is absent.
function TLlamaCppLibraryLoader.GetProcAddress(const AHandle: THandle;
  const AProcName: string): pointer;
begin
  {$IFDEF MSWINDOWS}
  Result := Winapi.Windows.GetProcAddress(AHandle,
    PWideChar(WideString(AProcName)));
  {$ELSE}
  // NOTE(review): confirm System.SysUtils.GetProcAddress accepts a
  // PWideChar name on POSIX targets (dlsym itself expects a UTF-8 string).
  Result := System.SysUtils.GetProcAddress(AHandle,
    PWideChar(WideString(AProcName)));
  {$ENDIF MSWINDOWS}
end;
75 |
76 | { TLlamaCppApis }
77 |
// Loads the llama, ggml and llava shared libraries from ALibDir, defaulting
// to the "llamacpp" folder next to the executable. Raises if any library
// is missing or fails to load.
class procedure TLlamaCppApis.LoadAll(ALibDir: string);
const
  {$IFDEF MSWINDOWS}
  LIB_LLAMA = 'llama.dll';
  LIB_GGML = 'ggml.dll';
  LIB_LAVA = 'llava_shared.dll';
  {$ELSEIF DEFINED(OSX64) or DEFINED(OSXARM64)}
  // OSX64 alone misses Apple Silicon builds (OSXARM64), even though the
  // project ships macos_arm64 dylibs.
  LIB_LLAMA = 'libllama.dylib';
  LIB_GGML = 'libggml.dylib';
  LIB_LAVA = 'libllava_shared.dylib';
  {$ELSE}
  LIB_LLAMA = 'libllama.so';
  LIB_GGML = 'libggml.so';
  LIB_LAVA = 'libllava_shared.so';
  {$ENDIF MSWINDOWS}
begin
  if ALibDir.IsEmpty() then
    // TPath.Combine takes plain strings; the original round-tripped the
    // directory through PWideChar/WideString for no reason.
    ALibDir := TPath.Combine(TPath.GetDirectoryName(ParamStr(0)), 'llamacpp');
  {$IFDEF MSWINDOWS}
  // Make dependent DLLs (ggml-*.dll, etc.) in the same folder resolvable.
  SetDllDirectory(PWideChar(WideString(ALibDir)));
  {$ENDIF MSWINDOWS}
  TLlamaApi.Instance.Load(TPath.Combine(ALibDir, LIB_LLAMA));
  TGgmlApi.Instance.Load(TPath.Combine(ALibDir, LIB_GGML));
  TLlavaApi.Instance.Load(TPath.Combine(ALibDir, LIB_LAVA));
end;
105 |
// Unloads the three libraries in reverse order of LoadAll, so the dependent
// libraries (llava, ggml) are released before the core llama library.
class procedure TLlamaCppApis.UnloadAll;
begin
  TLlavaApi.Instance.Unload();
  TGgmlApi.Instance.Unload();
  TLlamaApi.Instance.Unload();
end;
112 |
113 | end.
114 |
--------------------------------------------------------------------------------
/src/CType/Ggml/LlamaCpp.CType.Ggml.Backend.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Ggml.Backend;

interface

type
  // Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
  // when ask == true, the scheduler wants to know if the user wants to observe this node
  // this allows the scheduler to batch nodes together in order to evaluate them in a single call
  //
  // when ask == false, the scheduler is passing the node tensor to the user for observation
  // if the user returns false, the scheduler will cancel the graph compute
  //
  // typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
  // AGgmlTensor is an untyped pointer to the C-side ggml_tensor.
  TGgmlBackendSchedEvalCallback = function(const AGgmlTensor: pointer;
    const AAsk: boolean; const AUserData: pointer): boolean; cdecl;

implementation

end.
20 |
--------------------------------------------------------------------------------
/src/CType/Ggml/LlamaCpp.CType.Ggml.Cpu.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Ggml.Cpu;

interface

type
  // Mirrors ggml's enum ggml_numa_strategy (NUMA placement policy).
  {$MINENUMSIZE 4} // Force 4-byte enums to match the C ABI.
  TGGMLNumaStrategy = (
    GGML_NUMA_STRATEGY_DISABLED = 0,
    GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
    GGML_NUMA_STRATEGY_ISOLATE = 2,
    GGML_NUMA_STRATEGY_NUMACTL = 3,
    GGML_NUMA_STRATEGY_MIRROR = 4,
    GGML_NUMA_STRATEGY_COUNT // Number of strategies, not a real policy.
  );
  {$MINENUMSIZE 1} // Restore the default enum size for the rest of the project.

implementation

end.
20 |
--------------------------------------------------------------------------------
/src/CType/Ggml/LlamaCpp.CType.Ggml.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Ggml;

interface

type
  // Constants lifted from ggml.h.
  TGgml = class
  public const
    GGML_ROPE_TYPE_NEOX = 2;
  end;

  {$MINENUMSIZE 4} // 4-byte enums to stay ABI-compatible with the C headers.
  // Mirrors ggml's enum ggml_type (tensor storage/quantization formats).
  TGGMLType = (GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1, GGML_TYPE_Q4_0 = 2,
    GGML_TYPE_Q4_1 = 3,
    // GGML_TYPE_Q4_2 = 4, support has been removed
    // GGML_TYPE_Q4_3 = 5, support has been removed
    GGML_TYPE_Q5_0 = 6, GGML_TYPE_Q5_1 = 7, GGML_TYPE_Q8_0 = 8,
    GGML_TYPE_Q8_1 = 9, GGML_TYPE_Q2_K = 10, GGML_TYPE_Q3_K = 11,
    GGML_TYPE_Q4_K = 12, GGML_TYPE_Q5_K = 13, GGML_TYPE_Q6_K = 14,
    GGML_TYPE_Q8_K = 15, GGML_TYPE_IQ2_XXS = 16, GGML_TYPE_IQ2_XS = 17,
    GGML_TYPE_IQ3_XXS = 18, GGML_TYPE_IQ1_S = 19, GGML_TYPE_IQ4_NL = 20,
    GGML_TYPE_IQ3_S = 21, GGML_TYPE_IQ2_S = 22, GGML_TYPE_IQ4_XS = 23,
    GGML_TYPE_I8 = 24, GGML_TYPE_I16 = 25, GGML_TYPE_I32 = 26,
    GGML_TYPE_I64 = 27, GGML_TYPE_F64 = 28, GGML_TYPE_IQ1_M = 29,
    GGML_TYPE_BF16 = 30, GGML_TYPE_Q4_0_4_4 = 31, GGML_TYPE_Q4_0_4_8 = 32,
    GGML_TYPE_Q4_0_8_8 = 33, GGML_TYPE_TQ1_0 = 34, GGML_TYPE_TQ2_0 = 35,
    GGML_TYPE_COUNT = Integer(36) // Sentinel taken from ggml.h — TODO confirm it matches the bound library version
  );

  // Mirrors ggml's enum ggml_log_level.
  TGgmlLogLevel = (GGML_LOG_LEVEL_NONE = 0, GGML_LOG_LEVEL_DEBUG = 1,
    GGML_LOG_LEVEL_INFO = 2, GGML_LOG_LEVEL_WARN = 3, GGML_LOG_LEVEL_ERROR = 4,
    GGML_LOG_LEVEL_CONT = 5);
  {$MINENUMSIZE 1}

  // Abort callback
  // If not NULL, called before ggml computation
  // If it returns true, the computation is aborted
  // typedef bool (*ggml_abort_callback)(void * data);
  TGgmlAbortCallback = function(const AData: pointer): boolean; cdecl;

  // Opaque handle for the C-side ggml_tensor; only used through PGgmlTensor.
  PGgmlTensor = ^TGgmlTensor;
  TGgmlTensor = NativeUInt;

  // Matches ggml_log_callback: level, message text, and user data.
  TGGMLLogCallback = procedure(level: TGgmlLogLevel; const text: PAnsiChar;
    user_data: pointer); cdecl;

implementation

end.
49 |
--------------------------------------------------------------------------------
/src/CType/Llava/LlamaCpp.CType.Llava.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.CType.Llava;

interface

type
  // Mirrors llava.h's struct llava_image_embed.
  PLlavaImageEmbed = ^TLLavaImageEmbed;
  TLlavaImageEmbed = record
    embed: PSingle; // Pointer to a float array (Single type in Delphi)
    n_image_pos: Int32;
  end;

  // The struct clip_ctx is an opaque type, so we represent it as a pointer in Delphi.
  PClipCtx = ^TClipCtx;
  TClipCtx = NativeUInt;

  PClipImageU8 = ^TClipImageU8;
  TClipImageU8 = record
    nx: Integer;
    ny: Integer;
    // Byte buffer standing in for the C++ std::vector<uint8_t> member.
    // The generic parameter was lost in transcription and is restored here.
    // NOTE(review): a Delphi dynamic array is NOT layout-compatible with
    // std::vector — confirm this record is only constructed/read on the
    // Delphi side.
    buf: TArray<Byte>;
  end;

implementation

end.
26 |
--------------------------------------------------------------------------------
/src/Common/Cache/LlamaCpp.Common.Cache.Base.pas:
--------------------------------------------------------------------------------
unit LlamaCpp.Common.Cache.Base;

interface

uses
  LlamaCpp.Common.Types,
  LlamaCpp.Common.State;

type
  // Abstract base for llama state caches keyed by token sequences.
  // Generic parameters (TArray<Integer>) were lost in transcription and are
  // restored here; bare TArray is not a valid Delphi type.
  TBaseLlamaCache = class(TInterfacedObject, ILlamaCache)
  public
    // Maximum cache size, in bytes, before eviction should occur.
    CapacityBytes: Int64;
  protected
    function GetCacheSize: Int64; virtual; abstract;
    // Returns the stored key sharing the longest prefix with AKey (nil if none).
    function FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>; virtual; abstract;
    function GetItem(const AKey: TArray<Integer>): TLlamaState; virtual; abstract;
    function Contains(const AKey: TArray<Integer>): Boolean; virtual; abstract;
    procedure SetItem(const AKey: TArray<Integer>; const AValue: TLlamaState); virtual; abstract;
  protected
    // Length of the common leading run of two token arrays.
    function LongestTokenPrefix(const A, B: TArray<Integer>): integer;
  public
    constructor Create(ACapacityBytes: Int64);
  end;

implementation

uses
  System.Math;

{ TBaseLlamaCache }

constructor TBaseLlamaCache.Create(ACapacityBytes: Int64);
begin
  inherited Create;
  CapacityBytes := ACapacityBytes;
end;

function TBaseLlamaCache.LongestTokenPrefix(const A,
  B: TArray<Integer>): integer;
var
  I: Integer;
begin
  // Count matching leading tokens; stop at the first mismatch.
  Result := 0;
  for I := 0 to Min(Length(A), Length(B)) - 1 do
    if A[I] = B[I] then
      Inc(Result)
    else
      Break;
end;

end.
52 |
--------------------------------------------------------------------------------
/src/Common/Cache/LlamaCpp.Common.Cache.Disk.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Cache.Disk;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Classes,
8 | System.Threading,
9 | System.Generics.Collections,
10 | System.IOUtils,
11 | FireDAC.Comp.Client,
12 | FireDAC.Phys.SQLite,
13 | LlamaCpp.Common.Types,
14 | LlamaCpp.Common.State,
15 | LlamaCpp.Common.Cache.Base;
16 |
17 | type
18 | TLlamaDiskCache = class(TBaseLlamaCache)
19 | private const
20 | {$IFDEF MSWINDOWS}
21 | DEFAULT_CACHE_DIR = '.\cache\llama_cache';
22 | {$ELSE}
23 | DEFAULT_CACHE_DIR = './cache/llama_cache';
24 | {$ENDIF}
25 | private
26 | FCacheFileName: string;
27 | FConnection: TFDConnection;
28 | FDatS: TFDQuery;
29 | FTask: ITask;
30 | private
31 | procedure CreateCacheConnectionDefs();
32 | procedure CreateCacheTable();
33 | function Load(const AKey: TArray): TLlamaState;
34 | procedure Save(const AKey: TArray; const AState: TLlamaState);
35 | procedure Delete(const AKey: TArray);
36 | public
37 | constructor Create(const ACacheDir: string = DEFAULT_CACHE_DIR;
38 | ACapacityBytes: Int64 = Int64(2) shl 30);
39 | destructor Destroy; override;
40 |
41 | function GetCacheSize: Int64; override;
42 | function FindLongestPrefixKey(const AKey: TArray): TArray; override;
43 | function GetItem(const AKey: TArray): TLlamaState; override;
44 | function Contains(const AKey: TArray): Boolean; override;
45 | procedure SetItem(const AKey: TArray; const AValue: TLlamaState); override;
46 | end;
47 |
48 | implementation
49 |
50 | uses
51 | FireDAC.Stan.Intf, FireDAC.Stan.Option,
52 | FireDAC.Stan.Error, FireDAC.UI.Intf, FireDAC.Phys.Intf, FireDAC.Stan.Def,
53 | FireDAC.Stan.Pool, FireDAC.Stan.Async, FireDAC.Phys,
54 | FireDAC.Stan.ExprFuncs, FireDAC.Phys.SQLiteWrapper, FireDAC.Phys.SQLiteWrapper.Stat,
55 | FireDAC.Phys.SQLiteDef, FireDAC.Stan.Param, FireDAC.DatS, FireDAC.DApt.Intf,
56 | FireDAC.DApt, Data.DB, FireDAC.Comp.DataSet,
57 | {$IFDEF MSWINDOWS}
58 | Windows
59 | {$ELSE}
60 | Posix.Unistd
61 | {$ENDIF}
62 | ;
63 |
64 | type
65 | TCachePair = TPair, TLlamaState>;
66 | TCachePairs = TArray;
67 |
68 | { TLlamaDiskCache }
69 |
70 | constructor TLlamaDiskCache.Create(const ACacheDir: string; ACapacityBytes: Int64);
71 | var
72 | LStr: string;
73 | begin
74 | inherited Create(ACapacityBytes);
75 |
76 | if TDirectory.Exists(ACacheDir) then
77 | begin
78 | for LStr in TDirectory.GetFiles(ACacheDir, '*', TSearchOption.soAllDirectories) do
79 | try
80 | TFile.Delete(LStr); // Delete files not in use
81 | except
82 | //
83 | end;
84 |
85 | for LStr in TDirectory.GetDirectories(ACacheDir) do
86 | try
87 | TDirectory.Delete(LStr, true); // Delete files not in use
88 | except
89 | //
90 | end;
91 | end;
92 |
93 | {$IFDEF MSWINDOWS}
94 | FCacheFileName := TPath.Combine(
95 | TPath.GetFullPath(ACacheDir),
96 | GetCurrentProcessId().ToString());
97 | {$ELSE}
98 | FCacheFileName := TPath.Combine(
99 | TPath.GetFullPath(ACacheDir),
100 | GetPID().ToString());
101 | {$ENDIF}
102 |
103 | FCacheFileName := TPath.Combine(
104 | FCacheFileName,
105 | TThread.CurrentThread.ThreadID.ToString());
106 |
107 | FCacheFileName := TPath.Combine(FCacheFileName, 'cache.db');
108 |
109 | if not TDirectory.Exists(TPath.GetDirectoryName(FCacheFileName)) then
110 | TDirectory.CreateDirectory(TPath.GetDirectoryName(FCacheFileName));
111 |
112 | FConnection := TFDConnection.Create(nil);
113 | FDatS := TFDQuery.Create(FConnection);
114 | FDatS.Connection := FConnection;
115 |
116 | CreateCacheConnectionDefs();
117 | CreateCacheTable();
118 | end;
119 |
120 | destructor TLlamaDiskCache.Destroy;
121 | begin
122 | if Assigned(FTask) then
123 | FTask.Wait();
124 |
125 | FConnection.Free();
126 | inherited;
127 | end;
128 |
129 | procedure TLlamaDiskCache.CreateCacheConnectionDefs;
130 | begin
131 | FConnection.Params.Values['database'] := FCacheFileName;
132 | FConnection.LoginPrompt := False;
133 | FConnection.DriverName := 'SQLite';
134 | FConnection.Connected:= True;
135 | end;
136 |
137 | procedure TLlamaDiskCache.CreateCacheTable;
138 | begin
139 | FDatS.SQL.Text := '''
140 | CREATE TABLE IF NOT EXISTS CACHE(
141 | ID INTEGER PRIMARY KEY AUTOINCREMENT,
142 | KEY BLOB,
143 | DATA BLOB
144 | );
145 | ''';
146 | FDatS.ExecSQL;
147 | end;
148 |
149 | function TLlamaDiskCache.Load(const AKey: TArray): TLlamaState;
150 | var
151 | LStream: TMemoryStream;
152 | begin
153 | FDatS.SQL.Text := 'SELECT KEY, DATA FROM CACHE WHERE KEY = :KEY';
154 |
155 | LStream := TMemoryStream.Create();
156 | try
157 | LStream.WriteBuffer(AKey[0], Length(AKey) * SizeOf(Integer));
158 | LStream.Position := 0;
159 | FDatS.ParamByName('KEY').LoadFromStream(LStream, TFieldType.ftBlob);
160 | LStream.Clear();
161 |
162 | FDatS.Open();
163 |
164 | if FDatS.IsEmpty() then
165 | Exit(nil);
166 |
167 | try
168 | LStream.Size := 0;
169 | TBlobField(FDatS.FieldByName('DATA')).SaveToStream(LStream);
170 |
171 | Result := TLlamaState.Create();
172 | try
173 | LStream.Position := 0;
174 | Result.Deserialize(LStream);
175 | except
176 | on E: Exception do
177 | begin
178 | FreeAndNil(Result);
179 | raise;
180 | end;
181 | end;
182 | finally
183 | FDatS.Close();
184 | end;
185 |
186 | finally
187 | LStream.Free;
188 | end;
189 | end;
190 |
191 | procedure TLlamaDiskCache.Save(const AKey: TArray;
192 | const AState: TLlamaState);
193 | var
194 | LStream: TMemoryStream;
195 | begin
196 | Delete(AKey);
197 |
198 | LStream := TMemoryStream.Create();
199 | try
200 | FDatS.SQL.Text := 'INSERT INTO CACHE (KEY, DATA) VALUES (:KEY, :DATA)';
201 |
202 | LStream.WriteBuffer(AKey[0], Length(AKey) * SizeOf(Integer));
203 | LStream.Position := 0;
204 | FDatS.ParamByName('KEY').LoadFromStream(LStream, TFieldType.ftBlob);
205 |
206 | LStream.Clear();
207 | LStream.Size := 0;
208 |
209 | AState.Serialize(LStream);
210 | LStream.Position := 0;
211 | FDatS.ParamByName('DATA').LoadFromStream(LStream, TFieldType.ftBlob);
212 | LStream.Clear();
213 |
214 | FDatS.ExecSQL();
215 | FConnection.Commit();
216 | finally
217 | LStream.Free;
218 | end;
219 | end;
220 |
221 | procedure TLlamaDiskCache.Delete(const AKey: TArray);
222 | var
223 | LStream: TMemoryStream;
224 | begin
225 | FDatS.SQL.Text := 'DELETE FROM CACHE WHERE KEY = :KEY';
226 |
227 | LStream := TMemoryStream.Create();
228 | try
229 | LStream.WriteBuffer(AKey[0], Length(AKey) * SizeOf(Integer));
230 | LStream.Position := 0;
231 | FDatS.ParamByName('KEY').LoadFromStream(LStream, TFieldType.ftBlob);
232 | LStream.Clear();
233 |
234 | FDatS.ExecSQL();
235 | FConnection.Commit();
236 | finally
237 | LStream.Free;
238 | end;
239 | end;
240 |
241 | function TLlamaDiskCache.GetCacheSize: Int64;
242 | const
243 | SQL_SIZES = 'SELECT SUM(LENGTH(KEY)) + SUM(LENGTH(DATA)) FROM CACHE;';
244 | begin
245 | FDatS.Open(SQL_SIZES);
246 | try
247 | Result := FDatS.Fields[0].Value;
248 | finally
249 | FDatS.Close();
250 | end;
251 | end;
252 |
253 | function TLlamaDiskCache.FindLongestPrefixKey(const AKey: TArray): TArray;
254 | var
255 | LPrefixLen: Integer;
256 | LMaxPrefixLen: Integer;
257 | LKey: TArray;
258 | LStream: TMemoryStream;
259 | begin
260 | LMaxPrefixLen := 0;
261 |
262 | FDatS.Open('SELECT KEY, DATA FROM CACHE');
263 |
264 | if FDatS.IsEmpty() then
265 | Exit(nil);
266 |
267 | FDatS.First();
268 |
269 | LStream := TMemoryStream.Create();
270 | try
271 | while not FDatS.Eof do
272 | begin
273 | LStream.Clear();
274 |
275 | TBlobField(FDatS.FieldByName('KEY')).SaveToStream(LStream);
276 |
277 | LStream.Position := 0;
278 | SetLength(LKey, LStream.Size div SizeOf(Integer));
279 | LStream.ReadBuffer(LKey[0], Length(LKey) * SizeOf(Integer));
280 |
281 | LPrefixLen := LongestTokenPrefix(LKey, AKey);
282 | if LPrefixLen > LMaxPrefixLen then
283 | begin
284 | LMaxPrefixLen := LPrefixLen;
285 | Result := LKey;
286 | end;
287 |
288 | FDatS.Next();
289 | end;
290 | finally
291 | LStream.Free;
292 | end;
293 |
294 | FDatS.Close();
295 | end;
296 |
function TLlamaDiskCache.Contains(const AKey: TArray): Boolean;
begin
  // Block until any pending background Save has finished, so a key written
  // by SetItem is visible to the prefix scan.
  if Assigned(FTask) then
    FTask.Wait();

  Result := FindLongestPrefixKey(AKey) <> nil;
end;
304 |
function TLlamaDiskCache.GetItem(const AKey: TArray<Integer>): TLlamaState;
var
  LFoundKey: TArray<Integer>;
begin
  // Returns (and removes) the cached state whose stored key shares the
  // longest token prefix with AKey. Raises when no stored key matches.
  if Assigned(FTask) then
    FTask.Wait(); // ensure a pending background Save is visible first

  LFoundKey := FindLongestPrefixKey(AKey);

  if not Assigned(LFoundKey) then
    raise Exception.Create('Key not found');

  Result := Load(LFoundKey);

  // Pop semantics: the entry is deleted once read; callers re-insert via
  // SetItem if they want to keep it cached.
  Delete(LFoundKey);
end;
321 |
procedure TLlamaDiskCache.SetItem(const AKey: TArray;
  const AValue: TLlamaState);
var
  LValue: TLlamaState;
begin
  // Persists a clone of AValue under AKey on a background task, then evicts
  // the oldest rows (lowest ID) until the cache fits within CapacityBytes.
  // NOTE(review): FTask is overwritten without waiting for a previous task,
  // and the task uses FDatS/FConnection concurrently with the main thread's
  // readers — verify the dataset/connection are safe for this usage.
  LValue := AValue.Clone(); // clone now so the caller may free AValue immediately
  FTask := TTask.Run(procedure() begin
    try
      Save(AKey, LValue);
    finally
      LValue.Free();
    end;

    // Evict in insertion order (MIN(ID)) until within capacity.
    while (GetCacheSize() > CapacityBytes) do
      FDatS.ExecSQL('DELETE FROM CACHE WHERE ID = (SELECT MIN(ID) FROM CACHE);');

    FConnection.Commit();
  end);
end;
341 |
342 | end.
343 |
--------------------------------------------------------------------------------
/src/Common/Cache/LlamaCpp.Common.Cache.Ram.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Cache.Ram;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Cache.Base,
9 | LlamaCpp.Common.Types,
10 | LlamaCpp.Common.State;
11 |
type
  // In-memory token-prefix cache of TLlamaState snapshots, bounded by an
  // approximate byte capacity enforced on insertion.
  TLlamaRAMCache = class(TBaseLlamaCache)
  private
    // 1 GiB on 32-bit Windows (NativeInt is 32-bit there, so 2 GiB would
    // overflow); 2 GiB (Int64(2) shl 30) everywhere else.
    const DEFAULT_CAPACITY = {$IFDEF WIN32}1_073_741_824{$ELSE}Int64(2) shl 30{$ENDIF WIN32};
  private
    // NOTE(review): generic arguments appear lost in transcription — this is
    // presumably TOrderedDictionary<TArray<Integer>, TLlamaState>; confirm
    // against version control.
    FCache: TOrderedDictionary, TLlamaState>;
  public
    constructor Create(ACapacityBytes: NativeInt = DEFAULT_CAPACITY);
    destructor Destroy; override;

    function GetCacheSize: Int64; override;
    function FindLongestPrefixKey(const AKey: TArray): TArray; override;
    function GetItem(const AKey: TArray): TLlamaState; override;
    function Contains(const AKey: TArray): Boolean; override;
    procedure SetItem(const AKey: TArray; const AValue: TLlamaState); override;
  end;
28 |
29 | implementation
30 |
31 | { TLlamaRAMCache }
32 |
constructor TLlamaRAMCache.Create(ACapacityBytes: NativeInt);
begin
  // ACapacityBytes: soft byte limit enforced by SetItem's eviction loop.
  inherited Create(ACapacityBytes);
  FCache := TOrderedDictionary<TArray<Integer>, TLlamaState>.Create();
end;
38 |
destructor TLlamaRAMCache.Destroy;
var
  LIndex: Integer;
begin
  // The cache owns its stored states; release each one before the dictionary.
  for LIndex := FCache.Values.Count - 1 downto 0 do
    FCache.ValueList[LIndex].Free();

  FCache.Free;
  inherited;
end;
49 |
function TLlamaRAMCache.GetCacheSize: Int64;
var
  LIndex: Integer;
begin
  // Sum of the serialized sizes reported by every cached state.
  Result := 0;
  for LIndex := FCache.Count - 1 downto 0 do
    Inc(Result, FCache.ValueList[LIndex].GetSize());
end;
58 |
function TLlamaRAMCache.FindLongestPrefixKey(const AKey: TArray<Integer>): TArray<Integer>;
var
  LPrefixLen: Integer;
  LMaxPrefixLen: Integer;
  LCachedItem: TPair<TArray<Integer>, TLlamaState>;
begin
  // Returns the stored key sharing the longest common token prefix with AKey,
  // or nil when no stored key shares any prefix.
  Result := nil; // managed-type function results are not guaranteed initialized
  LMaxPrefixLen := 0;

  for LCachedItem in FCache do
  begin
    LPrefixLen := LongestTokenPrefix(LCachedItem.Key, AKey);
    if LPrefixLen > LMaxPrefixLen then
    begin
      LMaxPrefixLen := LPrefixLen;
      Result := LCachedItem.Key;
    end;
  end;
end;
77 |
function TLlamaRAMCache.Contains(const AKey: TArray): Boolean;
begin
  // True when at least one cached key shares a non-empty prefix with AKey.
  Result := FindLongestPrefixKey(AKey) <> nil;
end;
82 |
function TLlamaRAMCache.GetItem(const AKey: TArray<Integer>): TLlamaState;
var
  LFoundKey: TArray<Integer>;
begin
  // Pops the state stored under the key with the longest shared token prefix.
  // Ownership of the returned state transfers to the caller.
  LFoundKey := FindLongestPrefixKey(AKey);

  if not Assigned(LFoundKey) then
    raise Exception.Create('Key not found');

  Result := FCache[LFoundKey];

  // Remove by the exact array instance returned above so the dictionary's
  // key lookup finds the stored entry.
  FCache.Remove(LFoundKey);
end;
96 |
procedure TLlamaRAMCache.SetItem(const AKey: TArray<Integer>;
  const AValue: TLlamaState);
begin
  // Stores a clone (the cache owns its values), then evicts the oldest
  // entries — index 0 of the ordered dictionary — until the total size fits
  // within CapacityBytes.
  FCache.AddOrSetValue(AKey, AValue.Clone());

  while (GetCacheSize() > CapacityBytes) do
  begin
    FCache.ValueList[0].Free();
    FCache.Remove(FCache.KeyList[0]);
  end;
end;
108 |
109 | end.
110 |
--------------------------------------------------------------------------------
/src/Common/Chat/Completion/LlamaCpp.Common.Chat.Completion.Collection.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Completion.Collection;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types;
9 |
type
  // Process-wide registry mapping chat-format names to completion handlers.
  // Accessed through the Instance singleton, which is created and populated
  // by the class constructor.
  TLlamaChatCompletionCollection = class
  private
    class var FInstance: TLlamaChatCompletionCollection;
  private
    // NOTE(review): generic arguments appear lost in transcription —
    // presumably TDictionary<string, ILlamaChatCompletionHandler>; confirm
    // against version control.
    FChatCompletionHandlers: TDictionary;
  private
    class constructor Create();
    class destructor Destroy();
  public
    constructor Create();
    destructor Destroy(); override;

    procedure RegisterChatCompletionHandler(const AName: string;
      const AChatHandler: ILlamaChatCompletionHandler;
      const AOverwrite: boolean = false);
    procedure UnregisterChatHandler(const AName: string);

    function GetChatCompletionHandler(const AName: string)
      : ILlamaChatCompletionHandler;

    class property Instance: TLlamaChatCompletionCollection read FInstance;
  end;
33 |
34 | implementation
35 |
36 | uses
37 | LlamaCpp.Common.Chat.Formatter.Registration;
38 |
39 | { TLlamaChatCompletionCollection }
40 |
class constructor TLlamaChatCompletionCollection.Create;
begin
  // Singleton bootstrap: create the shared instance, then register every
  // built-in chat formatter against it.
  FInstance := TLlamaChatCompletionCollection.Create();
  TChatFormatterRegistration.RegisterAll();
end;
46 |
class destructor TLlamaChatCompletionCollection.Destroy;
begin
  // Tear down in reverse order of the class constructor: unregister the
  // built-in formatters, then free the singleton.
  TChatFormatterRegistration.UnregisterAll();
  FInstance.Free();
end;
52 |
constructor TLlamaChatCompletionCollection.Create;
begin
  inherited Create();
  // Name -> handler map; interface references keep handlers alive.
  FChatCompletionHandlers := TDictionary<string, ILlamaChatCompletionHandler>.Create();
end;
57 |
destructor TLlamaChatCompletionCollection.Destroy;
begin
  // Handlers are interface references; freeing the dictionary releases them.
  FChatCompletionHandlers.Free();
  inherited;
end;
63 |
procedure TLlamaChatCompletionCollection.RegisterChatCompletionHandler(
  const AName: string; const AChatHandler: ILlamaChatCompletionHandler;
  const AOverwrite: boolean);
begin
  // Registers a completion handler under AName; refuses to silently replace
  // an existing registration unless AOverwrite is true.
  if FChatCompletionHandlers.ContainsKey(AName) and not AOverwrite then
    raise Exception.CreateFmt(
      'Formatter with name "%s" already registered. Use "AOverwrite=true" to overwrite it.', [
      AName]);

  FChatCompletionHandlers.AddOrSetValue(AName, AChatHandler);
end;
75 |
procedure TLlamaChatCompletionCollection.UnregisterChatHandler(
  const AName: string);
begin
  // Removes the handler registered under AName; raises when unknown.
  if FChatCompletionHandlers.ContainsKey(AName) then
    FChatCompletionHandlers.Remove(AName)
  else
    raise Exception.CreateFmt(
      'No formatter registered under the name "%s".', [AName]);
end;
85 |
function TLlamaChatCompletionCollection.GetChatCompletionHandler(
  const AName: string): ILlamaChatCompletionHandler;
begin
  // Returns the handler registered under AName, or nil when absent
  // (TryGetValue leaves Result at its default on failure).
  FChatCompletionHandlers.TryGetValue(AName, Result);
end;
91 |
92 | end.
93 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Alpaca.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Alpaca;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TAlpacaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TAlpacaChatFormatter }
23 |
function TAlpacaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSeparator: string;
  LSeparator2: string;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // Alpaca instruction format: "### Instruction" / "### Response" sections
  // separated by blank lines, with "</s>" closing each exchange.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '### Instruction');
    LRoles.Add('assistant', '### Response');

    LSeparator := sLineBreak + sLineBreak;
    // End-of-sequence separator restored (angle-bracketed literal lost in
    // transcription; matches the reference alpaca template).
    LSeparator2 := '</s>';

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LPrompt := TLlamaChatFormat.FormatAddColonTwo(
      LSystemMessage, LMessages, LSeparator, LSeparator2);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;
52 |
53 | end.
54 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Baichuan.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Baichuan;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TBaichuanChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TBaichuanChatFormatter }
23 |
function TBaichuanChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // Baichuan v1 chat format: turns delimited by the model's reserved tokens.
  // The role literals were lost in transcription and are restored per the
  // reference baichuan template (<reserved_102>/<reserved_103>).
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<reserved_102>');
    LRoles.Add('assistant', '<reserved_103>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSeparator := '';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle(
      LSystemMessage, LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;
53 |
54 | end.
55 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Baichuan2.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Baichuan2;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TBaichuan2ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TBaichuan2ChatFormatter }
23 |
function TBaichuan2ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // Baichuan v2 chat format. Role literals restored per the reference
  // baichuan-2 template (<reserved_106>/<reserved_107>); the originals were
  // lost in transcription.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<reserved_106>');
    LRoles.Add('assistant', '<reserved_107>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSeparator := '';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle(
      LSystemMessage, LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;
53 |
54 | end.
55 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.ChatGLM3.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.ChatGLM3;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TChatGLM3ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TChatGLM3ChatFormatter }
23 |
function TChatGLM3ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  // ChatGLM3 template: <|system|>/<|user|>/<|assistant|> headers; "</s>" is
  // the stop token (restored — the literal was lost in transcription).
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|user|>');
    LRoles.Add('assistant', '<|assistant|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|system|>'#13#10'%s', [LSystemMessage]);

    LSeparator := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatGML3(
    LSystemMessage, LMessages);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Chatml.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Chatml;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TChatmlChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TChatmlChatFormatter }
23 |
function TChatmlChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  // ChatML: <|im_start|>role ... <|im_end|> blocks; <|im_end|> doubles as
  // the stop token.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|im_start|>user');
    LRoles.Add('assistant', '<|im_start|>assistant');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|im_start|>system'#13#10'%s', [LSystemMessage]);

    LSeparator := '<|im_end|>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatml(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Gemma.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Gemma;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TGemmaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TGemmaChatFormatter }
23 |
function TGemmaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  // Gemma turn format. The <start_of_turn>/<end_of_turn> markers were lost
  // in transcription and are restored per the reference gemma template.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<start_of_turn>user' + sLineBreak);
    LRoles.Add('assistant', '<start_of_turn>model' + sLineBreak);

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);

  LSeparator := '<end_of_turn>' + sLineBreak;

  LPrompt := TLlamaChatFormat.FormatNoColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;
54 |
55 | end.
56 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Intel.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Intel;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TIntelChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TIntelChatFormatter }
23 |
function TIntelChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // Intel neural-chat format: fixed "### System:" header and
  // "### User:"/"### Assistant:" turns (the incoming system message is
  // intentionally not interpolated, matching the reference template).
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '### User:');
    LRoles.Add('assistant', '### Assistant:');

    LSystemMessage := '### System:'#13#10'';

    LSeparator := sLineBreak;

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt);
end;
54 |
55 | end.
56 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Jinja2.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Jinja2;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TJinja2ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | public
19 | constructor Create(
20 | const ATemplate: string;
21 | const AEOSToken: string;
22 | const ABOSToken: string;
23 | const AAddGenerationPrompt: boolean = true;
24 | const AStopTokenIds: TArray = nil);
25 |
26 | function ToChatHandler(): ILlamaChatCompletionHandler;
27 | end;
28 |
29 | implementation
30 |
31 | uses
32 | LlamaCpp.Common.Chat.Formatter.Adapter;
33 |
34 | { TJinja2ChatFormatter }
35 |
constructor TJinja2ChatFormatter.Create(const ATemplate, AEOSToken,
  ABOSToken: string; const AAddGenerationPrompt: boolean;
  const AStopTokenIds: TArray);
begin
  // Stub: a Jinja2 template engine is not implemented yet, so every
  // constructor argument is currently ignored (Format raises — see below).
  //
end;
42 |
function TJinja2ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
begin
  // Jinja2 template rendering is not available; callers must select one of
  // the built-in chat formats instead.
  // Working in a Jinja2 parser...
  raise ENotImplemented.Create(
    'Please, set the "ChatFormat" option in your settings.');
end;
50 |
function TJinja2ChatFormatter.ToChatHandler: ILlamaChatCompletionHandler;
begin
  // Wraps this formatter in the generic formatter-to-completion-handler
  // adapter so it can be registered like any other handler.
  Result := TChatFormaterAdapter.ToChatCompletionHandler(Self);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Llama2.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Llama2;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TLlama2ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function FormatLlama2(const ASystemMessage: string;
17 | const AMessages: TArray>;
18 | const ASep1, ASep2: string): string;
19 | private
20 | function Format(const ASettings: TLlamaChatCompletionSettings)
21 | : TChatFormatterResponse;
22 | end;
23 |
24 | implementation
25 |
26 | { TLlama2ChatFormatter }
27 |
function TLlama2ChatFormatter.FormatLlama2(const ASystemMessage: string;
  const AMessages: TArray<TPair<string, string>>; const ASep1,
  ASep2: string): string;
var
  I: Integer;
  LSeps: TArray<string>;
begin
  // Builds the llama-2 conversation string. The two separators alternate
  // with the turns: ASep1 after even-indexed (user) turns, ASep2 after
  // odd-indexed (assistant) turns.
  LSeps := [ASep1, ASep2];
  Result := ASystemMessage + ASep1;

  for I := Low(AMessages) to High(AMessages) do
  begin
    // When a system prompt is present it replaces the first message's role
    // tag, so only the content is appended for I = 0.
    if not ASystemMessage.IsEmpty() and (I = 0) then
      Result := Result + AMessages[I].Value + LSeps[I mod 2]
    else if not AMessages[I].Value.IsEmpty() then
      Result := Result + AMessages[I].Key + AMessages[I].Value + ' ' + LSeps[I mod 2]
    else
      // Empty content (the trailing generation slot): emit the role tag only.
      Result := Result + AMessages[I].Key + ' ';
  end;
end;
48 |
function TLlama2ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemTemplate: string;
  LRoles: TDictionary<string, string>;
  LMessages: TArray<TPair<string, string>>;
  LSystemMessage: string;
  LPrompt: string;
begin
  // Llama-2 chat template. The <s>, <<SYS>> and </s> markers were lost in
  // transcription and are restored per the reference llama-2 prompt format.
  LSystemTemplate := '<s>[INST] <<SYS>>'#13#10'%s'#13#10'<</SYS>>';

  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<s>[INST]');
    LRoles.Add('assistant', '[/INST]');
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
  finally
    LRoles.Free();
  end;

  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);

  if not LSystemMessage.IsEmpty() then
    LSystemMessage := String.Format(LSystemTemplate, [LSystemMessage]);

  LPrompt := FormatLlama2(LSystemMessage, LMessages, ' ', '</s>') + '[/INST]';

  Result := TChatFormatterResponse.Create(LPrompt);
end;
78 |
79 | end.
80 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Llama3.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Llama3;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TLlama3ChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TLlama3ChatFormatter }
23 |
function TLlama3ChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // Llama-3 header format: <|start_header_id|>role<|end_header_id|> blocks
  // terminated by <|eot_id|>, which is also the stop token.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('system',
      '<|start_header_id|>system<|end_header_id|>' + sLineBreak + sLineBreak);
    LRoles.Add('user',
      '<|start_header_id|>user<|end_header_id|>' + sLineBreak + sLineBreak);
    LRoles.Add('assistant',
      '<|start_header_id|>assistant<|end_header_id|>' + sLineBreak + sLineBreak);

    LSeparator := '<|eot_id|>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    // System turns are mapped via LRoles, so no separate system message is
    // passed here.
    LPrompt := TLlamaChatFormat.FormatNoColonSingle('', LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
  finally
    LRoles.Free();
  end;
end;
54 |
55 | end.
56 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.MilstralLite.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.MilstralLite;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TMistralLiteChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TMistralLiteChatFormatter }
23 |
function TMistralLiteChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  // MistralLite format: <|prompter|>/<|assistant|> turns. The "</s>" prefix
  // of the assistant role was lost in transcription and is restored per the
  // reference mistrallite template.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|prompter|>');
    LRoles.Add('assistant', '</s>'#13#10'<|assistant|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|system|>%s', [LSystemMessage]);

    LSeparator := ' ';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatNoColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.MistralInstruct.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.MistralInstruct;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TMistralInstructChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | uses
23 | System.Variants;
24 |
25 | { TMistralInstructChatFormatter }
26 |
function TMistralInstructChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
const
  // End-of-sequence token; restored (the literal was lost in transcription)
  // per the reference mistral-instruct template. It doubles as the stop
  // sequence returned to the caller.
  EOS = '</s>';
var
  LStop: string;
  LPrompt: string;
  LMessage: TChatCompletionRequestMessage;
begin
  // Mistral-Instruct: user turns wrapped in [INST] ... [/INST], assistant
  // turns terminated by EOS. Non-string / null contents are skipped.
  LStop := EOS;
  LPrompt := String.Empty;

  for LMessage in ASettings.Messages do
    if (LMessage.Role = 'user') and not VarIsNull(LMessage.Content) and VarIsStr(LMessage.Content) then
      LPrompt := LPrompt + '[INST] ' + VarToStr(LMessage.Content)
    else if (LMessage.Role = 'assistant') and not VarIsNull(LMessage.Content) then
      LPrompt := LPrompt + '[/INST] ' + VarToStr(LMessage.Content) + EOS;

  LPrompt := LPrompt + '[/INST]';

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;
49 |
50 | end.
51 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OasstLlama.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.OasstLlama;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TOasstLlamaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TOasstLlamaChatFormatter }
23 |
function TOasstLlamaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  // OpenAssistant-LLaMA format: llama-2 style <<SYS>> system block with
  // <|prompter|>/<|assistant|> turns. The <<SYS>> markers and the "</s>"
  // separator were lost in transcription and are restored per the reference
  // oasst_llama template.
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|prompter|>');
    LRoles.Add('assistant', '<|assistant|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format(
      '[INST] <<SYS>>'#13#10'%s'#13#10'<</SYS>>'#13#10#13#10'', [
      LSystemMessage]);

    LSeparator := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatNoColonSingle(
      LSystemMessage, LMessages, LSeparator);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;
57 |
58 | end.
59 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OpenBuddy.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.OpenBuddy;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TOpenBudyChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TOpenBudyChatFormatter }
23 |
function TOpenBudyChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemMessage: string;
  LRoles: TDictionary<string, string>;
  LMessages: TArray<TPair<string, string>>;
  LSeparator: string;
  LPrompt: string;
begin
  // OpenBuddy format: a fixed persona preamble followed by line-separated
  // "User:"/"Assistant:" turns; any incoming system message is ignored.
  LSystemMessage := '''
You are a helpful, respectful and honest INTP-T AI Assistant named Buddy. You are talking to a human User.
Always answer as helpfully and logically as possible, while being safe. Your answers should not include any harmful, political, religious, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
You can speak fluently in many languages, for example: English, Chinese.
You cannot access the internet, but you have vast knowledge, cutoff: 2021-09.
You are trained by OpenBuddy team, (https://openbuddy.ai, https://github.com/OpenBuddy/OpenBuddy), you are based on LLaMA and Falcon transformers model, not related to GPT or OpenAI.

''';

  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', 'User');
    LRoles.Add('assistant', 'Assistant');
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LSeparator := sLineBreak;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt);
end;
61 |
62 | end.
63 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OpenChat.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.OpenChat;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TOpenChatChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TOpenChatChatFormatter }
23 |
// Builds an OpenChat (GPT4 Correct) prompt: turns are delimited with the
// '<|end_of_turn|>' token, which is also returned as the stop sequence.
// Restored generic type parameters (TDictionary<string, string>,
// TPair<string, string>) that were lost in transit.
function TOpenChatChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', 'GPT4 Correct User: ');
    // The assistant prefix closes the preceding user turn first.
    LRoles.Add('assistant', '<|end_of_turn|>GPT4 Correct Assistant: ');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('%s<|end_of_turn|>', [LSystemMessage]);

    LSeparator := '<|end_of_turn|>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatml(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.OpenOrca.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.OpenOrca;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TOpenOrcaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TOpenOrcaChatFormatter }
23 |
// Builds an Open-Orca prompt with a step-by-step reasoning system message.
// BUG FIX: the role map previously used capitalized keys ('User',
// 'Assistant'), but chat messages carry lowercase roles ('user',
// 'assistant'), so MapRoles silently dropped every message. Keys are now
// lowercase with capitalized display values, matching the reference
// llama-cpp-python format_open_orca implementation.
// Also restored generic type parameters lost in transit.
function TOpenOrcaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LStop: string;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LSystemMessage :=
    '''
You are a helpful assistant. Please answer truthfully and write out your
thinking step by step to be sure you get the right answer. If you make a mistake or encounter
an error in your thinking, say so out loud and attempt to correct it. If you don't know or
aren't sure about something, say so clearly. You will act as a professional logician, mathematician,
and physicist. You will also act as the most appropriate type of expert to answer any particular
question or solve the relevant problem; state which expert type your are, if so. Also think of
any particular named expert that would be ideal to answer the relevant question or solve the
relevant problem; name and act as them, if appropriate.
''';

  // Generation stops when the model starts a new 'User' turn.
  LStop := 'User';

  LRoles := TDictionary<string, string>.Create();
  try
    // Keys match the lowercase message roles; values are the rendered names.
    LRoles.Add('user', 'User');
    LRoles.Add('assistant', 'Assistant');

    LSeparator := '<|end_of_turn|>' + sLineBreak;

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;
68 |
69 | end.
70 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Phind.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Phind;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TPhindChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TPhindChatFormatter }
23 |
// Builds a Phind-CodeLlama prompt using '### System Prompt' /
// '### User Message' / '### Assistant' section headers separated by blank
// lines. Restored generic type parameters (TDictionary<string, string>,
// TPair<string, string>) that were lost in transit.
function TPhindChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '### User Message');
    LRoles.Add('assistant', '### Assistant');

    // Fixed system prompt; per-request system messages are not used here.
    LSystemMessage := '### System Prompt'#13#10'You are an intelligent programming assistant.';

    LSeparator := sLineBreak + sLineBreak;

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt);
end;
54 |
55 | end.
56 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Pygmalion.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Pygmalion;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TPygmalionChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TPygmalionChatFormatter }
23 |
// Builds a Pygmalion prompt with '<|system|>'/'<|user|>'/'<|model|>' tags,
// joined with line breaks. Restored generic type parameters
// (TDictionary<string, string>, TPair<string, string>) lost in transit.
function TPygmalionChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|user|>');
    LRoles.Add('assistant', '<|model|>');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|system|>%s', [LSystemMessage]);

    LSeparator := sLineBreak;

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatml(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Qwen.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Qwen;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TQwenChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TQwenChatFormatter }
23 |
// Builds a Qwen ChatML prompt ('<|im_start|>role ... <|im_end|>'), with
// '<|endoftext|>' as the stop token and a default system message when the
// conversation carries none. Restored generic type parameters
// (TDictionary<string, string>, TPair<string, string>) lost in transit.
function TQwenChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LSeparator: string;
  LPrompt: string;
  LSeparator2: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|im_start|>user');
    LRoles.Add('assistant', '<|im_start|>assistant');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    if LSystemMessage.IsEmpty() then
      LSystemMessage := 'You are a helpful assistant.';
    LSystemMessage := '<|im_start|>system' + sLineBreak + LSystemMessage;

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    // Turn terminator inside the prompt...
    LSeparator := '<|im_end|>';

    LPrompt := TLlamaChatFormat.FormatChatml(LSystemMessage, LMessages, LSeparator);

    // ...while generation stops on the end-of-text token.
    LSeparator2 := '<|endoftext|>';

    Result := TChatFormatterResponse.Create(LPrompt, [LSeparator2]);
  finally
    LRoles.Free();
  end;
end;
59 |
60 | end.
61 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.RedpajamaIncite.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.RedpajamaIncite;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TRedpajamaInciteChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TRedpajamaInciteChatFormatter }
23 |
// Builds a RedPajama-INCITE prompt using the '<human>' / '<bot>' turn tags.
// FIXES: the tag literals and the stop token had been stripped to empty
// strings (lost as HTML-like tags in transit) — restored per the reference
// llama-cpp-python format_redpajama_incite implementation. LSeparator is
// now a plain string (was WideString), consistent with every other
// formatter in this library. Generic type parameters restored as well.
function TRedpajamaInciteChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemMessage: string;
  LRoles: TDictionary<string, string>;
  LMessages: TArray<TPair<string, string>>;
  LSeparator: string;
  LStop: string;
  LPrompt: string;
begin
  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);

  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<human>');
    LRoles.Add('assistant', '<bot>');
    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LSeparator := sLineBreak;
  // Generation stops when the model begins a new human turn.
  LStop := '<human>';

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Registration.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Registration;
2 |
3 | interface
4 |
5 | type
6 | TChatFormatterRegistration = class
7 | public
8 | class procedure RegisterAll();
9 | class procedure UnregisterAll();
10 | end;
11 |
12 | implementation
13 |
14 | uses
15 | LlamaCpp.Common.Chat.Completion.Collection,
16 | LlamaCpp.Common.Chat.Formatter.Adapter,
17 | LlamaCpp.Common.Chat.Formatter.Llama2,
18 | LlamaCpp.Common.Chat.Formatter.Llama3,
19 | LlamaCpp.Common.Chat.Formatter.Alpaca,
20 | LlamaCpp.Common.Chat.Formatter.Qwen,
21 | LlamaCpp.Common.Chat.Formatter.Vicuna,
22 | LlamaCpp.Common.Chat.Formatter.OasstLlama,
23 | LlamaCpp.Common.Chat.Formatter.Baichuan,
24 | LlamaCpp.Common.Chat.Formatter.Baichuan2,
25 | LlamaCpp.Common.Chat.Formatter.OpenBuddy,
26 | LlamaCpp.Common.Chat.Formatter.RedpajamaIncite,
27 | LlamaCpp.Common.Chat.Formatter.Snoozy,
28 | LlamaCpp.Common.Chat.Formatter.Phind,
29 | LlamaCpp.Common.Chat.Formatter.Intel,
30 | LlamaCpp.Common.Chat.Formatter.OpenOrca,
31 | LlamaCpp.Common.Chat.Formatter.MilstralLite,
32 | LlamaCpp.Common.Chat.Formatter.Zephyr,
33 | LlamaCpp.Common.Chat.Formatter.Pygmalion,
34 | LlamaCpp.Common.Chat.Formatter.Chatml,
35 | LlamaCpp.Common.Chat.Formatter.MistralInstruct,
36 | LlamaCpp.Common.Chat.Formatter.ChatGLM3,
37 | LlamaCpp.Common.Chat.Formatter.OpenChat,
38 | LlamaCpp.Common.Chat.Formatter.Saiga,
39 | LlamaCpp.Common.Chat.Formatter.Gemma;
40 |
41 | { TChatFormatterRegistration }
42 |
// Registers every built-in chat formatter with the completion collection,
// keyed by its chat-format name. Each formatter is wrapped into a chat
// completion handler by TChatFormaterAdapter.
class procedure TChatFormatterRegistration.RegisterAll;
var
  LCollection: TLlamaChatCompletionCollection;
begin
  // Resolve the singleton once instead of per registration.
  LCollection := TLlamaChatCompletionCollection.Instance;

  LCollection.RegisterChatCompletionHandler('llama-2',
    TChatFormaterAdapter.ToChatCompletionHandler(TLlama2ChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('llama-3',
    TChatFormaterAdapter.ToChatCompletionHandler(TLlama3ChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('alpaca',
    TChatFormaterAdapter.ToChatCompletionHandler(TAlpacaChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('qwen',
    TChatFormaterAdapter.ToChatCompletionHandler(TQwenChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('vicuna',
    TChatFormaterAdapter.ToChatCompletionHandler(TVicunaChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('oasst_llama',
    TChatFormaterAdapter.ToChatCompletionHandler(TOasstLlamaChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('baichuan',
    TChatFormaterAdapter.ToChatCompletionHandler(TBaichuanChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('baichuan-2',
    TChatFormaterAdapter.ToChatCompletionHandler(TBaichuan2ChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('openbuddy',
    TChatFormaterAdapter.ToChatCompletionHandler(TOpenBudyChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('redpajama-incite',
    TChatFormaterAdapter.ToChatCompletionHandler(TRedpajamaInciteChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('snoozy',
    TChatFormaterAdapter.ToChatCompletionHandler(TSnoozyChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('phind',
    TChatFormaterAdapter.ToChatCompletionHandler(TPhindChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('intel',
    TChatFormaterAdapter.ToChatCompletionHandler(TIntelChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('open-orca',
    TChatFormaterAdapter.ToChatCompletionHandler(TOpenOrcaChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('mistrallite',
    TChatFormaterAdapter.ToChatCompletionHandler(TMistralLiteChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('zephyr',
    TChatFormaterAdapter.ToChatCompletionHandler(TZephyrChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('pygmalion',
    TChatFormaterAdapter.ToChatCompletionHandler(TPygmalionChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('chatml',
    TChatFormaterAdapter.ToChatCompletionHandler(TChatmlChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('mistral-instruct',
    TChatFormaterAdapter.ToChatCompletionHandler(TMistralInstructChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('chatglm3',
    TChatFormaterAdapter.ToChatCompletionHandler(TChatGLM3ChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('openchat',
    TChatFormaterAdapter.ToChatCompletionHandler(TOpenChatChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('saiga',
    TChatFormaterAdapter.ToChatCompletionHandler(TSaigaChatFormatter.Create()));
  LCollection.RegisterChatCompletionHandler('gemma',
    TChatFormaterAdapter.ToChatCompletionHandler(TGemmaChatFormatter.Create()));
end;
137 |
// Unregisters every formatter installed by RegisterAll, in reverse
// registration order.
class procedure TChatFormatterRegistration.UnregisterAll;
const
  // Reverse of the registration order used in RegisterAll.
  CHandlerNames: array[0..22] of string = (
    'gemma', 'saiga', 'openchat', 'chatglm3', 'mistral-instruct',
    'chatml', 'pygmalion', 'zephyr', 'mistrallite', 'open-orca',
    'intel', 'phind', 'snoozy', 'redpajama-incite', 'openbuddy',
    'baichuan-2', 'baichuan', 'oasst_llama', 'vicuna', 'qwen',
    'alpaca', 'llama-3', 'llama-2');
var
  LName: string;
begin
  for LName in CHandlerNames do
    TLlamaChatCompletionCollection.Instance.UnregisterChatHandler(LName);
end;
164 |
165 | end.
166 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Saiga.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Saiga;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TSaigaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TSaigaChatFormatter }
23 |
// Builds a Saiga prompt where every turn is wrapped in <s>...</s>
// sentinels: '<s>{role}\n{content}</s>'.
// FIXES: the '<s>'/'</s>' sentinels had been stripped from the template and
// the literals (lost as HTML-like tags in transit) — restored per the
// reference llama-cpp-python format_saiga implementation. Generic type
// parameters restored as well.
function TSaigaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LMessageTemplate: string;
  LRoles: TDictionary<string, string>;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LMessage: TPair<string, string>;
begin
  LMessageTemplate := '<s>%s'#13#10'%s</s>';

  LRoles := TDictionary<string, string>.Create();
  try
    // Saiga uses 'bot' (not 'assistant') and keeps 'system' turns inline.
    LRoles.Add('user', 'user');
    LRoles.Add('bot', 'bot');
    LRoles.Add('system', 'system');

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
  finally
    LRoles.Free();
  end;

  LPrompt := String.Empty;

  for LMessage in LMessages do
    if not LMessage.Value.IsEmpty() then
      LPrompt := LPrompt + String.Format(LMessageTemplate, [LMessage.Key, LMessage.Value])
    else
      // Role without content: open the turn and leave it unterminated.
      LPrompt := LPrompt + '<s>' + LMessage.Key + sLineBreak;

  // Open a final bot turn so the model answers as the bot role.
  LPrompt := LPrompt + '<s>bot';

  Result := TChatFormatterResponse.Create(LPrompt.Trim());
end;
58 |
59 | end.
60 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Snoozy.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Snoozy;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TSnoozyChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TSnoozyChatFormatter }
23 |
// Builds a GPT4All-Snoozy prompt: an '### Instruction:' system section
// followed by '### Prompt' / '### Response' turns, stopping on '###'.
// BUG FIX: the original re-read the raw system message inside the try
// block, overwriting both the default instruction text and the
// '### Instruction:' header built just above — that redundant overwrite is
// removed. Generic type parameters restored as well.
function TSnoozyChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LSystemMessage: string;
  LRoles: TDictionary<string, string>;
  LSeparator: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LStop: string;
begin
  // Default instruction when the conversation carries no system message.
  LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
  if LSystemMessage.IsEmpty() then
    LSystemMessage := 'The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.';

  LSystemMessage := String.Format('### Instruction:'#13#10'%s', [
    LSystemMessage]);

  LSeparator := sLineBreak;
  LStop := '###';

  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '### Prompt');
    LRoles.Add('assistant', '### Response');

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatAddColonSingle(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LStop]);
end;
63 |
64 | end.
65 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Vicuna.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Vicuna;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TVicunaChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TVicunaChatFormatter }
23 |
// Builds a Vicuna prompt: 'USER: ...' turns separated by a space, with
// completed ASSISTANT turns terminated by the EOS token.
// FIXES: the second separator had been stripped to an empty string (lost
// as an HTML-like tag in transit) — restored to '</s>' per the reference
// llama-cpp-python format_vicuna implementation. Generic type parameters
// restored as well.
function TVicunaChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LSeparator: string;
  LSeparator2: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LSystemMessage := 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user''s questions.';

    LRoles.Add('user', 'USER');
    LRoles.Add('assistant', 'ASSISTANT');

    LSeparator := ' ';
    // EOS token closing each completed assistant turn.
    LSeparator2 := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];

    LPrompt := TLlamaChatFormat.FormatAddColonTwo(LSystemMessage, LMessages, LSeparator, LSeparator2);

    Result := TChatFormatterResponse.Create(LPrompt);
  finally
    LRoles.Free();
  end;
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/Formatter/LlamaCpp.Common.Chat.Formatter.Zephyr.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Formatter.Zephyr;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types,
9 | LlamaCpp.Common.Settings,
10 | LlamaCpp.Common.Chat.Types,
11 | LlamaCpp.Common.Chat.Format;
12 |
13 | type
14 | TZephyrChatFormatter = class(TInterfacedObject, ILlamaChatFormater)
15 | private
16 | function Format(const ASettings: TLlamaChatCompletionSettings)
17 | : TChatFormatterResponse;
18 | end;
19 |
20 | implementation
21 |
22 | { TZephyrChatFormatter }
23 |
// Builds a Zephyr prompt with '<|system|>'/'<|user|>'/'<|assistant|>' tags,
// each turn terminated by the EOS token, which also serves as the stop
// sequence.
// FIXES: the separator had been stripped to an empty string (lost as an
// HTML-like tag in transit), which also produced an empty — i.e. broken —
// stop token; restored to '</s>' per the reference llama-cpp-python
// format_zephyr implementation. Generic type parameters restored as well.
function TZephyrChatFormatter.Format(
  const ASettings: TLlamaChatCompletionSettings): TChatFormatterResponse;
var
  LRoles: TDictionary<string, string>;
  LSystemMessage: string;
  LMessages: TArray<TPair<string, string>>;
  LPrompt: string;
  LSeparator: string;
begin
  LRoles := TDictionary<string, string>.Create();
  try
    LRoles.Add('user', '<|user|>'#13#10'');
    LRoles.Add('assistant', '<|assistant|>'#13#10'');

    LSystemMessage := TLlamaChatFormat.GetSystemMessage(ASettings.Messages);
    LSystemMessage := String.Format('<|system|>'#13#10'%s', [LSystemMessage]);

    // EOS token: terminates each turn and stops generation.
    LSeparator := '</s>';

    LMessages := TLlamaChatFormat.MapRoles(ASettings.Messages, LRoles);
    // Trailing empty assistant turn cues the model to start generating.
    LMessages := LMessages + [
      TPair<string, string>.Create(LRoles['assistant'], '')];
  finally
    LRoles.Free();
  end;

  LPrompt := TLlamaChatFormat.FormatChatml(
    LSystemMessage, LMessages, LSeparator);

  Result := TChatFormatterResponse.Create(LPrompt, [LSeparator]);
end;
55 |
56 | end.
57 |
--------------------------------------------------------------------------------
/src/Common/Chat/LlamaCpp.Common.Chat.Format.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Chat.Format;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Rtti,
8 | System.Generics.Collections,
9 | LlamaCpp.Wrapper.LlamaModel,
10 | LLamaCpp.Common.Chat.Types;
11 |
12 | type
13 | TLlamaChatFormat = class
14 | public const
15 | // Source: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/blob/main/tokenizer_config.json
16 | CHATML_CHAT_TEMPLATE = '{% for message in messages %}{{''<|im_start|>'' + message[''role''] + ''\n'' + message[''content''] + ''<|im_end|>'' + ''\n''}}{% endfor %}{% if add_generation_prompt %}{{ ''<|im_start|>assistant\n'' }}{% endif %}';
17 | CHATML_BOS_TOKEN = '';
18 | CHATML_EOS_TOKEN = '<|im_end|>';
19 |
20 | // Source: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json
21 | MISTRAL_INSTRUCT_CHAT_TEMPLATE = '{{ bos_token }}{% for message in messages %}{% if (message[''role''] == ''user'') != (loop.index0 % 2 == 0) %}{{ raise_exception(''Conversation roles must alternate user/assistant/user/assistant/...'') }}{% endif %}{% if message[''role''] == ''user'' %}{{ ''[INST] '' + message[''content''] + '' [/INST]'' }}{% elif message[''role''] == ''assistant'' %}{{ message[''content''] + eos_token + '' '' }}{% else %}{{ raise_exception(''Only user and assistant roles are supported!'') }}{% endif %}{% endfor %}';
22 | MISTRAL_INSTRUCT_BOS_TOKEN = '';
23 | MISTRAL_INSTRUCT_EOS_TOKEN = '';
24 |
25 | // Source: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json
26 | MIXTRAL_INSTRUCT_CHAT_TEMPLATE = '{{ bos_token }}{% for message in messages %}{% if (message[''role''] == ''user'') != (loop.index0 % 2 == 0) %}{{ raise_exception(''Conversation roles must alternate user/assistant/user/assistant/...'') }}{% endif %}{% if message[''role''] == ''user'' %}{{ ''[INST] '' + message[''content''] + '' [/INST]'' }}{% elif message[''role''] == ''assistant'' %}{{ message[''content''] + eos_token}}{% else %}{{ raise_exception(''Only user and assistant roles are supported!'') }}{% endif %}{% endfor %}';
27 |
28 | // Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
29 | LLAMA3_INSTRUCT_CHAT_TEMPLATE = '{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = ''<|start_header_id|>'' + message[''role''] + ''<|end_header_id|>\n\n''+ message[''content''] | trim + ''<|eot_id|>'' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ ''<|start_header_id|>assistant<|end_header_id|>\n\n'' }}{% endif %}';
30 | public
31 | class function GuessChatFormatFromGguf(const AMetadata: TMetadata)
32 | : string; static;
33 |
34 | class function GetSystemMessage(
35 | const AMessages: TArray): string;
36 | class function MapRoles(
37 | const AMessages: TArray;
38 | const ARoleMap: TDictionary): TArray>;
39 | class function FormatNoColonSingle(
40 | const ASystemMessage: string;
41 | const AMessages: TArray>;
42 | const ASeparator: string): string;
43 | class function FormatAddColonTwo(
44 | const ASystemMessage: string;
45 | const AMessages: TArray>;
46 | const ASeparator, ASeparator2: string): string;
47 | class function FormatAddColonSingle(
48 | const ASystemMessage: string;
49 | const AMessages: TArray>;
50 | const ASeparator: string): string;
51 | class function FormatChatml(
52 | const ASystemMessage: string;
53 | const AMessages: TArray>;
54 | const ASeparator: string): string;
55 | class function FormatChatGML3(
56 | const ASystemMessage: string;
57 | const AMessages: TArray>): string;
58 | end;
59 |
60 | implementation
61 |
62 | uses
63 | System.Variants;
64 |
65 | { TLlamaChatFormat }
66 |
// Maps the GGUF 'tokenizer.chat_template' metadata value to a known chat
// format name; empty string when the key is missing or unrecognized.
class function TLlamaChatFormat.GuessChatFormatFromGguf(
  const AMetadata: TMetadata): string;
var
  LTemplate: string;
begin
  if not AMetadata.ContainsKey('tokenizer.chat_template') then
    Exit(String.Empty);

  // Read the template once rather than on every comparison.
  LTemplate := AMetadata.Items['tokenizer.chat_template'];

  if LTemplate = CHATML_CHAT_TEMPLATE then
    Exit('chatml');

  // Mistral and Mixtral instruct templates share one format name.
  if (LTemplate = MISTRAL_INSTRUCT_CHAT_TEMPLATE)
    or (LTemplate = MIXTRAL_INSTRUCT_CHAT_TEMPLATE) then
    Exit('mistral-instruct');

  if LTemplate = LLAMA3_INSTRUCT_CHAT_TEMPLATE then
    Exit('llama-3');

  Result := String.Empty;
end;
83 |
// Return the content of the first message whose role is 'system',
// or an empty string when the conversation has none.
class function TLlamaChatFormat.GetSystemMessage(
  const AMessages: TArray): string;
var
  LIndex: Integer;
begin
  Result := String.Empty;

  for LIndex := Low(AMessages) to High(AMessages) do
    if AMessages[LIndex].Role = 'system' then
      Exit(VarToStr(AMessages[LIndex].Content));
end;
95 |
// Translate message roles through ARoleMap, pairing each mapped role with
// the message content. Messages whose role is not present in the map are
// dropped; non-string (Variant) content is replaced by an empty string.
class function TLlamaChatFormat.MapRoles(
  const AMessages: TArray;
  const ARoleMap: TDictionary): TArray>;
var
  LMessage: TChatCompletionRequestMessage;
  LMappedRole: string;
  LContent: string;
begin
  Result := nil;

  for LMessage in AMessages do
    // TryGetValue performs a single lookup instead of ContainsKey + Items[].
    if ARoleMap.TryGetValue(LMessage.Role, LMappedRole) then
    begin
      if VarIsStr(LMessage.Content) then
        LContent := VarToStr(LMessage.Content)
      else
        LContent := String.Empty;

      Result := Result + [TPair.Create(LMappedRole, LContent)];
    end;
end;
117 |
// Concatenate the system message and each role/content pair with no colon
// between role and content; ASeparator is appended after the system message
// and after every non-empty content block.
class function TLlamaChatFormat.FormatNoColonSingle(
  const ASystemMessage: string; const AMessages: TArray>;
  const ASeparator: string): string;
var
  LPair: TPair;
begin
  Result := ASystemMessage + ASeparator;

  for LPair in AMessages do
    if LPair.Value.IsEmpty() then
      Result := Result + LPair.Key
    else
      Result := Result + LPair.Key + LPair.Value + ASeparator;
end;
134 |
// "role: content" formatting with two alternating separators: even-indexed
// messages (and the system message) use ASeparator, odd-indexed messages
// use ASeparator2. Empty content contributes only "role:".
class function TLlamaChatFormat.FormatAddColonTwo(const ASystemMessage: string;
  const AMessages: TArray>; const ASeparator,
  ASeparator2: string): string;
var
  LIndex: Integer;
  LSuffix: string;
begin
  Result := ASystemMessage + ASeparator;

  for LIndex := Low(AMessages) to High(AMessages) do
  begin
    if AMessages[LIndex].Value.IsEmpty() then
    begin
      Result := Result + AMessages[LIndex].Key + ':';
      Continue;
    end;

    if LIndex mod 2 = 0 then
      LSuffix := ASeparator
    else
      LSuffix := ASeparator2;

    Result := Result + AMessages[LIndex].Key + ': ' + AMessages[LIndex].Value + LSuffix;
  end;
end;
151 |
// "role: content" formatting with a single separator appended after every
// non-empty content block; empty content contributes only "role:".
class function TLlamaChatFormat.FormatAddColonSingle(
  const ASystemMessage: string; const AMessages: TArray>;
  const ASeparator: string): string;
var
  LPair: TPair;
begin
  Result := ASystemMessage + ASeparator;

  for LPair in AMessages do
    if LPair.Value.IsEmpty() then
      Result := Result + LPair.Key + ':'
    else
      Result := Result + LPair.Key + ': ' + LPair.Value + ASeparator;
end;
166 |
// ChatML-style layout: each role tag is followed by a line break, then the
// content, ASeparator and another line break. An empty system message
// contributes nothing to the prompt.
class function TLlamaChatFormat.FormatChatml(const ASystemMessage: string;
  const AMessages: TArray>;
  const ASeparator: string): string;
var
  LPair: TPair;
begin
  if ASystemMessage.IsEmpty() then
    Result := String.Empty
  else
    Result := ASystemMessage + ASeparator + sLineBreak;

  for LPair in AMessages do
    if LPair.Value.IsEmpty() then
      Result := Result + LPair.Key + sLineBreak
    else
      Result := Result + LPair.Key + sLineBreak + LPair.Value + ASeparator + sLineBreak;
end;
184 |
// ChatGLM3-style layout: optional system message first, then each role tag
// followed by a line break and the indented content; roles with empty
// content are emitted bare, with no trailing break.
class function TLlamaChatFormat.FormatChatGML3(const ASystemMessage: string;
  const AMessages: TArray>): string;
var
  LPair: TPair;
begin
  if ASystemMessage.IsEmpty() then
    Result := String.Empty
  else
    Result := ASystemMessage;

  for LPair in AMessages do
    if LPair.Value.IsEmpty() then
      Result := Result + LPair.Key
    else
      Result := Result + LPair.Key + sLineBreak + ' ' + LPair.Value;
end;
201 |
202 | end.
203 |
--------------------------------------------------------------------------------
/src/Common/Grammar/LlamaCpp.Common.Grammar.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Grammar;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Classes,
8 | System.Generics.Collections,
9 | System.JSON,
10 | LlamaCpp.Common.Types;
11 |
12 | const
13 | LLAMA_GRAMMAR_DEFAULT_ROOT = 'root';
14 |
15 | type
  // Plain GBNF grammar container implementing ILlamaGrammar: holds the
  // grammar text plus the name of its root rule (defaults to
  // LLAMA_GRAMMAR_DEFAULT_ROOT, i.e. 'root').
  TLlamaGrammar = class(TInterfacedObject, ILlamaGrammar)
  private
    FGrammar: string; // raw GBNF grammar text
    FRoot: string;    // name of the grammar's root rule
    function GetGrammar(): string;
    procedure SetGrammar(const AGrammar: string);
    function GetRoot(): string;
    procedure SetRoot(const ARoot: string);
  public
    constructor Create(const AGrammar: string); overload;

    // Currently a no-op: this implementation keeps no mutable parse state.
    procedure Reset();

    // Class methods
    class function FromString(const AGrammar: string): ILlamaGrammar; static;
    class function FromFile(const AFileName: string): ILlamaGrammar; static;
    class function FromJsonSchema(
      const AJsonSchema: string): ILlamaGrammar; static;
    // NOTE(review): always raises ENotImplemented; APropOrder is unused.
    class function JsonSchemaToGBNF(const ASchema: string;
      const APropOrder: TArray = nil): string; static;
  end;
37 |
38 | const
39 | JSON_GBNF: string = '''
40 | root ::= object
41 | value ::= object | array | string | number | ("true" | "false" | "null") ws
42 |
43 | object ::=
44 | "{" ws (
45 | string ":" ws value
46 | ("," ws string ":" ws value)*
47 | )? "}" ws
48 |
49 | array ::=
50 | "[" ws (
51 | value
52 | ("," ws value)*
53 | )? "]" ws
54 |
55 | string ::=
56 | "\"" (
57 | [^"\\\x7F\x00-\x1F] |
58 | "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
59 | )* "\"" ws
60 |
61 | number ::=
62 | ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
63 |
64 | # Optional space: by convention, applied in this grammar after literal chars when allowed
65 | ws ::= | " " | "\n" [ \t]{0,20}
66 | ''';
67 |
68 | implementation
69 |
70 | { TLlamaGrammar }
71 |
// Store the grammar text and default the root rule name to
// LLAMA_GRAMMAR_DEFAULT_ROOT.
constructor TLlamaGrammar.Create(const AGrammar: string);
begin
  inherited Create;
  FGrammar := AGrammar;
  FRoot := LLAMA_GRAMMAR_DEFAULT_ROOT;
end;
78 |
// Accessor: returns the raw GBNF grammar text.
function TLlamaGrammar.GetGrammar: string;
begin
  Result := FGrammar;
end;
83 |
// Accessor: returns the root rule name.
function TLlamaGrammar.GetRoot: string;
begin
  Result := FRoot;
end;
88 |
// Mutator: replaces the stored grammar text.
procedure TLlamaGrammar.SetGrammar(const AGrammar: string);
begin
  FGrammar := AGrammar;
end;
93 |
// Mutator: replaces the root rule name.
procedure TLlamaGrammar.SetRoot(const ARoot: string);
begin
  FRoot := ARoot;
end;
98 |
// Factory: wrap an in-memory GBNF string in a new grammar instance.
class function TLlamaGrammar.FromString(const AGrammar: string): ILlamaGrammar;
begin
  Result := TLlamaGrammar.Create(AGrammar);
end;
103 |
// Factory: load a GBNF grammar from a text file.
// Raises an exception with a "reading grammar file" prefix for I/O
// failures, or a dedicated error when the file exists but is empty.
class function TLlamaGrammar.FromFile(const AFileName: string): ILlamaGrammar;
var
  LGrammarFile: TStringList;
begin
  LGrammarFile := TStringList.Create;
  try
    try
      LGrammarFile.LoadFromFile(AFileName);
    except
      on E: Exception do
        raise Exception.CreateFmt('Error reading grammar file: %s', [E.Message]);
    end;

    // Validate AFTER the handler above so the empty-file message is no
    // longer double-wrapped as "Error reading grammar file: Error: ...".
    if LGrammarFile.Text.Trim.IsEmpty then
      raise Exception.Create('Error: Grammar file is empty');

    Result := TLlamaGrammar.FromString(LGrammarFile.Text);
  finally
    LGrammarFile.Free;
  end;
end;
125 |
// Factory: build a grammar from a JSON-schema string.
// NOTE(review): JsonSchemaToGBNF currently raises ENotImplemented, so this
// call always fails until that conversion is written.
class function TLlamaGrammar.FromJsonSchema(const AJsonSchema: string): ILlamaGrammar;
begin
  Result := TLlamaGrammar.FromString(JsonSchemaToGBNF(AJsonSchema));
end;
130 |
// Convert a JSON schema into a GBNF grammar; APropOrder would control
// property ordering in the generated rules.
// Not implemented yet — unconditionally raises ENotImplemented.
class function TLlamaGrammar.JsonSchemaToGBNF(const ASchema: string;
  const APropOrder: TArray = nil): string;
begin
  raise ENotImplemented.Create('Not implemented.');
end;
136 |
// ILlamaGrammar.Reset: no-op — this implementation holds no mutable
// parse state to clear.
procedure TLlamaGrammar.Reset;
begin
  //
end;
141 |
142 | end.
143 |
--------------------------------------------------------------------------------
/src/Common/LlamaCpp.Common.State.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.State;
2 |
3 | interface
4 |
5 | uses
6 | System.Classes,
7 | System.SysUtils,
8 | System.JSON.Serializers;
9 |
10 | type
  // Snapshot of a llama context's mutable state: evaluated token ids,
  // per-position score rows, the native state blob and the RNG seed.
  // Supports binary round-tripping (Serialize/Deserialize) and JSON
  // round-tripping (ToJsonString/FromJsonString).
  TLlamaState = class
  private
    FInputIds: TArray;        // evaluated token ids (serialized as Integer)
    FScores: TArray>;         // one row of Single scores per position
    FNTokens: Integer;        // number of valid tokens in the state
    FLlamaState: TArray;      // native state bytes (serialized as ShortInt)
    FLlamaStateSize: Integer; // size reported for the native state blob
    FSeed: UInt32;            // RNG seed
  public
    constructor Create(); overload;
    constructor Create(
      const AInputIds: TArray;
      const AScores: TArray>;
      const ANTokens: Integer;
      const ALlamaState: TArray;
      const ALlamaStateSize: Integer;
      const ASeed: UInt32
    ); overload;

    // Approximate in-memory footprint in bytes.
    function GetSize(): Int64;
    // Shallow copy: dynamic-array fields share references with the source.
    function Clone(): TLlamaState;

    procedure Serialize(const AStream: TStream);
    procedure Deserialize(const AStream: TStream);

    function ToJsonString(): string;
    class function FromJsonString(const AJsonString: string): TLlamaState;

    property InputIds: TArray read FInputIds write FInputIds;
    property Scores: TArray> read FScores write FScores;
    property NTokens: Integer read FNTokens write FNTokens;
    property LlamaState: TArray read FLlamaState write FLlamaState;
    property LlamaStateSize: Integer read FLlamaStateSize write FLlamaStateSize;
    property Seed: UInt32 read FSeed write FSeed;
  end;
46 |
47 | implementation
48 |
49 | { TLlamaState }
50 |
// Parameterless constructor: leaves every field at its zero default.
// Typically followed by Deserialize/FromJsonString to populate the state.
constructor TLlamaState.Create;
begin
  //
end;
55 |
// Initialize all fields from the supplied values. Dynamic-array arguments
// are assigned by reference (Delphi dynamic-array assignment), not copied.
constructor TLlamaState.Create(
  const AInputIds: TArray;
  const AScores: TArray>;
  const ANTokens: Integer;
  const ALlamaState: TArray;
  const ALlamaStateSize: Integer;
  const ASeed: UInt32);
begin
  inherited Create;
  FInputIds := AInputIds;
  FScores := AScores;
  FNTokens := ANTokens;
  FLlamaState := ALlamaState;
  FLlamaStateSize := ALlamaStateSize;
  FSeed := ASeed;
end;
72 |
// Approximate in-memory footprint of the state, in bytes: the fixed-size
// fields plus the payload of every dynamic array, and one array-reference
// slot per score row for the outer array itself.
function TLlamaState.GetSize: Int64;
var
  LRow: Integer;
begin
  Result := SizeOf(FNTokens) + SizeOf(FLlamaStateSize) + SizeOf(FSeed);
  Result := Result + (Length(FInputIds) * SizeOf(integer));
  Result := Result + (Length(FLlamaState) * SizeOf(ShortInt));
  Result := Result + (Length(FScores) * SizeOf(TArray));

  for LRow := Low(FScores) to High(FScores) do
    Result := Result + Length(FScores[LRow]) * SizeOf(Single);
end;
88 |
// Shallow clone: scalar fields are copied by value, but the dynamic-array
// fields share their references with Self (assigning a Delphi dynamic
// array copies the reference, not the elements).
// NOTE(review): mutating elements through either instance affects both —
// confirm this sharing is intended.
function TLlamaState.Clone: TLlamaState;
begin
  Result := TLlamaState.Create(
    FInputIds,
    FScores,
    FNTokens,
    FLlamaState,
    FLlamaStateSize,
    FSeed
  );
end;
100 |
// Serialize this state object into its JSON representation.
function TLlamaState.ToJsonString: string;
var
  LJson: TJsonSerializer;
begin
  LJson := TJsonSerializer.Create;
  try
    Result := LJson.Serialize(Self);
  finally
    LJson.Free;
  end;
end;
112 |
// Reconstruct a TLlamaState instance from its JSON representation.
// The caller owns the returned object.
class function TLlamaState.FromJsonString(
  const AJsonString: string): TLlamaState;
var
  LJson: TJsonSerializer;
begin
  LJson := TJsonSerializer.Create;
  try
    Result := LJson.Deserialize(AJsonString);
  finally
    LJson.Free;
  end;
end;
125 |
// Write the state to AStream in a simple binary layout: the three fixed
// fields, then each dynamic array as a length prefix followed by its raw
// payload. Mirrors Deserialize.
procedure TLlamaState.Serialize(const AStream: TStream);
var
  I: Integer;
begin
  AStream.WriteData(FNTokens);
  AStream.WriteData(FLlamaStateSize);
  AStream.WriteData(FSeed);

  AStream.WriteData(Length(FInputIds));
  // Guard: indexing element [0] of an empty/nil dynamic array is invalid
  // (range error or access violation), so skip the payload when empty.
  if Length(FInputIds) > 0 then
    AStream.Write(FInputIds[0], Length(FInputIds) * SizeOf(integer));

  AStream.WriteData(Length(FLlamaState));
  if Length(FLlamaState) > 0 then
    AStream.Write(FLlamaState[0], Length(FLlamaState) * SizeOf(ShortInt));

  AStream.WriteData(Length(FScores));
  for I := Low(FScores) to High(FScores) do
  begin
    AStream.WriteData(Length(FScores[I]));
    if Length(FScores[I]) > 0 then
      AStream.Write(FScores[I][0], Length(FScores[I]) * SizeOf(Single));
  end;
end;
147 |
// Inverse of Serialize: read the three fixed fields, then each
// length-prefixed dynamic-array payload from AStream.
procedure TLlamaState.Deserialize(const AStream: TStream);
var
  LLength: Integer;
  LRowLength: Integer;
  I: Integer;
begin
  AStream.ReadData(FNTokens);
  AStream.ReadData(FLlamaStateSize);
  AStream.ReadData(FSeed);

  AStream.ReadData(LLength);
  SetLength(FInputIds, LLength);
  // Guard: indexing element [0] of an empty/nil dynamic array is invalid,
  // so only read a payload when the length prefix is positive.
  if LLength > 0 then
    AStream.Read(FInputIds[0], LLength * SizeOf(integer));

  AStream.ReadData(LLength);
  SetLength(FLlamaState, LLength);
  if LLength > 0 then
    AStream.Read(FLlamaState[0], LLength * SizeOf(ShortInt));

  AStream.ReadData(LLength);
  SetLength(FScores, LLength);
  for I := Low(FScores) to High(FScores) do
  begin
    AStream.ReadData(LRowLength);
    SetLength(FScores[I], LRowLength);
    if LRowLength > 0 then
      AStream.Read(FScores[I][0], LRowLength * SizeOf(single));
  end;
end;
174 |
175 | end.
176 |
--------------------------------------------------------------------------------
/src/Common/LlamaCpp.Common.TokenArray.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.TokenArray;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | LlamaCpp.CType.Llama;
8 |
9 | type
  // Managed wrapper around the C-side llama token-data array: owns the
  // candidate buffer and keeps the native struct's Data pointer aimed at it
  // for the object's lifetime.
  TLlamaTokenDataArray = class
  private
    FCandidatesData: TArray;                                // candidate entries (ID/Logit/P)
    FCandidates: LlamaCpp.CType.Llama.TLlamaTokenDataArray; // native view over FCandidatesData
    FDefaultCandidatesDataID: TArray;                       // identity token-id table (0..NVocab-1)
    FDefaultCandidatesDataP: TArray;                        // zeroed default probabilities
    FN_Vocab: Int32;                                        // vocabulary size
  public
    constructor Create(const ANVocab: Int32);

    // Refresh the candidates from a fresh logits vector, resetting ids and
    // probabilities to their defaults.
    procedure CopyLogits(const ALogits: TArray);
    property Candidates: LlamaCpp.CType.Llama.TLlamaTokenDataArray read FCandidates;
    property CandidatesData: TArray read FCandidatesData;
  end;
24 |
25 | implementation
26 |
27 | { TLlamaTokenDataArray }
28 |
// Pre-allocate candidate buffers for a vocabulary of ANVocab tokens and
// wire the native TLlamaTokenDataArray view onto the managed buffer.
constructor TLlamaTokenDataArray.Create(const ANVocab: Int32);
var
  I: Int32;
begin
  // Guard: @FCandidatesData[0] below is invalid for an empty array, so a
  // non-positive vocabulary size must be rejected up front (matches the
  // Assert style used by CopyLogits).
  Assert(ANVocab > 0, 'Vocabulary size must be positive.');

  FN_Vocab := ANVocab;

  SetLength(FCandidatesData, FN_Vocab);
  SetLength(FDefaultCandidatesDataID, FN_Vocab);
  SetLength(FDefaultCandidatesDataP, FN_Vocab);
  for I := 0 to FN_Vocab - 1 do
  begin
    FDefaultCandidatesDataID[I] := I;
    FDefaultCandidatesDataP[I] := 0.0;
  end;

  // Initialize TLlamaTokenDataArray: the native struct aliases the managed
  // buffer, so it stays valid only while this object is alive.
  FCandidates.Data := @FCandidatesData[0];
  FCandidates.Size := FN_Vocab;
  FCandidates.Sorted := False;
end;
49 |
// Refresh the candidate buffer from a new logits vector: token ids are
// restored from the identity table and probabilities reset to zero, then
// the native view's size/sorted flags are reset.
procedure TLlamaTokenDataArray.CopyLogits(const ALogits: TArray);
var
  LToken: Int32;
begin
  Assert(Length(ALogits) = FN_Vocab, 'Logits size must match vocabulary size.');

  for LToken := 0 to FN_Vocab - 1 do
  begin
    FCandidatesData[LToken].ID := FDefaultCandidatesDataID[LToken];
    FCandidatesData[LToken].Logit := ALogits[LToken];
    FCandidatesData[LToken].P := FDefaultCandidatesDataP[LToken];
  end;

  FCandidates.Size := FN_Vocab;
  FCandidates.Sorted := False;
end;
66 |
67 | end.
68 |
--------------------------------------------------------------------------------
/src/Common/Processor/LlamaCpp.Common.Processor.LogitsScore.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Processor.LogitsScore;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types;
9 |
10 | type
  // Default ILogitsProcessorList implementation: an ordered list of
  // TLogitsProcessor callbacks applied to the scores in sequence.
  TDefaultLogitsScoreList = class(TInterfacedObject, ILogitsProcessorList)
  private
    FProcessors: TList; // registered processors, run in insertion order
  public
    constructor Create(); overload;
    constructor Create(const AProcessor: TLogitsProcessor); overload;
    destructor Destroy(); override;

    procedure Add(const AProcessor: TLogitsProcessor);
    procedure Execute(const InputIds: TArray;
      [ref] const Scores: TArray);
  end;
23 |
24 | implementation
25 |
26 | { TDefaultLogitsScoreList }
27 |
// Start with an empty processor list.
constructor TDefaultLogitsScoreList.Create;
begin
  FProcessors := TList.Create();
end;
32 |
// Convenience overload: create the list and register a single processor.
constructor TDefaultLogitsScoreList.Create(
  const AProcessor: TLogitsProcessor);
begin
  Create();
  Add(AProcessor);
end;
39 |
// Free the owned processor list.
destructor TDefaultLogitsScoreList.Destroy;
begin
  FProcessors.Free();
  inherited;
end;
45 |
// Register a processor; processors run in the order they were added.
procedure TDefaultLogitsScoreList.Add(const AProcessor: TLogitsProcessor);
begin
  FProcessors.Add(AProcessor);
end;
50 |
// Run every registered processor over the scores, in registration order.
// LTempScores := Scores copies the dynamic-array REFERENCE, so processors
// that modify elements in place are visible to the caller through Scores.
// NOTE(review): if a processor reallocates the array (e.g. via SetLength),
// the new array is NOT propagated back to Scores — confirm this is intended.
procedure TDefaultLogitsScoreList.Execute(const InputIds: TArray;
  [ref] const Scores: TArray);
var
  LProcessor: TLogitsProcessor;
  LTempScores: TArray;
begin
  LTempScores := Scores;
  for LProcessor in FProcessors do
    LProcessor(InputIds, LTempScores);
end;
61 |
62 | end.
63 |
--------------------------------------------------------------------------------
/src/Common/Processor/LlamaCpp.Common.Processor.StoppingCriteria.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Processor.StoppingCriteria;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LlamaCpp.Common.Types;
9 |
10 | type
  // Default IStoppingCriteriaList implementation: a list of
  // TStoppingCriteria callbacks; Execute reports True as soon as any of
  // them signals that generation should stop.
  TDefaultStoppingCriteriaList = class(TInterfacedObject, IStoppingCriteriaList)
  private
    FCriterias: TList; // registered criteria, evaluated in insertion order
  public
    constructor Create(); overload;
    constructor Create(const AProcessor: TStoppingCriteria); overload;
    destructor Destroy(); override;

    procedure Add(const AProcessor: TStoppingCriteria);
    function Execute(const AInputIds: TArray; const ALogits: TArray): Boolean;
  end;
22 |
23 | implementation
24 |
25 | { TDefaultStoppingCriteriaList }
26 |
// Start with an empty criteria list.
constructor TDefaultStoppingCriteriaList.Create;
begin
  FCriterias := TList.Create();
end;
31 |
// Convenience overload: create the list and register a single criterion.
constructor TDefaultStoppingCriteriaList.Create(
  const AProcessor: TStoppingCriteria);
begin
  Create();
  Add(AProcessor);
end;
38 |
// Free the owned criteria list.
destructor TDefaultStoppingCriteriaList.Destroy;
begin
  FCriterias.Free();
  inherited;
end;
44 |
// Register a stopping criterion; criteria are evaluated in insertion order.
procedure TDefaultStoppingCriteriaList.Add(const AProcessor: TStoppingCriteria);
begin
  FCriterias.Add(AProcessor);
end;
49 |
// Evaluate the registered criteria in order; returns True as soon as any
// of them fires, False when none do.
function TDefaultStoppingCriteriaList.Execute(const AInputIds: TArray; const ALogits: TArray): Boolean;
var
  LCriteria: TStoppingCriteria;
begin
  Result := False;

  for LCriteria in FCriterias do
    if LCriteria(AInputIds, ALogits) then
      Exit(True);
end;
60 |
61 | end.
62 |
--------------------------------------------------------------------------------
/src/Common/Sampling/LlamaCpp.Common.Sampling.Context.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Sampling.Context;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Generics.Collections,
8 | LLamaCpp.CType.Llama,
9 | LlamaCpp.Wrapper.LlamaContext,
10 | LlamaCpp.Common.Types,
11 | LlamaCpp.Common.Sampling.Params;
12 |
13 | type
  // Sampling state carried across token-generation steps: the sampling
  // parameters, the mirostat accumulator, an optional grammar constraint
  // and the lists of previously accepted (FPrev) and candidate (FCur)
  // token ids.
  TLlamaSamplingContext = class
  private
    FParams: TLlamaSamplingParams; // owned; freed in Destroy
    FMirostatMu: Single;           // mirostat "mu" accumulator updated by Sample
    FGrammar: ILlamaGrammar;       // optional; interface-refcounted
    FPrev: TList;                  // accepted token history (see Accept)
    FCur: TList;                   // candidate list (cleared on Reset)
  public
    constructor Create;
    destructor Destroy; override;

    // Properties
    property Params: TLlamaSamplingParams read FParams write FParams;
    property MirostatMu: Single read FMirostatMu write FMirostatMu;
    property Grammar: ILlamaGrammar read FGrammar write FGrammar;
    property Prev: TList read FPrev write FPrev;
    property Cur: TList read FCur write FCur;

    // Methods
    procedure Reset;
    // NOTE(review): Copy shares Params and Grammar with the source instance.
    function Copy: TLlamaSamplingContext;
    function Last: Int32;
    function PrevStr(CtxMain: TLlamaContext; N: Int32): string;
    function Sample(const ACtxMain: TLlamaContext; const AIdx: Int32 = 0;
      ALogitsArray: TArray = nil): Int32;
    procedure Accept(ACtxMain: TLlamaContext; AId: Int32; AApplyGrammar: Boolean);
  end;
41 |
42 | implementation
43 |
44 | uses
45 | System.Math,
46 | LlamaCpp.Helper, LlamaCpp.Common.TokenArray;
47 |
48 | type
  // Local helper adding a LINQ-style Skip (drop the first Count items) to TList.
  TListHelper = class helper for TList
    function Skip(Count: Integer): TArray;
  end;
52 |
53 | { TListHelper }
54 |
// Return the elements from index Count onward as a new array.
// A Count larger than the list yields an empty array; a negative Count
// yields the whole list.
function TListHelper.Skip(Count: Integer): TArray;
var
  LStart: Integer;
  LIndex: Integer;
begin
  // BUGFIX: a negative Count (e.g. from PrevStr when N > FPrev.Count) made
  // the original loop index Self[i] with i < 0, raising a range error.
  LStart := Count;
  if LStart < 0 then
    LStart := 0;

  if LStart >= Self.Count then
    Exit(nil);

  // Copy directly into the result instead of going through a temp TList.
  SetLength(Result, Self.Count - LStart);
  for LIndex := LStart to Self.Count - 1 do
    Result[LIndex - LStart] := Self[LIndex];
end;
70 |
71 | { TLlamaSamplingContext }
72 |
// Build a context with default sampling parameters, no grammar, a zeroed
// mirostat accumulator and empty token lists.
constructor TLlamaSamplingContext.Create;
begin
  inherited Create;
  FParams := TLlamaSamplingParams.Create;
  FMirostatMu := 0.0;
  FGrammar := nil;
  FPrev := TList.Create;
  FCur := TList.Create;
end;
82 |
// Free the owned parameter object and token lists. FGrammar is an
// interface reference and is released automatically.
// NOTE(review): contexts produced by Copy share FParams with their source,
// so destroying both frees the same object twice — see Copy.
destructor TLlamaSamplingContext.Destroy;
begin
  FParams.Free;
  FPrev.Free;
  FCur.Free;
  inherited Destroy;
end;
90 |
// Clear the sampling history and candidate lists, and reset the grammar
// state when a grammar is attached.
procedure TLlamaSamplingContext.Reset;
begin
  FCur.Clear;
  FPrev.Clear;

  if FGrammar <> nil then
    FGrammar.Reset;
end;
98 |
// Duplicate this sampling context. The Prev/Cur token lists are deep-copied
// into new TList instances; Params and Grammar are SHARED with the source.
// NOTE(review): Create already allocated Result's FParams/FPrev/FCur, so
// the assignments below leak those objects; and because FParams is shared,
// destroying both contexts frees the same object twice — confirm the
// intended ownership model.
function TLlamaSamplingContext.Copy: TLlamaSamplingContext;
begin
  Result := TLlamaSamplingContext.Create;
  Result.Params := FParams;
  Result.MirostatMu := FMirostatMu;
  Result.Grammar := FGrammar;
  Result.Prev := TList.Create(FPrev);
  Result.Cur := TList.Create(FCur);
end;
108 |
// Most recently accepted token id, or -1 when nothing has been accepted yet.
function TLlamaSamplingContext.Last: Int32;
begin
  Result := -1;
  if FPrev.Count > 0 then
    Result := FPrev.Last;
end;
116 |
// Detokenize the last N accepted tokens into a UTF-8 string.
function TLlamaSamplingContext.PrevStr(CtxMain: TLlamaContext; N: Int32): string;
var
  LTail: TArray;
begin
  LTail := FPrev.Skip(FPrev.Count - N);
  Result := TEncoding.UTF8.GetString(CtxMain.Model.Detokenize(LTail));
end;
124 |
// Sample the next token id from the logits at batch position AIdx.
// Pipeline: gather logits -> per-token logit biases -> repetition
// penalties -> optional grammar mask -> temperature / greedy / mirostat /
// top-k/typical/top-p/min-p selection.
function TLlamaSamplingContext.Sample(const ACtxMain: TLlamaContext;
  const AIdx: Int32 = 0; ALogitsArray: TArray = nil): Int32;
var
  I: integer;
  LNVocab: integer;
  LLogits: PLogitArray;
  LLogitsArray: TArray;
  LLogitPair: TPair;
  LTokenDataArray: TLlamaTokenDataArray;
  LNlToken: Integer;
  LNlLogit: Single;
  LLastTokens: TArray;
  LLastTokensSize: Integer;
  LMirostatM: Integer;
  LMinKeep: Integer;
begin
  LNVocab := ACtxMain.Model.NVocab();

  if not Assigned(ALogitsArray) then
  begin
    LLogits := ACtxMain.GetLogitsIth(AIdx);
    // BUGFIX: the array must hold exactly one Single per vocabulary entry;
    // it was previously sized SizeOf(single) * LNVocab (4x too large),
    // over-reading the native logits buffer and violating CopyLogits's
    // size assertion.
    SetLength(LLogitsArray, LNVocab);
    for I := Low(LLogitsArray) to High(LLogitsArray) do
      {$R-}
      LLogitsArray[I] := LLogits[I];
      {$R+}
  end
  else
    // BUGFIX: a caller-supplied logits array was previously ignored,
    // leaving LLogitsArray nil. The reference is shared, so the bias
    // adjustments below modify the caller's array in place.
    LLogitsArray := ALogitsArray;

  // Apply configured per-token logit biases.
  for LLogitPair in FParams.LogitBias do
  begin
    LLogitsArray[LLogitPair.Key] := LLogitsArray[LLogitPair.Key]
      + LLogitPair.Value;
  end;

  LTokenDataArray := TLlamaTokenDataArray.Create(LNVocab);
  try
    LTokenDataArray.CopyLogits(LLogitsArray);

    if FPrev.Count > 0 then
    begin
      // Penalize the last PenaltyLastN accepted tokens. The newline logit
      // is saved first so it can be restored when PenalizeNL is off.
      LNlToken := ACtxMain.Model.TokenNL();
      LNlLogit := LLogitsArray[LNlToken];
      LLastTokens := TArrayHelper.Slice(FPrev.ToArray(), - FParams.PenaltyLastN);
      LLastTokensSize := Min(Length(LLastTokens), FParams.PenaltyLastN);

      if LLastTokensSize > 0 then
        ACtxMain.SampleRepetitionPenalties(
          LTokenDataArray,
          TLlamaTokenArray(LLastTokens[0]),
          LLastTokensSize,
          FParams.PenaltyRepeat,
          FParams.PenaltyFreq,
          FParams.PenaltyPresent
        );

      if not FParams.PenalizeNL then
        LTokenDataArray.CandidatesData[LNlToken].Logit := LNlLogit;
    end;

    // Mask out candidates the grammar forbids.
    if Assigned(FGrammar) then
      ACtxMain.SampleGrammar(LTokenDataArray, FGrammar);

    if FParams.Temp < 0 then
    begin
      // Negative temperature: take the single most probable token.
      ACtxMain.SampleSoftmax(LTokenDataArray);
      Result := LTokenDataArray.CandidatesData[0].Id;
    end
    else if FParams.Temp = 0 then
      // Zero temperature: deterministic greedy pick.
      Result := ACtxMain.SampleTokenGreedy(LTokenDataArray)
    else
    begin
      if FParams.Mirostat = 1 then
      begin
        LMirostatM := 100; // mirostat v1 "m" constant
        ACtxMain.SampleTemp(LTokenDataArray, FParams.Temp);
        Result := ACtxMain.SampleTokenMirostat(
          LTokenDataArray,
          FParams.MirostatTau,
          FParams.MirostatEta,
          LMirostatM,
          @FMirostatMu
        );
      end
      else
      begin
        // Standard top-k / typical / top-p / min-p / temperature chain.
        // NOTE(review): Mirostat = 2 also falls through to this branch —
        // confirm whether mirostat v2 support was intended here.
        LMinKeep := Max(1, FParams.NProbs);
        ACtxMain.SampleTopK(LTokenDataArray, FParams.TopK, LMinKeep);
        ACtxMain.SampleTypical(LTokenDataArray, FParams.TypicalP, LMinKeep);
        ACtxMain.SampleTopP(LTokenDataArray, FParams.TopP, LMinKeep);
        ACtxMain.SampleMinP(LTokenDataArray, FParams.MinP, LMinKeep);
        ACtxMain.SampleTemp(LTokenDataArray, FParams.Temp);
        Result := ACtxMain.SampleToken(LTokenDataArray);
      end;
    end;
  finally
    LTokenDataArray.Free();
  end;
end;
223 |
// Record an accepted token in the history, first feeding it to the grammar
// when requested and one is attached.
procedure TLlamaSamplingContext.Accept(ACtxMain: TLlamaContext; AId: Int32; AApplyGrammar: Boolean);
begin
  if AApplyGrammar then
    if Assigned(FGrammar) then
      ACtxMain.GrammarAcceptToken(FGrammar, AId);

  FPrev.Add(AId);
end;
230 |
231 | end.
232 |
--------------------------------------------------------------------------------
/src/Common/Sampling/LlamaCpp.Common.Sampling.CustomSampler.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Sampling.CustomSampler;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | Generics.Collections,
8 | LlamaCpp.CType.Llama;
9 |
10 | type
  // Callback invoked with the candidate token array each time the sampler
  // runs.
  TApplyFunc = reference to procedure(const ATokenDataArray: PLlamaTokenDataArray);

  // Adapts a Delphi closure into a llama.cpp sampler: fills a
  // TLlamaSamplerI vtable record whose Apply slot forwards to FApplyFunc,
  // with the sampler's ctx pointing back at this object.
  TCustomSampler = class
  private
    FSampler: TLlamaSampler;   // native sampler record handed to llama.cpp
    FSampelrI: TLlamaSamplerI; // vtable record (field name is a typo of "SamplerI")
    FApplyFunc: TApplyFunc;    // user callback run from Apply
  private
    class procedure Apply(ASmpl: PLlamaSampler; ACurrProb: PLlamaTokenDataArray); cdecl; static;
  public
    constructor Create(const AApplyFunc: TApplyFunc);
    destructor Destroy; override;

    // Pointer into this object's FSampler field; valid only while Self lives.
    function GetSampler: PLlamaSampler;
  end;
26 |
27 | implementation
28 |
29 | uses
30 | LlamaCpp.Api.Llama;
31 |
32 | { TCustomSampler }
33 |
// Wire up the native sampler record. Only the Apply slot of the vtable is
// implemented; the remaining slots are deliberately nil. FSampler.iface
// points at the FSampelrI field and ctx at Self, so the record embeds
// pointers into this object and must not outlive it.
constructor TCustomSampler.Create(const AApplyFunc: TApplyFunc);
begin
  inherited Create;
  FSampler := Default(TLlamaSampler);
  FSampelrI := Default(TLlamaSamplerI);
  FApplyFunc := AApplyFunc;

  FSampelrI.Apply := @TCustomSampler.Apply;
  FSampelrI.name := nil;
  FSampelrI.accept := nil;
  FSampelrI.reset := nil;
  FSampelrI.clone := nil;
  FSampelrI.free := nil;

  FSampler.iface := @FSampelrI;
  FSampler.ctx := Self;
end;
51 |
// No unmanaged resources to release: the sampler records are plain fields
// and FApplyFunc is a managed reference.
destructor TCustomSampler.Destroy;
begin
  inherited Destroy;
end;
56 |
// cdecl callback invoked by llama.cpp: recovers the owning TCustomSampler
// from the sampler's ctx pointer (set in Create) and forwards the
// candidate array to its FApplyFunc, when one is assigned.
class procedure TCustomSampler.Apply(ASmpl: PLlamaSampler;
  ACurrProb: PLlamaTokenDataArray);
var
  LCustomSampler: TCustomSampler;
begin
  LCustomSampler := TCustomSampler(ASmpl.Ctx);

  if Assigned(LCustomSampler.FApplyFunc) then
    LCustomSampler.FApplyFunc(ACurrProb);
end;
67 |
// Return a pointer to the internal sampler record for handing to the
// llama.cpp API. The pointer aliases a field of this object and becomes
// dangling once the object is destroyed.
function TCustomSampler.GetSampler: PLlamaSampler;
begin
  Result := @FSampler;
end;
72 |
73 | end.
74 |
--------------------------------------------------------------------------------
/src/Common/Sampling/LlamaCpp.Common.Sampling.Params.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Sampling.Params;
2 |
3 | interface
4 |
5 | uses
6 | System.Generics.Collections;
7 |
8 | type
  // Bag of sampling hyper-parameters consumed by TLlamaSamplingContext.
  // Defaults are assigned in Create; LogitBias maps token id -> additive
  // logit adjustment and is owned (freed) by this object.
  TLlamaSamplingParams = class
  private
    FNPrev: Int32;
    FNProbs: Int32;
    FTopK: Int32;
    FTopP: Single;
    FMinP: Single;
    FTFS_Z: Single;
    FTypicalP: Single;
    FTemp: Single;
    FPenaltyLastN: Int32;
    FPenaltyRepeat: Single;
    FPenaltyFreq: Single;
    FPenaltyPresent: Single;
    FMirostat: Int32;
    FMirostatTau: Single;
    FMirostatEta: Single;
    FPenalizeNL: Boolean;
    FGrammar: string;
    FCFGNegativePrompt: string;
    FCFGScale: Single;
    FLogitBias: TDictionary; // owned; token id -> logit delta
  public
    constructor Create();
    destructor Destroy(); override;

    property NPrev: Int32 read FNPrev write FNPrev;
    property NProbs: Int32 read FNProbs write FNProbs;
    property TopK: Int32 read FTopK write FTopK;
    property TopP: Single read FTopP write FTopP;
    property MinP: Single read FMinP write FMinP;
    property TFS_Z: Single read FTFS_Z write FTFS_Z;
    property TypicalP: Single read FTypicalP write FTypicalP;
    property Temp: Single read FTemp write FTemp;
    property PenaltyLastN: Int32 read FPenaltyLastN write FPenaltyLastN;
    property PenaltyRepeat: Single read FPenaltyRepeat write FPenaltyRepeat;
    property PenaltyFreq: Single read FPenaltyFreq write FPenaltyFreq;
    property PenaltyPresent: Single read FPenaltyPresent write FPenaltyPresent;
    property Mirostat: Int32 read FMirostat write FMirostat;
    property MirostatTau: Single read FMirostatTau write FMirostatTau;
    property MirostatEta: Single read FMirostatEta write FMirostatEta;
    property PenalizeNL: Boolean read FPenalizeNL write FPenalizeNL;
    property Grammar: string read FGrammar write FGrammar;
    property CFGNegativePrompt: string read FCFGNegativePrompt write FCFGNegativePrompt;
    property CFGScale: Single read FCFGScale write FCFGScale;
    property LogitBias: TDictionary read FLogitBias write FLogitBias;
  end;
56 |
57 | implementation
58 |
59 | { TLlamaSamplingParams }
60 |
// Initialize every parameter to its default value and allocate the
// (owned) logit-bias dictionary.
// NOTE(review): defaults appear to mirror llama.cpp's common sampling
// parameters — confirm against the upstream version being targeted.
constructor TLlamaSamplingParams.Create;
begin
  FNPrev := 64;
  FNProbs := 0;
  FTopK := 40;
  FTopP := 0.95;
  FMinP := 0.05;
  FTFS_Z := 1.00;
  FTypicalP := 1.00;
  FTemp := 0.80;
  FPenaltyLastN := 64;
  FPenaltyRepeat := 1.0;
  FPenaltyFreq := 0.00;
  FPenaltyPresent := 0.00;
  FMirostat := 0;  // 0 = disabled, 1 = mirostat v1 (see TLlamaSamplingContext.Sample)
  FMirostatTau := 5.00;
  FMirostatEta := 0.10;
  FPenalizeNL := True;
  FGrammar := '';
  FCFGNegativePrompt := '';
  FCFGScale := 1.00;
  FLogitBias := TDictionary.Create;
end;
84 |
// Release the owned logit-bias dictionary before tearing down.
destructor TLlamaSamplingParams.Destroy;
begin
  FLogitBias.Free;
  inherited;
end;
90 |
91 | end.
92 |
--------------------------------------------------------------------------------
/src/Common/Sampling/LlamaCpp.Common.Sampling.Sampler.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Sampling.Sampler;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Classes,
8 | System.Generics.Collections,
9 | LlamaCpp.CType.Llama,
10 | LLamaCpp.Wrapper.LlamaModel,
11 | LLamaCpp.Wrapper.LlamaContext,
12 | LlamaCpp.Common.Types,
13 | LlamaCpp.Common.Sampling.CustomSampler;
14 |
15 | type
  // Object-oriented wrapper over the llama.cpp sampler-chain API: owns a
  // native sampler chain (FSampler) and appends individual samplers to it
  // through the Add* methods, tracking them in FSamplers/FCustomSamplers.
  TLlamaSampler = class
  private
    FSampler: PLlamaSampler; // Pointer to the llama sampler chain
    FSamplers: TList;        // samplers appended to the chain
    FCustomSamplers: TList>; // Delphi-side custom sampler adapters

    procedure AddSampler(Sampler: PLlamaSampler);
  public
    constructor Create;
    destructor Destroy; override;

    // Each Add* appends the corresponding llama.cpp sampler to the chain.
    procedure AddGreedy;
    procedure AddDist(Seed: UInt32);
    procedure AddSoftmax;
    procedure AddTopK(K: Integer);
    procedure AddTopP(P: Single; MinKeep: Integer);
    procedure AddMinP(P: Single; MinKeep: Integer);
    procedure AddTypical(P: Single; MinKeep: Integer);
    procedure AddTemp(Temp: Single);
    procedure AddTempExt(T, Delta, Exponent: Single);
    procedure AddMirostat(NVocab, Seed: Integer; Tau, Eta: Single; M: Integer);
    procedure AddMirostatV2(Seed: Integer; Tau, Eta: Single);
    procedure AddGrammar(Model: TLlamaModel; Grammar: ILlamaGrammar);
    procedure AddPenalties(
      NVocab, SpecialEOSID, LinefeedID, PenaltyLastN: Integer;
      PenaltyRepeat, PenaltyFreq, PenaltyPresent: Single;
      PenalizeNL, IgnoreEOS: Boolean);
    procedure InitLogitBias(
      NVocab, NLogitBias: Integer; LogitBias: PLlamaLogitBias);
    procedure AddCustom(ApplyFunc: TApplyFunc);

    function GetSeed: Integer;
    function Sample(ACtx: TLlamaContext; AIdx: Integer): Integer;
    procedure Close;
  end;
51 |
52 | implementation
53 |
54 | uses
55 | LlamaCpp.Api.Llama;
56 |
// Initializes an empty sampler chain and the bookkeeping lists.
constructor TLlamaSampler.Create;
var
  LParams: TLlamaSamplerChainParams;
begin
  inherited Create;
  // NOTE(review): zero-initialized params are passed here rather than
  // llama_sampler_chain_default_params — confirm the defaults are equivalent.
  LParams := Default(TLlamaSamplerChainParams);
  FSampler := TLlamaApi.Instance.llama_sampler_chain_init(@LParams);
  FSamplers := TList<PLlamaSampler>.Create;
  FCustomSamplers := TList<TPair<Integer, TCustomSampler>>.Create;
end;
67 |
// Releases the native chain (via Close) and then the bookkeeping lists.
destructor TLlamaSampler.Destroy;
begin
  Close();
  FSamplers.Free();
  FCustomSamplers.Free();
  inherited Destroy;
end;
75 |
// Appends a greedy sampler: always selects the highest-probability token.
procedure TLlamaSampler.AddGreedy;
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_greedy());
end;
83 |
// Appends a distribution sampler seeded with the given RNG seed.
procedure TLlamaSampler.AddDist(Seed: UInt32);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_dist(Seed));
end;
91 |
// Appends a softmax sampler (normalizes logits into probabilities).
procedure TLlamaSampler.AddSoftmax;
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_softmax());
end;
99 |
// Appends a top-K sampler restricting candidates to the K most likely tokens.
procedure TLlamaSampler.AddTopK(K: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_top_k(K));
end;
107 |
// Appends a top-P (nucleus) sampler; keeps at least MinKeep candidates.
procedure TLlamaSampler.AddTopP(P: Single; MinKeep: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_top_p(P, MinKeep));
end;
115 |
// Appends a min-P sampler; keeps at least MinKeep candidates.
procedure TLlamaSampler.AddMinP(P: Single; MinKeep: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_min_p(P, MinKeep));
end;
123 |
// Appends a locally-typical sampler; keeps at least MinKeep candidates.
procedure TLlamaSampler.AddTypical(P: Single; MinKeep: Integer);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_typical(P, MinKeep));
end;
131 |
// Appends a temperature sampler scaling the logits by 1/Temp.
procedure TLlamaSampler.AddTemp(Temp: Single);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_temp(Temp));
end;
139 |
// Appends an extended (dynamic) temperature sampler.
procedure TLlamaSampler.AddTempExt(T, Delta, Exponent: Single);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_temp_ext(T, Delta, Exponent));
end;
147 |
// Appends a Mirostat (v1) sampler with target entropy Tau and learning rate Eta.
procedure TLlamaSampler.AddMirostat(NVocab, Seed: Integer; Tau, Eta: Single; M: Integer);
begin
  AddSampler(
    TLlamaApi.Instance.llama_sampler_init_mirostat(NVocab, Seed, Tau, Eta, M));
end;
155 |
// Appends a Mirostat v2 sampler with target entropy Tau and learning rate Eta.
procedure TLlamaSampler.AddMirostatV2(Seed: Integer; Tau, Eta: Single);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_mirostat_v2(Seed, Tau, Eta));
end;
163 |
// Appends a grammar-constrained sampler built from the given GBNF grammar text
// and its root rule name.
procedure TLlamaSampler.AddGrammar(Model: TLlamaModel; Grammar: ILlamaGrammar);
var
  LGrammarText: UTF8String;
  LRootRule: UTF8String;
begin
  // Hold the UTF-8 encodings in locals so the PAnsiChar pointers stay valid
  // for the duration of the native call.
  LGrammarText := UTF8Encode(Grammar.Grammar);
  LRootRule := UTF8Encode(Grammar.Root);
  AddSampler(TLlamaApi.Instance.llama_sampler_init_grammar(
    Model.Model, PAnsiChar(LGrammarText), PAnsiChar(LRootRule)));
end;
175 |
// Appends a penalties sampler applying repeat/frequency/presence penalties
// over the last PenaltyLastN tokens.
procedure TLlamaSampler.AddPenalties(
  NVocab, SpecialEOSID, LinefeedID, PenaltyLastN: Integer;
  PenaltyRepeat, PenaltyFreq, PenaltyPresent: Single;
  PenalizeNL, IgnoreEOS: Boolean);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_penalties(
    NVocab, SpecialEOSID, LinefeedID, PenaltyLastN,
    PenaltyRepeat, PenaltyFreq, PenaltyPresent,
    PenalizeNL, IgnoreEOS));
end;
189 |
// Appends a logit-bias sampler; LogitBias points to NLogitBias entries.
procedure TLlamaSampler.InitLogitBias(
  NVocab, NLogitBias: Integer; LogitBias: PLlamaLogitBias);
begin
  AddSampler(TLlamaApi.Instance.llama_sampler_init_logit_bias(
    NVocab, NLogitBias, LogitBias));
end;
198 |
// Appends a Delphi-implemented sampler. The wrapper object is tracked in
// FCustomSamplers together with its chain index so Close can detach and
// free it before the native chain is destroyed.
procedure TLlamaSampler.AddCustom(ApplyFunc: TApplyFunc);
var
  LCustomSampler: TCustomSampler;
begin
  LCustomSampler := TCustomSampler.Create(ApplyFunc);
  try
    AddSampler(LCustomSampler.GetSampler());
    // chain_n - 1 is the index the sampler was just appended at.
    FCustomSamplers.Add(TPair<Integer, TCustomSampler>.Create(
      TLlamaApi.Instance.llama_sampler_chain_n(FSampler) - 1, LCustomSampler));
  except
    on E: Exception do
    begin
      // NOTE(review): if AddSampler succeeded the chain may already reference
      // the native side of LCustomSampler when it is freed here — confirm
      // TCustomSampler.Free detaches safely in that case.
      LCustomSampler.Free();
      raise;
    end;
  end;
end;
216 |
// Appends a native sampler to the chain and records it in FSamplers.
// Must not be called after Close (the chain handle is nil then).
// Presumably the chain takes ownership of the native sampler — Close frees
// only the chain, never the FSamplers entries; confirm against llama.cpp docs.
procedure TLlamaSampler.AddSampler(Sampler: PLlamaSampler);
begin
  Assert(FSampler <> nil);
  TLlamaApi.Instance.llama_sampler_chain_add(FSampler, Sampler);
  FSamplers.Add(Sampler);
end;
223 |
// Returns the RNG seed reported by the native sampler chain.
// Valid only while the chain is open (before Close).
function TLlamaSampler.GetSeed: Integer;
begin
  Assert(FSampler <> nil);
  Result := TLlamaApi.Instance.llama_sampler_get_seed(FSampler);
end;
229 |
// Samples a token id from the logits at output index AIdx of ACtx,
// applying every sampler in the chain in the order they were added.
// Valid only while the chain is open (before Close).
function TLlamaSampler.Sample(ACtx: TLlamaContext; AIdx: Integer): Integer;
begin
  Assert(FSampler <> nil);
  Result := TLlamaApi.Instance.llama_sampler_sample(FSampler, ACtx.Context, AIdx);
end;
235 |
// Detaches and frees all custom samplers, then frees the native chain.
// Safe to call more than once; Destroy calls it as well.
procedure TLlamaSampler.Close;
var
  I: Integer;
  LPair: TPair<Integer, TCustomSampler>;
begin
  if FSampler <> nil then
  begin
    // Custom samplers must be removed from the chain before llama_sampler_free,
    // otherwise the chain would dispose their native side while the Delphi
    // wrappers still reference it. Iterate in reverse: chain_remove shifts the
    // indices of every sampler after the removed one, so removing the
    // highest-indexed entries first keeps the stored indices valid.
    for I := FCustomSamplers.Count - 1 downto 0 do
    begin
      LPair := FCustomSamplers[I];
      TLlamaApi.Instance.llama_sampler_chain_remove(FSampler, LPair.Key);
      LPair.Value.Free();
    end;

    TLlamaApi.Instance.llama_sampler_free(FSampler);
    FSampler := nil;
  end;
  FSamplers.Clear;
  FCustomSamplers.Clear;
end;
254 |
255 | end.
256 |
--------------------------------------------------------------------------------
/src/Common/Speculative/LlamaCpp.Common.Speculative.LookupDecoding.pas:
--------------------------------------------------------------------------------
1 | unit LlamaCpp.Common.Speculative.LookupDecoding;
2 |
3 | interface
4 |
5 | uses
6 | System.SysUtils,
7 | System.Classes,
8 | System.Generics.Collections,
9 | System.Math,
10 | LlamaCpp.Common.Types;
11 |
type
  /// <summary>
  ///   Prompt-lookup speculative decoding: drafts up to FNumPredTokens tokens
  ///   by finding the longest n-gram (up to FMaxNGramSize) at the end of the
  ///   prompt that re-occurs earlier in it, and proposing the tokens that
  ///   followed the earlier occurrence.
  /// </summary>
  // NOTE(review): generic parameters below were reconstructed from usage in the
  // method bodies; the Boolean element type of the match arrays is an
  // assumption — confirm against the implementations.
  TLlamaPromptLookupDecoding = class(TInterfacedObject, ILlamaDraftModel)
  private
    FMaxNGramSize: Integer;   // largest n-gram length searched for
    FNumPredTokens: Integer;  // maximum number of draft tokens returned

    // Function to simulate "np.lib.stride_tricks.sliding_window_view"
    function CreateSlidingWindow(const AInputIds: TArray<Integer>;
      const ANgramSize: Integer): TArray<TArray<Integer>>;

    function GetNgramArray(const AInputIds: TArray<Integer>;
      const ANgramSize: Integer): TArray<Integer>;
    function CompareWindowsWithNgram(
      const SlidingWindows: TArray<TArray<Integer>>;
      const ANgramSize: TArray<Integer>): TArray<Boolean>;
    function FindMatchIndices(
      const AMatches: TArray<Boolean>): TList<Integer>;
    function GetPredictedTokens(const AInputIds: TArray<Integer>;
      const AMatchIndices: TList<Integer>;
      const ANgramSize: Integer): TArray<Integer>;

    function FindCandidatePredTokens(
      const AInputIds: TArray<Integer>): TArray<Integer>;
  public
    constructor Create(const AMaxNGramSize: Integer = 2;
      const ANumPredTokens: Integer = 10);

    function Execute(const AInputIds: TArray<Integer>): TArray<Integer>;
  end;
40 |
41 | implementation
42 |
43 | { TLlamaPromptLookupDecoding }
44 |
// Configures the lookup-decoding parameters: AMaxNGramSize is the largest
// n-gram length searched for; ANumPredTokens caps the number of draft tokens.
constructor TLlamaPromptLookupDecoding.Create(
  const AMaxNGramSize: Integer = 2; const ANumPredTokens: Integer = 10);
begin
  inherited Create;
  FMaxNGramSize := AMaxNGramSize;
  FNumPredTokens := ANumPredTokens;
end;
52 |
// Simulates numpy's sliding_window_view: window I holds
// AInputIds[I .. I + ANgramSize - 1], producing
// Length(AInputIds) - ANgramSize + 1 windows in total.
// Raises if the input is shorter than the requested window size.
function TLlamaPromptLookupDecoding.CreateSlidingWindow(
  const AInputIds: TArray<Integer>;
  const ANgramSize: Integer): TArray<TArray<Integer>>;
var
  I: Integer;
begin
  if Length(AInputIds) < ANgramSize then
    raise Exception.Create('Ngram size is larger than the input length.');

  SetLength(Result, Length(AInputIds) - ANgramSize + 1);

  // Copy yields an independent ANgramSize-element slice starting at index I.
  for I := Low(Result) to High(Result) do
    Result[I] := Copy(AInputIds, I, ANgramSize);
end;
75 |
76 | function TLlamaPromptLookupDecoding.GetNgramArray(
77 | const AInputIds: TArray; const ANgramSize: Integer): TArray