├── .gitattributes
├── LlamaCpp.asmdef
├── LlamaCpp.asmdef.meta
├── LlamaExample.cs
├── LlamaExample.cs.meta
├── LlamaLibrary.cs
├── LlamaLibrary.cs.meta
├── LlamaModel.cs
├── LlamaModel.cs.meta
├── Plugins.meta
├── Plugins
│   ├── Windows.meta
│   └── Windows
│       ├── ggml_shared.dll
│       ├── ggml_shared.dll.meta
│       ├── llama.dll
│       └── llama.dll.meta
├── README.md
├── README.md.meta
├── Test.prefab
├── Test.prefab.meta
├── package.json
└── package.json.meta

/.gitattributes:
--------------------------------------------------------------------------------
*.dll filter=lfs diff=lfs merge=lfs -text
*.so filter=lfs diff=lfs merge=lfs -text
--------------------------------------------------------------------------------
/LlamaCpp.asmdef:
--------------------------------------------------------------------------------
{
    "name": "Abuksigun.LlamaCpp",
    "rootNamespace": "",
    "references": [],
    "includePlatforms": [],
    "excludePlatforms": [],
    "allowUnsafeCode": true,
    "overrideReferences": false,
    "precompiledReferences": [],
    "autoReferenced": true,
    "defineConstraints": [],
    "versionDefines": [],
    "noEngineReferences": false
}
--------------------------------------------------------------------------------
/LlamaCpp.asmdef.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: b7fbde52c76bfed42b2759f63f75341e
AssemblyDefinitionImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/LlamaExample.cs:
--------------------------------------------------------------------------------
using System;
using System.IO;
using System.Threading;
using UnityEngine;

namespace Abuksigun.LlamaCpp
{
    [ExecuteInEditMode]
    public class LlamaExample : MonoBehaviour
    {
        CancellationTokenSource cts;
        LlamaModel model;

        // Download the model here: https://huggingface.co/TheBloke/speechless-mistral-dolphin-orca-platypus-samantha-7B-GGUF/blob/main/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf
        // The path is relative to StreamingAssets (see Path.Join below)
        [SerializeField] string modelPath = "Models/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf";
        [SerializeField, TextArea(10, 10)] string systemPrompt = "You are an AI game character";
        [SerializeField, TextArea(10, 10)] string userPrompt = "You are in a Tavern\nHP:40%\nWhat is your next action:";
        [SerializeField, TextArea(10, 10)] string assistantPrompt = "I will";

        [SerializeField] int gpuLayers = 0;
        [SerializeField, Range(0, 1.5f)] float temperature = 0.8f;

        [ContextMenu("Run")]
        public async void RunAsync()
        {
            const string promptFormat = "<|im_start|>system\n{{system}}\n<|im_end|>\n<|im_start|>user\n{{user}}\n<|im_end|>\n<|im_start|>assistant\n{{assistant}}";
            const string customEos = "<|im_end|>";

            string fullModelPath = Path.Join(Application.streamingAssetsPath, modelPath);
            model ??= await LlamaModel.LoadModel(fullModelPath, new Progress<float>(x => Debug.Log($"Progress {x}")), gpuLayers: gpuLayers);
            Debug.Log($"Model context size: {model.ContextSize} tokens.");

            cts = new CancellationTokenSource();
            void Progress(string currentString)
            {
                if (currentString.EndsWith(customEos))
                {
                    cts?.Cancel();
                    cts = null;
                }
                Debug.Log(currentString);
            }
            string fullPrompt = FormatPrompt(promptFormat, systemPrompt, userPrompt, assistantPrompt);
            Debug.Log(fullPrompt);
            var parameters = new LlamaModel.SamplingParams { Temp = temperature };
            string result = await model.RunAsync(fullPrompt, 100, parameters, new Progress<string>(Progress), cts.Token);
            Debug.Log($"Result: {result}");
        }

        [ContextMenu("Stop")]
        public void Stop()
        {
            cts?.Cancel();
        }

        [ContextMenu("Reset Model")]
        public void ResetModel()
        {
            cts?.Cancel();
            model?.Dispose();
            model = null;
        }

        public static string FormatPrompt(string promptFormat, string system, string user, string assistant = "")
        {
            return promptFormat
                .Replace("{{system}}", system)
                .Replace("{{user}}", user)
                .Replace("{{assistant}}", assistant);
        }
    }
}
--------------------------------------------------------------------------------
/LlamaExample.cs.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: a5631fab5f542454fb4b67eabbb95a14
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
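
LlamaExample above builds a ChatML-style prompt by plain template substitution. For reference, a worked example of what FormatPrompt returns for the default serialized values (the result is shown in comments):

```csharp
// Worked example of LlamaExample.FormatPrompt with the defaults above
string prompt = LlamaExample.FormatPrompt(
    "<|im_start|>system\n{{system}}\n<|im_end|>\n<|im_start|>user\n{{user}}\n<|im_end|>\n<|im_start|>assistant\n{{assistant}}",
    "You are an AI game character",
    "You are in a Tavern\nHP:40%\nWhat is your next action:",
    "I will");
// prompt:
// <|im_start|>system
// You are an AI game character
// <|im_end|>
// <|im_start|>user
// You are in a Tavern
// HP:40%
// What is your next action:
// <|im_end|>
// <|im_start|>assistant
// I will
```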
/LlamaLibrary.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.InteropServices;

namespace Abuksigun.LlamaCpp
{
    public unsafe static class LlamaLibrary
    {
        private const string DllName = "llama";

        [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
        public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaModelParams
        {
            public int n_gpu_layers;
            public int main_gpu;
            public IntPtr tensor_split;
            public LlamaProgressCallback progress_callback;
            public IntPtr progress_callback_user_data;
            [MarshalAs(UnmanagedType.I1)] public bool vocab_only;
            [MarshalAs(UnmanagedType.I1)] public bool use_mmap;
            [MarshalAs(UnmanagedType.I1)] public bool use_mlock;

            public LlamaModelParams(LlamaProgressCallback progressCallback, IntPtr progressCallbackUserData, int nGpuLayers = 0)
            {
                n_gpu_layers = nGpuLayers;
                main_gpu = 0;
                tensor_split = IntPtr.Zero;
                progress_callback = progressCallback;
                progress_callback_user_data = progressCallbackUserData;
                vocab_only = false;
                use_mmap = true;
                use_mlock = false;
            }
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaContextParams
        {
            public uint seed;
            public uint n_ctx;
            public uint n_batch;
            public uint n_threads;
            public uint n_threads_batch;
            public sbyte rope_scaling_type;
            public float rope_freq_base;
            public float rope_freq_scale;
            public float yarn_ext_factor;
            public float yarn_attn_factor;
            public float yarn_beta_fast;
            public float yarn_beta_slow;
            public uint yarn_orig_ctx;
            [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
            [MarshalAs(UnmanagedType.I1)] public bool f16_kv;
            [MarshalAs(UnmanagedType.I1)] public bool logits_all;
            [MarshalAs(UnmanagedType.I1)] public bool embedding;

            public LlamaContextParams(uint seed, uint nThreads = 1, uint contextSize = 2048, sbyte ropeScaling = -1)
            {
                this.seed = seed;
                n_ctx = contextSize;
                n_batch = contextSize;
                n_threads = nThreads;
                n_threads_batch = nThreads;
                rope_scaling_type = ropeScaling;
                rope_freq_base = 0.0f;
                rope_freq_scale = 0.0f;
                yarn_ext_factor = -1.0f;
                yarn_attn_factor = 1.0f;
                yarn_beta_fast = 32.0f;
                yarn_beta_slow = 1.0f;
                yarn_orig_ctx = 0;
                mul_mat_q = true;
                f16_kv = true;
                logits_all = false;
                embedding = false;
            }
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaTokenDataArray
        {
            public LlamaTokenData* data;
            public long size; // size_t in llama.h; declared as long to match the 64-bit layout of the bundled DLLs
            [MarshalAs(UnmanagedType.I1)] public bool sorted;
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaTokenData
        {
            public int id;
            public float logit;
            public float p;
        }

        [StructLayout(LayoutKind.Sequential)]
        public unsafe struct LlamaBatch
        {
            public int n_tokens;
            public int* token;
            public float* embd;
            public int* pos;
            public int* n_seq_id;
            public int** seq_id;
            public byte* logits;

            // Legacy fields; may require removal in future llama.cpp versions
            private int _all_pos_0;
            private int _all_pos_1;
            private int _all_seq_id;
        }

        [StructLayout(LayoutKind.Sequential)]
        public struct LlamaGrammar
        {
            // Opaque to this binding. The native definition in llama.cpp b1518 is:
            // const std::vector<std::vector<llama_grammar_element>> rules;
            // std::vector<std::vector<const llama_grammar_element *>> stacks;
            // llama_partial_utf8 partial_utf8;
        }

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_backend_init(bool numa);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_load_model_from_file(string path_model, LlamaModelParams model_params);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_free_model(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_n_ctx(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern LlamaBatch llama_batch_init(int n_tokens, int embd, int n_seq_max);

        // Counterpart of llama_batch_init; frees the buffers the batch owns
        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_batch_free(LlamaBatch batch);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_decode(IntPtr ctx, LlamaBatch batch);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_new_context_with_model(IntPtr model, LlamaContextParams ctx_params);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_free(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_tokenize(IntPtr model, string text, int text_len, [MarshalAs(UnmanagedType.LPArray)] int[] tokens, int n_max_tokens, bool add_bos, bool special);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_get_logits(IntPtr ctx);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr llama_get_logits_ith(IntPtr ctx, int i);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_n_vocab(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_sample_token_greedy(IntPtr ctx, ref LlamaTokenDataArray candidates);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_to_piece(IntPtr model, int token, [MarshalAs(UnmanagedType.LPArray)] byte[] buffer, int length);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_backend_free();

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_eos(IntPtr model);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_token_nl(IntPtr model);

        // Sampling
        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_repetition_penalties(IntPtr ctx, LlamaTokenDataArray* candidates, [MarshalAs(UnmanagedType.LPArray)] int[] lastTokens, int penaltyLastN, float penaltyRepeat, float penaltyFreq, float penaltyPresent);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_classifier_free_guidance(IntPtr ctx, LlamaTokenDataArray* candidates, IntPtr guidanceCtx, float scale);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_softmax(IntPtr ctx, LlamaTokenDataArray* candidates);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_top_k(IntPtr ctx, LlamaTokenDataArray* candidates, int k, int minKeep);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_top_p(IntPtr ctx, LlamaTokenDataArray* candidates, float p, int minKeep);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_min_p(IntPtr ctx, LlamaTokenDataArray* candidates, float p, int minKeep);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_tail_free(IntPtr ctx, LlamaTokenDataArray* candidates, float z, int minKeep);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_typical(IntPtr ctx, LlamaTokenDataArray* candidates, float p, int minKeep);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_temp(IntPtr ctx, LlamaTokenDataArray* candidates, float temp);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_temperature(IntPtr ctx, LlamaTokenDataArray* candidates, float temp);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern void llama_sample_grammar(IntPtr ctx, LlamaTokenDataArray* candidates, IntPtr grammar);

        [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
        public static extern int llama_sample_token(IntPtr ctx, LlamaTokenDataArray* candidates);
    }
}
--------------------------------------------------------------------------------
/LlamaLibrary.cs.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: 6b3e88355e420f540a3cd5cf9ef82b3c
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
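
LlamaLibrary above is a thin P/Invoke layer; LlamaModel.cs (next file) owns the call ordering and error handling. As a rough sketch of the native lifecycle it wraps — illustrative only, with a placeholder model path and no error checks:

```csharp
// Minimal native lifecycle, assuming the binding above (placeholder path, no error checks)
LlamaLibrary.llama_backend_init(numa: false);
var modelParams = new LlamaLibrary.LlamaModelParams((progress, _) => { }, IntPtr.Zero, nGpuLayers: 0);
IntPtr model = LlamaLibrary.llama_load_model_from_file("path/to/model.gguf", modelParams);
IntPtr ctx = LlamaLibrary.llama_new_context_with_model(model, new LlamaLibrary.LlamaContextParams(seed: 1234, nThreads: 4));
// ... llama_tokenize -> llama_batch_init -> llama_decode -> sampling ...
LlamaLibrary.llama_free(ctx);
LlamaLibrary.llama_free_model(model);
LlamaLibrary.llama_backend_free();
```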
/LlamaModel.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;

namespace Abuksigun.LlamaCpp
{
    public sealed class LlamaModel : IDisposable
    {
        public class LlamaException : Exception
        {
            public LlamaException(string message) : base(message) { }
        }

        IntPtr modelPointer;
        IntPtr contextPointer;
        readonly CancellationTokenSource disposeCancellationTokenSource = new();

        public IntPtr NativeModelPointer => modelPointer;
        public IntPtr NativeContextPointer => contextPointer;
        public int EosToken => LlamaLibrary.llama_token_eos(modelPointer);
        public int ContextSize => LlamaLibrary.llama_n_ctx(contextPointer);
        public int VocabLength => LlamaLibrary.llama_n_vocab(modelPointer);

        public static async Task<LlamaModel> LoadModel(string modelPath, IProgress<float> progress, uint contextSize = 2048, int gpuLayers = 0)
        {
            int threadsN = SystemInfo.processorCount;
            (IntPtr newModelPointer, IntPtr newContextPointer) = await Task.Run<(IntPtr, IntPtr)>(() =>
            {
                LlamaLibrary.llama_backend_init(numa: false);

                var modelParams = new LlamaLibrary.LlamaModelParams((float progressFloat, IntPtr _) => progress.Report(progressFloat), IntPtr.Zero, gpuLayers);
                try
                {
                    IntPtr model = LlamaLibrary.llama_load_model_from_file(modelPath, modelParams);
                    if (model == IntPtr.Zero)
                        throw new LlamaException("Failed to load the Llama model");
                    try
                    {
                        var ctxParams = new LlamaLibrary.LlamaContextParams(1234, (uint)threadsN, contextSize: contextSize);
                        IntPtr ctx = LlamaLibrary.llama_new_context_with_model(model, ctxParams);
                        if (ctx == IntPtr.Zero)
                            throw new LlamaException("Failed to create the Llama context");
                        return (model, ctx);
                    }
                    catch
                    {
                        LlamaLibrary.llama_free_model(model);
                        throw;
                    }
                }
                catch
                {
                    LlamaLibrary.llama_backend_free();
                    throw;
                }
            });
            return new LlamaModel(newModelPointer, newContextPointer);
        }

        LlamaModel(IntPtr modelPointer, IntPtr contextPointer)
        {
            this.modelPointer = modelPointer;
            this.contextPointer = contextPointer;
        }

        ~LlamaModel()
        {
            Dispose();
        }

        public void Dispose()
        {
            if (modelPointer == IntPtr.Zero && contextPointer == IntPtr.Zero)
                return;
            disposeCancellationTokenSource.Cancel();
            if (contextPointer != IntPtr.Zero)
            {
                LlamaLibrary.llama_free(contextPointer);
                contextPointer = IntPtr.Zero;
            }
            if (modelPointer != IntPtr.Zero)
            {
                LlamaLibrary.llama_free_model(modelPointer);
                LlamaLibrary.llama_backend_free();
                modelPointer = IntPtr.Zero;
            }
        }

        public Task<string> RunAsync(string prompt, int outputLength = 32, SamplingParams samplingParams = null, IProgress<string> progress = null, CancellationToken? ct = null)
        {
            return Task.Run(() => {
                var tokenSource = ct != null ? CancellationTokenSource.CreateLinkedTokenSource(disposeCancellationTokenSource.Token, ct.Value) : disposeCancellationTokenSource;
                return Run(prompt, contextPointer, outputLength, samplingParams ?? new(), progress, tokenSource.Token);
            });
        }

        string Run(string prompt, IntPtr context, int outputLength, SamplingParams samplingParams, IProgress<string> progress = null, CancellationToken? cancellationToken = null)
        {
            StringBuilder outputStringBuilder = new StringBuilder();

            int eosToken = EosToken;
            int[] tokens = TokenizePrompt(prompt, true);

            var samplingContext = new LlamaSamplingContext(samplingParams, tokens);

            int totalTokens = tokens.Length + outputLength;
            if (totalTokens > ContextSize)
                throw new LlamaException($"Error: Model context size {ContextSize} tokens can't fit the total of {totalTokens} tokens expected");

            LlamaLibrary.LlamaBatch batch = CreateBatch(tokens, totalTokens);
            try
            {
                int decodeResult = LlamaLibrary.llama_decode(context, batch);
                if (decodeResult != 0)
                    throw new LlamaException($"llama_decode() failed. Code: {decodeResult}");

                for (int i = batch.n_tokens; i < totalTokens; i++)
                {
                    int newTokenId = SampleToken(samplingContext, batch.n_tokens - 1);

                    samplingContext.AddToken(newTokenId);

                    if (newTokenId == eosToken)
                        break;

                    // Output the generated text
                    string tokenText = LlamaTokenToPiece(newTokenId);
                    outputStringBuilder.Append(tokenText);
                    progress?.Report(outputStringBuilder.ToString());
                    batch.n_tokens = 0;

                    // Push this new token for the next evaluation
                    LlamaBatchAdd(ref batch, newTokenId, i, true, 0);

                    if (cancellationToken?.IsCancellationRequested ?? false)
                        break;
                    if (LlamaLibrary.llama_decode(context, batch) != 0)
                        throw new LlamaException("llama_decode() failed");
                }
            }
            finally
            {
                // Release the native buffers allocated by llama_batch_init
                LlamaLibrary.llama_batch_free(batch);
            }
            return outputStringBuilder.ToString();
        }

        unsafe int SampleTokenGreedy(IntPtr ctx, int idx)
        {
            LlamaLibrary.LlamaTokenData[] candidates = FindCandidates(ctx, idx);

            fixed (LlamaLibrary.LlamaTokenData* pCandidates = candidates)
            {
                var candidatesArray = new LlamaLibrary.LlamaTokenDataArray
                {
                    data = pCandidates,
                    size = candidates.Length,
                    sorted = false
                };

                // Sample the most likely token
                int newTokenId = LlamaLibrary.llama_sample_token_greedy(ctx, ref candidatesArray);
                return newTokenId;
            }
        }

        public unsafe LlamaLibrary.LlamaTokenData[] FindCandidates(IntPtr ctx, int idx)
        {
            IntPtr logitsPtr = LlamaLibrary.llama_get_logits_ith(ctx, idx);
            int vocabLength = VocabLength;
            LlamaLibrary.LlamaTokenData[] candidates = new LlamaLibrary.LlamaTokenData[vocabLength];

            float* logits = (float*)logitsPtr.ToPointer();
            for (int j = 0; j < vocabLength; j++)
                candidates[j] = new LlamaLibrary.LlamaTokenData { id = j, logit = logits[j], p = 0.0f };
            return candidates;
        }

        public static LlamaLibrary.LlamaBatch CreateBatch(int[] tokens, int size)
        {
            LlamaLibrary.LlamaBatch batch = LlamaLibrary.llama_batch_init(size, 0, 1);

            for (int i = 0; i < tokens.Length; i++)
                LlamaBatchAdd(ref batch, tokens[i], i, false, 0);

            unsafe
            {
                // Ensure logits are output for the last token of the prompt
                batch.logits[batch.n_tokens - 1] = 1;
            }

            return batch;
        }

        public unsafe static void LlamaBatchAdd(ref LlamaLibrary.LlamaBatch batch, int id, int pos, bool logits, params int[] seqIds)
        {
            batch.token[batch.n_tokens] = id;
            batch.pos[batch.n_tokens] = pos;
            batch.n_seq_id[batch.n_tokens] = seqIds.Length;

            for (int i = 0; i < seqIds.Length; ++i)
            {
                batch.seq_id[batch.n_tokens][i] = seqIds[i];
            }

            batch.logits[batch.n_tokens] = logits ? (byte)1 : (byte)0;
            batch.n_tokens++;
        }

        public int[] TokenizePrompt(string prompt, bool addBos)
        {
            int[] tokens = new int[prompt.Length + (addBos ? 1 : 0)];
            int nTokens = LlamaLibrary.llama_tokenize(modelPointer, prompt, prompt.Length, tokens, tokens.Length, addBos, false);
            Array.Resize(ref tokens, nTokens);
            return tokens;
        }

        public string LlamaTokenToPiece(int token)
        {
            const int initialSize = 16;
            byte[] buffer = new byte[initialSize];

            int nTokens = LlamaLibrary.llama_token_to_piece(modelPointer, token, buffer, buffer.Length);
            if (nTokens < 0)
            {
                // A negative result is the required buffer size; retry with a buffer of that size
                Array.Resize(ref buffer, -nTokens);
                int check = LlamaLibrary.llama_token_to_piece(modelPointer, token, buffer, buffer.Length);
                if (check != -nTokens)
                    return null;
            }
            else
            {
                Array.Resize(ref buffer, nTokens);
            }

            string result = Encoding.UTF8.GetString(buffer);
            return result;
        }

        public unsafe class LlamaSamplingContext
        {
            public SamplingParams Params { get; }
            public int[] Prev { get; }
            public List<LlamaLibrary.LlamaTokenData> Cur { get; } = new();
            public LlamaLibrary.LlamaGrammar* Grammar { get; }

            public LlamaSamplingContext(SamplingParams parameters, int[] promptTokens)
            {
                Params = parameters;
                // Keep the last NPrev prompt tokens, left-padded with zeros when the prompt is shorter
                int fillLength = Mathf.Max(parameters.NPrev - promptTokens.Length, 0);
                int skipLength = Mathf.Max(promptTokens.Length - parameters.NPrev, 0);
                Prev = Enumerable.Repeat(0, fillLength).Concat(promptTokens.Skip(skipLength)).ToArray();
            }

            public void AddToken(int id)
            {
                // Shift the history window left and append the new token
                for (int i = 0; i < Prev.Length - 1; i++)
                    Prev[i] = Prev[i + 1];
                Prev[Prev.Length - 1] = id;
            }
        }

        public class SamplingParams
        {
            public float Temp { get; set; } = 0.80f;
            public int TopK { get; set; } = 40;
            public float TopP { get; set; } = 0.95f;
            public float MinP { get; set; } = 0.05f;
            public float TfsZ { get; set; } = 1.00f;
            public float TypicalP { get; set; } = 1.00f;
            public int PenaltyLastN { get; set; } = 64;
            public float PenaltyRepeat { get; set; } = 1.10f;
            public float PenaltyFreq { get; set; } = 0.00f;
            public float PenaltyPresent { get; set; } = 0.00f;
            public bool PenalizeNl { get; set; } = true;
            public Dictionary<int, float> LogitBias { get; set; } = new Dictionary<int, float>();
            public int NPrev { get; set; } = 64;
            public int NProbs { get; set; } = 0;
        }
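
        // The method below mirrors llama.cpp's default sampling chain: logit biases are
        // applied while building the candidate list, repetition penalties come next, and
        // then one of three paths is taken: plain argmax after softmax (Temp < 0), greedy
        // sampling (Temp == 0), or the filter chain
        // top-k -> tail-free -> typical -> top-p -> min-p -> temperature -> llama_sample_token.
        // Illustrative usage (types are from this file, values are arbitrary):
        //   var sp = new SamplingParams { Temp = 0.3f, TopK = 20, PenaltyRepeat = 1.15f };
        //   string reply = await model.RunAsync(prompt, 64, sp);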
        public unsafe int SampleToken(LlamaSamplingContext samplingContext, int idx)
        {
            SamplingParams parameters = samplingContext.Params;

            int vocabLength = VocabLength;

            float temp = parameters.Temp;
            int topK = parameters.TopK <= 0 ? vocabLength : parameters.TopK;
            float topP = parameters.TopP;
            float minP = parameters.MinP;
            float tfsZ = parameters.TfsZ;
            float typicalP = parameters.TypicalP;
            int penaltyLastN = parameters.PenaltyLastN < 0 ? parameters.NPrev : parameters.PenaltyLastN;
            float penaltyRepeat = parameters.PenaltyRepeat;
            float penaltyFreq = parameters.PenaltyFreq;
            float penaltyPresent = parameters.PenaltyPresent;
            bool penalizeNl = parameters.PenalizeNl;

            var prev = samplingContext.Prev;
            var cur = samplingContext.Cur;

            IntPtr logitsPtr = LlamaLibrary.llama_get_logits_ith(contextPointer, idx);
            float[] logits = new float[vocabLength];
            Marshal.Copy(logitsPtr, logits, 0, vocabLength);

            foreach (var bias in parameters.LogitBias)
                logits[bias.Key] += bias.Value;

            cur.Clear();

            for (int tokenID = 0; tokenID < vocabLength; tokenID++)
                cur.Add(new LlamaLibrary.LlamaTokenData { id = tokenID, logit = logits[tokenID], p = 0 });

            var curArray = cur.ToArray();
            fixed (LlamaLibrary.LlamaTokenData* pCurArray = curArray)
            {
                LlamaLibrary.LlamaTokenDataArray curP = new LlamaLibrary.LlamaTokenDataArray
                {
                    data = pCurArray,
                    size = cur.Count,
                    sorted = false
                };

                if (prev.Length > 0)
                {
                    int nlTokenId = LlamaLibrary.llama_token_nl(modelPointer);
                    float nlLogit = logits[nlTokenId];

                    LlamaLibrary.llama_sample_repetition_penalties(contextPointer, &curP, prev, prev.Length, penaltyRepeat, penaltyFreq, penaltyPresent);

                    // If not penalizing new lines, reset the logit for the newline token
                    if (!penalizeNl)
                    {
                        for (int i = 0; i < curP.size; i++)
                        {
                            if (curP.data[i].id == nlTokenId)
                            {
                                curP.data[i].logit = nlLogit;
                                break;
                            }
                        }
                    }
                }

                int id = 0;
                if (temp < 0.0f)
                {
                    LlamaLibrary.llama_sample_softmax(contextPointer, &curP);
                    id = curP.data[0].id;
                }
                else if (temp == 0.0f)
                {
                    id = LlamaLibrary.llama_sample_token_greedy(contextPointer, ref curP);
                }
                else
                {
                    int minKeep = Math.Max(1, parameters.NProbs);

                    LlamaLibrary.llama_sample_top_k(contextPointer, &curP, topK, minKeep);
                    LlamaLibrary.llama_sample_tail_free(contextPointer, &curP, tfsZ, minKeep);
                    LlamaLibrary.llama_sample_typical(contextPointer, &curP, typicalP, minKeep);
                    LlamaLibrary.llama_sample_top_p(contextPointer, &curP, topP, minKeep);
                    LlamaLibrary.llama_sample_min_p(contextPointer, &curP, minP, minKeep);
                    LlamaLibrary.llama_sample_temp(contextPointer, &curP, temp);

                    id = LlamaLibrary.llama_sample_token(contextPointer, &curP);
                }

                return id;
            }
        }
    }
}
--------------------------------------------------------------------------------
/LlamaModel.cs.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: 5dc9c60cbd0717d418f42fb37ebad8be
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/Plugins.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: d8e2fdd2102960c43aa396f1c01c40e7
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/Plugins/Windows.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: ed686b97be567094ab43ba784312a039
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/Plugins/Windows/ggml_shared.dll:
--------------------------------------------------------------------------------
version https://git-lfs.github.com/spec/v1
oid sha256:b28ceb00c45740ba4ea7fa42401ef177b1d154274c9a3e5be5e9bc0fb163fc5f
size 399872
--------------------------------------------------------------------------------
/Plugins/Windows/ggml_shared.dll.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: 6109b031034663b418885707046a0b2a
PluginImporter:
  externalObjects: {}
  serializedVersion: 2
  iconMap: {}
  executionOrder: {}
  defineConstraints: []
  isPreloaded: 0
  isOverridable: 1
  isExplicitlyReferenced: 0
  validateReferences: 1
  platformData:
  - first:
      Any:
    second:
      enabled: 1
      settings: {}
  - first:
      Editor: Editor
    second:
      enabled: 0
      settings:
        DefaultValueInitialized: true
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/Plugins/Windows/llama.dll:
--------------------------------------------------------------------------------
version https://git-lfs.github.com/spec/v1
oid sha256:a0ef491ee239e62db7efbf0b72319230d3014cf9746d97be1b62f0b7e95598cb
size 922624
--------------------------------------------------------------------------------
/Plugins/Windows/llama.dll.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: bee97d240a8768742a1363079665312c
PluginImporter:
  externalObjects: {}
  serializedVersion: 2
  iconMap: {}
  executionOrder: {}
  defineConstraints: []
  isPreloaded: 0
  isOverridable: 1
  isExplicitlyReferenced: 0
  validateReferences: 1
  platformData:
  - first:
      Any:
    second:
      enabled: 1
      settings: {}
  - first:
      Editor: Editor
    second:
      enabled: 0
      settings:
        DefaultValueInitialized: true
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Semi-abandoned
Sadly, I don't have time to support this package at the moment. I recommend using https://github.com/SciSharp/LLamaSharp, which supports the latest version of llama.cpp.

# UnityLlamaCpp
Connect llama.cpp to Unity3d in two clicks

# MacOS/Linux/Windows CUDA
The bindings were made for the b1518 release of llama.cpp, so if you need libraries for your OS, build them from the b1518 sources.
Prebuilt Windows CUDA llama.cpp DLLs are available here: https://github.com/ggerganov/llama.cpp/releases/tag/b1518

# Installation
- Add the git repo as a package: Window -> Package Manager -> Add package from git URL: https://github.com/mrtrizer/UnityLlamaCpp.git
- Download a GGUF model, for example: https://huggingface.co/TheBloke/speechless-mistral-dolphin-orca-platypus-samantha-7B-GGUF/blob/main/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf
- Put the model file in StreamingAssets/Models
- Find Test.prefab in the package directory and run it via the component context menu; it should generate a response to the prompt
- Use LlamaExample.cs as an example (a minimal script-only variant is shown below)
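
# Usage from code
A minimal script-only sketch (the MonoBehaviour name and logging format are illustrative; the API calls are from LlamaModel.cs, and the model path assumes the installation steps above):

```csharp
using System;
using System.IO;
using UnityEngine;
using Abuksigun.LlamaCpp;

public class MinimalLlama : MonoBehaviour
{
    async void Start()
    {
        string path = Path.Join(Application.streamingAssetsPath, "Models/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf");
        // Loads the model off the main thread and reports progress in 0..1
        LlamaModel model = await LlamaModel.LoadModel(path, new Progress<float>(p => Debug.Log($"Loading {p:P0}")));
        // Generates up to 32 tokens with the default sampling parameters
        string result = await model.RunAsync("Once upon a time", 32);
        Debug.Log(result);
        model.Dispose();
    }
}
```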
--------------------------------------------------------------------------------
/README.md.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: 47226f57bca379e44af565bc3548c402
TextScriptImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/Test.prefab:
--------------------------------------------------------------------------------
%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1 &1432381695479700277
GameObject:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_PrefabInstance: {fileID: 0}
  m_PrefabAsset: {fileID: 0}
  serializedVersion: 6
  m_Component:
  - component: {fileID: 1432381695479700283}
  - component: {fileID: 1432381695479700282}
  m_Layer: 0
  m_Name: Test
  m_TagString: Untagged
  m_Icon: {fileID: 0}
  m_NavMeshLayer: 0
  m_StaticEditorFlags: 0
  m_IsActive: 1
--- !u!4 &1432381695479700283
Transform:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_PrefabInstance: {fileID: 0}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 1432381695479700277}
  m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
  m_LocalPosition: {x: 607.92804, y: 362.18805, z: 1.1204722}
  m_LocalScale: {x: 1, y: 1, z: 1}
  m_ConstrainProportionsScale: 0
  m_Children: []
  m_Father: {fileID: 0}
  m_RootOrder: 0
  m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1432381695479700282
MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_PrefabInstance: {fileID: 0}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 1432381695479700277}
  m_Enabled: 1
  m_EditorHideFlags: 0
  m_Script: {fileID: 11500000, guid: a5631fab5f542454fb4b67eabbb95a14, type: 3}
  m_Name:
  m_EditorClassIdentifier:
  modelPath: Models/speechless-mistral-dolphin-orca-platypus-samantha-7b.Q4_K_M.gguf
  systemPrompt: You are an AI game character
  userPrompt: 'You are in a Tavern

    HP:40%

    What is your next action:'
  assistantPrompt: I will
--------------------------------------------------------------------------------
/Test.prefab.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: cd215450262fb1f4d8ad4f8842fdb9e0
PrefabImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "com.abuksigun.llama-cpp",
  "version": "0.1.0",
  "displayName": "Llama.Cpp",
  "description": "",
  "unity": "2021.2"
}
--------------------------------------------------------------------------------
/package.json.meta:
--------------------------------------------------------------------------------
fileFormatVersion: 2
guid: f7d7eae25846bbf489a881073d34fd23
PackageManifestImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
--------------------------------------------------------------------------------