├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── lib ├── ex_llama.ex └── ex_llama │ ├── chat_template.ex │ ├── chat_template │ ├── alpaca.ex │ ├── amber_chat.ex │ ├── chat_ml.ex │ ├── falcon_instruct.ex │ ├── gemma_instruct.ex │ ├── llama_2_chat.ex │ ├── mistral_instruct.ex │ ├── open_chat.ex │ ├── saiga.ex │ ├── solar_instruct.ex │ ├── vicuna.ex │ └── zephyr.ex │ ├── context_params.ex │ ├── embedding_options.ex │ ├── model.ex │ ├── model_options.ex │ ├── nif.ex │ ├── session.ex │ └── session_options.ex ├── mix.exs ├── mix.lock ├── native └── erlang_llama_cpp_nif │ ├── Cargo.lock │ ├── Cargo.toml │ └── src │ ├── lib.rs │ ├── nifs.rs │ ├── nifs │ ├── ex_llama_model.rs │ └── ex_llama_session.rs │ ├── refs.rs │ ├── refs │ ├── model_ref.rs │ └── session_ref.rs │ ├── structs.rs │ └── structs │ ├── completion.rs │ ├── embedding_options.rs │ ├── model.rs │ ├── model_options.rs │ ├── session.rs │ └── session_options.rs ├── priv └── models │ └── local_llama │ └── tiny_llama │ ├── .gitignore │ └── init.sh └── test ├── ex_llama_test.exs └── test_helper.exs /.gitignore: -------------------------------------------------------------------------------- 1 | # Intellij 2 | ex_llama.iml 3 | .idea/ 4 | 5 | # Test Models 6 | test/models/* 7 | 8 | # asdf 9 | .tool-versions 10 | 11 | # The directory Mix will write compiled artifacts to. 12 | /_build/ 13 | 14 | # If you run "mix test --cover", coverage assets end up here. 15 | /cover/ 16 | 17 | # The directory Mix downloads your dependencies sources to. 18 | /deps/ 19 | 20 | # Where third-party dependencies like ExDoc output generated docs. 21 | /doc/ 22 | 23 | # Ignore .fetch files in case you like to edit your project deps locally. 24 | /.fetch 25 | 26 | # If the VM crashes, it generates a dump, let's ignore it too. 27 | erl_crash.dump 28 | 29 | # Also ignore archive artifacts (built via "mix archive.build"). 30 | *.ez 31 | 32 | # Ignore package tarball (built via "mix hex.build"). 33 | ex_llama-*.tar 34 | 35 | # Temporary files, for example, from tests. 36 | /tmp/ 37 | 38 | # Static Libs for rust deps. 39 | /priv/native/* 40 | 41 | # Build artifacts 42 | /native/erlang_llama_cpp_nif/target/ 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Change Log 2 | ===== 3 | 4 | ## 0.1.0 5 | Update to use GenAI Core structs 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 noizu-labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ExLLama: LlamaCpp.rs NIF wrapper for Elixir/Erlang. 2 | ======= 3 | 4 | This is an alpha library for loading and interacting with models via the llama_cpp Rust client, exposed as NIF extensions. 5 | Inspired by [llama_cpp_ex](https://github.com/jeregrine/llama_cpp_ex) 6 | 7 | 8 | ## Getting Started 9 | 1. Add the `ex_llama` dependency to your `mix.exs` file: 10 | 11 | ```elixir 12 | def deps do 13 | [ 14 | {:ex_llama, "~> 0.0.1"} 15 | ] 16 | 17 | end 18 | ``` 19 | 20 | 21 | ## Chat Completion 22 | As of this build, only the `<|role|>message` chat-completion format is supported, such as the one used by TinyLlama. 23 | 24 | 25 | ```elixir 26 | 27 | {:ok, llama} = ExLLama.load_model("./test/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 28 | thread = [ 29 | %{role: :user, content: "Say Hello. And only hello. Example \"Hello\"."}, 30 | %{role: :assistant, content: "Hello"}, 31 | %{role: :user, content: "Repeat what you just said."}, 32 | %{role: :assistant, content: "Hello"}, 33 | %{role: :user, content: "Say Goodbye."}, 34 | %{role: :assistant, content: "Goodbye"}, 35 | %{role: :user, content: "Say Apple."}, 36 | %{role: :assistant, content: "Apple"}, 37 | %{role: :user, content: "What did you just say?."}, 38 | ] 39 | 40 | {:ok, response} = ExLLama.chat_completion(llama, thread, %{seed: 2}) 41 | # response = %{ 42 | # choices: [ 43 | # %{reason: :end, role: "assistant", content: "Apple"}, 44 | # %{reason: :end, role: "assistant", content: "Apple"}, 45 | # %{reason: :end, role: "assistant", content: "Apple"} 46 | # ] 47 | # } 48 | 49 | ``` 50 | 51 | 52 | ## Simple Completion (direct) 53 | ```elixir 54 | {:ok, llama} = ExLLama.load_model("./test/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 55 | {:ok, options} = ExLLama.Session.default_options() 56 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 57 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 58 | {:ok, response} = ExLLama.completion(session, 512, "\n*") 59 | response = String.trim_leading(response) 60 | # "Goodbye." 61 | ``` 62 | 63 | ## Streaming Completion (the final mechanism will be replaced with a Stream) 64 | ```elixir 65 | 66 | def receive_text(acc \\ []) do 67 | receive do 68 | x = {:ok, _} -> Enum.reverse([x|acc]) 69 | x = {:error, _} -> Enum.reverse([x|acc]) 70 | :fin -> 71 | Enum.reverse(acc) 72 | x -> 73 | receive_text([x | acc]) 74 | end 75 | end 76 | 77 | #... 78 | {:ok, llama} = ExLLama.load_model("./test/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 79 | {:ok, options} = ExLLama.Session.default_options() 80 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 81 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello.
Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 82 | ExLLama.Session.start_completing_with(session, %{max_tokens: 512}) 83 | receive_text() 84 | 85 | 86 | ``` 87 | 88 | -------------------------------------------------------------------------------- /lib/ex_llama.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama do 2 | def load_model(path), do: ExLLama.Model.load_from_file(path) 3 | def load_model(path, %ExLLama.ModelOptions{} = opts), do: ExLLama.Model.load_from_file(path, opts) 4 | 5 | def create_session(model), do: ExLLama.Model.create_session(model) 6 | def create_session(model, options), do: ExLLama.Model.create_session(model, options) 7 | 8 | def advance_context(session, content), do: ExLLama.Session.advance_context(session, content) 9 | def completion(session, max_tokens, stop), do: ExLLama.Session.completion(session, max_tokens, stop) 10 | 11 | 12 | @default_choices 1 13 | @default_max_tokens 512 14 | 15 | def chat_completion(model, thread, options) do 16 | 17 | so = cond do 18 | x = options[:session_options] -> put_in(x, [:seed], options[:seed]) 19 | x = options[:seed] -> [seed: x] 20 | :else -> nil 21 | end 22 | options = update_in(options || [], [:add_generation_prompt], 23 | fn 24 | x when is_nil(x) -> true 25 | x -> x 26 | end 27 | ) 28 | session_options = ExLLama.SessionOptions.new(so) 29 | seed = session_options.seed 30 | choices = options[:choices] || @default_choices 31 | max_tokens = options[:max_tokens] || @default_max_tokens 32 | with {:ok, session} <- ExLLama.create_session(model, session_options), 33 | {:ok, thread_context} <- ExLLama.ChatTemplate.to_context(thread, model, options), 34 | {:ok, _} <- ExLLama.Session.set_context(session, thread_context), 35 | {:ok, prompt_tokens} = ExLLama.Session.context_size(session) do 36 | choices = Enum.map(1..choices, 37 | fn(_) -> 38 | with {:ok, %{content: result, token_length: l}} <- ExLLama.Session.completion(session, max_tokens, nil) do 39 | {:ok, {l, result}} 40 | end 41 | end 42 | ) 43 | |> Enum.filter( 44 | fn 45 | {:ok, _} -> true 46 | _ -> false 47 | end) 48 | |> Enum.map(fn {:ok, x} -> x end) 49 | 50 | options = (options || []) 51 | |> put_in([:prompt_tokens], prompt_tokens) 52 | ExLLama.ChatTemplate.extract_response(choices, model, options) 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate do 2 | @type thread :: [map] 3 | @type model :: ExLLama.Model.t 4 | @type meta :: Keyword.t | nil 5 | @type model_response :: {tokens :: integer, String.t} 6 | 7 | @callback support_list() :: {:ok, MapSet.t} 8 | @callback to_context(thread, model, meta) :: {:ok, String.t} 9 | @callback extract_response(response :: [model_response], model, meta) :: {:ok, ExLLama.ChatResponse.t} 10 | 11 | def pick_handler(model, meta) do 12 | cond do 13 | x = meta[:template] -> x 14 | :else -> 15 | # wip 16 | ExLLama.ChatTemplate.Zephyr 17 | end 18 | end 19 | 20 | def to_context(thread, model, meta), do: apply(pick_handler(model, meta), :to_context, [thread, model, meta]) 21 | def extract_response(responses, model, meta), do: apply(pick_handler(model, meta), :extract_response, [responses, model, meta]) 22 | end 23 | 24 | defmodule ExLLama.ChatTemplate.Exception do 25 | defexception 
[:message, :handler, :entry, :row] 26 | def message(%{message: m, handler: h, entry: e, row: r}) do 27 | "#{h}@#{r}: #{m}\n#{inspect e}" 28 | end 29 | 30 | end 31 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/alpaca.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Alpaca do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/alpaca.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token + system_message }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {% if message['role'] == 'user' %} 20 | {{ '### Instruction:\n' + message['content'].strip() + '\n\n' }} 21 | {% elif message['role'] == 'assistant' %} 22 | {{ '### Response:\n' + message['content'].strip() + eos_token + '\n\n' }} 23 | {% endif %} 24 | 25 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 26 | {{ '### Instruction:\n' }} 27 | {% endif %} 28 | {% endfor %} 29 | ```` 30 | """ 31 | end 32 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/amber_chat.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.AmberChat do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/amberchat.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {{ bos_token + system_message }} 20 | {% endif %} 21 | 22 | {% if message['role'] == 'user' %} 23 | {{ '###Human: ' + message['content'].strip() + '\n' }} 24 | {% elif message['role'] == 'assistant' %} 25 | {{ '###Assistant: ' + message['content'].strip() + '\n' }} 26 | {% endif %} 27 | 28 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 29 | {{ '###Assistant:' }} 30 | {% endif %} 31 | {% endfor %} 32 | ```` 33 | """ 34 | end 35 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/chat_ml.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.ChatML do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/chatml.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set offset = 1 %} 7 | {% else %} 8 | {% set offset = 0 %} 9 | {% endif %} 10 | 11 | {{ bos_token }} 12 | {% for message in messages %} 13 | {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %} 14 | {{ 
raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 15 | {% endif %} 16 | 17 | {{ '<|im_start|>' + message['role'] + '\n' + message['content'].strip() + '<|im_end|>\n' }} 18 | 19 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 20 | {{ '<|im_start|>assistant\n' }} 21 | {% endif %} 22 | {% endfor %} 23 | ```` 24 | """ 25 | end 26 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/falcon_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.FalconInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/falcon-instruct.jinja]] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'] %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {{ system_message.strip() }} 20 | {% endif %} 21 | {{ '\n\n' + message['role'].title() + ': ' + message['content'].strip().replace('\r\n', '\n').replace('\n\n', '\n') }} 22 | 23 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 24 | {{ '\n\nAssistant:' }} 25 | {% endif %} 26 | {% endfor %} 27 | ```` 28 | """ 29 | end 30 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/gemma_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.GemmaInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/gemma-it.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {% set content = system_message + message['content'] %} 20 | {% else %} 21 | {% set content = message['content'] %} 22 | {% endif %} 23 | 24 | {% if (message['role'] == 'assistant') %} 25 | {% set role = 'model' %} 26 | {% else %} 27 | {% set role = message['role'] %} 28 | {% endif %} 29 | 30 | {{ '' + role + '\n' + content.strip() + '\n' }} 31 | 32 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 33 | {{'model\n'}} 34 | {% endif %} 35 | {% endfor %} 36 | ```` 37 | """ 38 | 39 | 40 | def support_list() do 41 | [ ] 42 | end 43 | 44 | defp format_line(message, eos_token) do 45 | role = case message.role do 46 | :assistant -> :model 47 | x -> x 48 | end 49 | "#{role}\n#{String.trim(message.content)}\n" 50 | end 51 | 52 | def extract_response(responses, model, options) do 53 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 54 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 55 | choices = 
responses 56 | |> Enum.with_index() 57 | |> Enum.map( 58 | fn 59 | {{tokens, x}, index} -> 60 | x = x 61 | |> String.trim() 62 | |> String.trim_trailing(eos_token) 63 | 64 | x = GenAI.Message.assistant(x) 65 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 66 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 67 | end) 68 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 69 | prompt_tokens = options[:prompt_tokens] 70 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 71 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 72 | {:ok, completion} 73 | end 74 | end 75 | 76 | def compact(thread, acc \\ []) 77 | def compact([], acc), do: acc 78 | def compact([h], acc), do: [h | acc] 79 | def compact([%{role: :system} = s, %{role: :user} = u|t], acc) do 80 | c = %{u| content: "#{String.trim(s.content)}\n\n#{String.trim(u.content)}"} 81 | compact(t, [c| acc]) 82 | end 83 | def compact([h|t], acc), do: compact(t, [h|acc]) 84 | 85 | def to_context(thread, model, options) do 86 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 87 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 88 | lines = thread 89 | |> compact() 90 | |> Enum.reverse() 91 | |> Enum.map(&format_line(&1, eos_token)) 92 | |> Enum.join("") 93 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 94 | {:ok, lines <> "model\n"} 95 | else 96 | {:ok, lines} 97 | end 98 | end 99 | end 100 | 101 | end 102 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/llama_2_chat.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.LLama2Chat do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/llama-2-chat.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = '<>\n' + messages[0]['content'].strip() + '\n<>\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {% set content = system_message + message['content'] %} 20 | {% else %} 21 | {% set content = message['content'] %} 22 | {% endif %} 23 | 24 | {% if message['role'] == 'user' %} 25 | {{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }} 26 | {% elif message['role'] == 'assistant' %} 27 | {{ ' ' + content.strip() + ' ' + eos_token }} 28 | {% endif %} 29 | {% endfor %} 30 | ```` 31 | """ 32 | 33 | 34 | 35 | def support_list() do 36 | [ ] 37 | end 38 | 39 | defp format_line(message, bos_token, eos_token) do 40 | case message.role do 41 | :user -> "#{bos_token}[INST] #{String.trim(message.content)} [/INST]" 42 | :assistant -> " #{String.trim(message.content)} #{eos_token}" 43 | end 44 | end 45 | 46 | def extract_response(responses, model, options) do 47 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 48 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 49 | choices = responses 50 | 
|> Enum.with_index() 51 | |> Enum.map( 52 | fn 53 | {{tokens, x}, index} -> 54 | x = x 55 | |> String.trim() 56 | |> String.trim_trailing(eos_token) 57 | x = GenAI.Message.assistant(x) 58 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 59 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 60 | end) 61 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 62 | prompt_tokens = options[:prompt_tokens] 63 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 64 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 65 | {:ok, completion} 66 | end 67 | end 68 | 69 | def compact(thread, acc \\ []) 70 | def compact([], acc), do: acc 71 | def compact([h], acc), do: [h | acc] 72 | def compact([%{role: :system} = s, %{role: :user} = u|t], acc) do 73 | c = %{u| content: "<>\n#{String.trim(s.content)}\n<>\n\n#{String.trim(u.content)}"} 74 | compact(t, [c| acc]) 75 | end 76 | def compact([h|t], acc), do: compact(t, [h|acc]) 77 | 78 | def to_context(thread, model, options) do 79 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 80 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 81 | lines = thread 82 | |> compact() 83 | |> Enum.reverse() 84 | |> Enum.map(&format_line(&1, bos_token, eos_token)) 85 | |> Enum.join("") 86 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 87 | {:ok, lines} 88 | else 89 | {:ok, lines} 90 | end 91 | end 92 | end 93 | 94 | end 95 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/mistral_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.MistralInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/mistral-instruct.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {% if loop.index0 == 0 %} 20 | {% set content = system_message + message['content'] %} 21 | {% else %} 22 | {% set content = message['content'] %} 23 | {% endif %} 24 | 25 | {% if message['role'] == 'user' %} 26 | {{ '[INST] ' + content.strip() + ' [/INST]' }} 27 | {% elif message['role'] == 'assistant' %} 28 | {{ ' ' + content.strip() + ' ' + eos_token }} 29 | {% endif %} 30 | {% endfor %} 31 | ```` 32 | """ 33 | 34 | 35 | 36 | def support_list() do 37 | [ ] 38 | end 39 | 40 | defp format_line(message, eos_token) do 41 | case message.role do 42 | :user -> "[INST] #{String.trim(message.content)} [/INST]" 43 | :assistant -> " #{String.trim(message.content)} #{eos_token}" 44 | end 45 | end 46 | 47 | def extract_response(responses, model, options) do 48 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 49 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 50 | choices = responses 51 | |> Enum.with_index() 52 | |> 
Enum.map( 53 | fn 54 | {{tokens, x}, index} -> 55 | x = x 56 | |> String.trim() 57 | |> String.trim_trailing(eos_token) 58 | x = GenAI.Message.assistant(x) 59 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 60 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 61 | end) 62 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 63 | prompt_tokens = options[:prompt_tokens] 64 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 65 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 66 | {:ok, completion} 67 | end 68 | end 69 | 70 | def compact(thread, acc \\ []) 71 | def compact([], acc), do: acc 72 | def compact([h], acc), do: [h | acc] 73 | def compact([%{role: :system} = s, %{role: :user} = u|t], acc) do 74 | c = %{u| content: "#{String.trim(s.content)}\n\n#{String.trim(u.content)}"} 75 | compact(t, [c| acc]) 76 | end 77 | def compact([h|t], acc), do: compact(t, [h|acc]) 78 | 79 | def to_context(thread, model, options) do 80 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 81 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 82 | lines = thread 83 | |> compact() 84 | |> Enum.reverse() 85 | |> Enum.map(&format_line(&1, eos_token)) 86 | |> Enum.join("") 87 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 88 | {:ok, bos_token <> lines} 89 | else 90 | {:ok, bos_token <> lines} 91 | end 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/open_chat.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.OpenChat do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/openchat.jinja]] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '<|end_of_turn|>' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token + system_message }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>' }} 20 | 21 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 22 | {{ 'GPT4 Correct Assistant:' }} 23 | {% endif %} 24 | {% endfor %} 25 | ```` 26 | """ 27 | 28 | 29 | def support_list() do 30 | [ ] 31 | end 32 | 33 | 34 | defp format_line(%{role: :system} = message) do 35 | "#{String.trim(message.content)}<|end_of_turn|>" 36 | end 37 | defp format_line(message) do 38 | "GPT4 Correct #{String.capitalize(to_string(message.role))}: #{String.trim(message.content)}<|end_of_turn|>" 39 | end 40 | 41 | def extract_response(responses, model, options) do 42 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 43 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 44 | choices = responses 45 | |> Enum.with_index() 46 | |> Enum.map( 47 | fn 48 | {{tokens, x}, index} -> 49 | x = x 50 | |> String.trim() 51 | |> 
String.trim_trailing(eos_token) 52 | x = GenAI.Message.assistant(x) 53 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 54 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 55 | end) 56 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 57 | prompt_tokens = options[:prompt_tokens] 58 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 59 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 60 | {:ok, completion} 61 | end 62 | end 63 | 64 | def to_context(thread, model, options) do 65 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model) do 66 | lines = thread 67 | |> Enum.map(&format_line/1) 68 | |> Enum.join("") 69 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 70 | {:ok, bos_token <> lines <> "GPT4 Correct Assistant:"} 71 | else 72 | {:ok, bos_token <> lines} 73 | end 74 | end 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/saiga.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Saiga do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/saiga.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = bos_token + 'system' + '\n' + messages[0]['content'].strip() + eos_token %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/bot/user/bot/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {{ system_message }} 20 | {% endif %} 21 | 22 | {{ bos_token + message['role'] + '\n' + message['content'].strip() + eos_token }} 23 | 24 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 25 | {{ bos_token + 'bot\n' }} 26 | {% endif %} 27 | {% endfor %} 28 | ```` 29 | """ 30 | 31 | def support_list() do 32 | [ ] 33 | end 34 | 35 | defp format_line(message, bos_token, eos_token) do 36 | "#{bos_token}#{message.role}\n#{String.trim(message.content)}#{eos_token}" 37 | end 38 | 39 | def extract_response(responses, model, options) do 40 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 41 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 42 | choices = responses 43 | |> Enum.with_index() 44 | |> Enum.map( 45 | fn 46 | {{tokens, x}, index} -> 47 | x = x 48 | |> String.trim() 49 | |> String.trim_trailing(eos_token) 50 | x = GenAI.Message.assistant(x) 51 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 52 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 53 | end) 54 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 55 | prompt_tokens = options[:prompt_tokens] 56 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 57 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: 
usage} 58 | {:ok, completion} 59 | end 60 | end 61 | 62 | def to_context(thread, model, options) do 63 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 64 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 65 | lines = thread 66 | |> Enum.map(&format_line(&1, bos_token, eos_token)) 67 | |> Enum.join("") 68 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 69 | {:ok, lines <> "#{bos_token}bot\n"} 70 | else 71 | {:ok, lines} 72 | end 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/solar_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.SolarInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/solar-instruct.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set offset = 1 %} 7 | {% else %} 8 | {% set offset = 0 %} 9 | {% endif %} 10 | 11 | {{ bos_token }} 12 | {% for message in messages %} 13 | {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %} 14 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 15 | {% endif %} 16 | 17 | {{ '### ' + message['role'].title() + ':\n' + message['content'].strip() + '\n\n' }} 18 | 19 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 20 | {{ '### Assistant:\n' }} 21 | {% endif %} 22 | {% endfor %} 23 | ```` 24 | """ 25 | 26 | 27 | def support_list() do 28 | [ ] 29 | end 30 | 31 | defp format_line(message) do 32 | "### #{String.capitalize(to_string(message.role))}:\n#{String.trim(message.content)}\n\n" 33 | end 34 | 35 | def extract_response(responses, model, options) do 36 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 37 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 38 | choices = responses 39 | |> Enum.with_index() 40 | |> Enum.map( 41 | fn 42 | {{tokens, x}, index} -> 43 | x = x 44 | |> String.trim() 45 | |> String.trim_trailing(eos_token) 46 | x = GenAI.Message.assistant(x) 47 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 48 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 49 | end) 50 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 51 | prompt_tokens = options[:prompt_tokens] 52 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 53 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 54 | {:ok, completion} 55 | end 56 | end 57 | 58 | def to_context(thread, model, options) do 59 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model) do 60 | lines = thread 61 | |> Enum.map(&format_line/1) 62 | |> Enum.join("") 63 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 64 | {:ok, bos_token <> lines <> "### Assistant:\n"} 65 | else 66 | {:ok, bos_token <> lines} 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/vicuna.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Vicuna do 2 | @moduledoc """ 3 | based on: 
https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/zephyr.jinja 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token + system_message }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {% if message['role'] == 'user' %} 20 | {{ 'USER: ' + message['content'].strip() + '\n' }} 21 | {% elif message['role'] == 'assistant' %} 22 | {{ 'ASSISTANT: ' + message['content'].strip() + eos_token + '\n' }} 23 | {% endif %} 24 | 25 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 26 | {{ 'ASSISTANT:' }} 27 | {% endif %} 28 | {% endfor %} 29 | ```` 30 | """ 31 | 32 | 33 | def support_list() do 34 | [] 35 | end 36 | 37 | defp format_line(message = %{role: :system}, eos_token) do 38 | "#{String.trim(message.content)}\n\n" 39 | end 40 | defp format_line(message, eos_token) do 41 | "#{message.role |> String.upcase()}: #{String.trim(message.content)}#{eos_token}\n" 42 | end 43 | 44 | def extract_response(responses, model, options) do 45 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 46 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 47 | choices = responses 48 | |> Enum.with_index() 49 | |> Enum.map( 50 | fn 51 | {{tokens, x}, index} -> 52 | x = x 53 | |> String.trim() 54 | |> String.trim_trailing(eos_token) 55 | x = GenAI.Message.assistant(x) 56 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 57 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 58 | end) 59 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 60 | prompt_tokens = options[:prompt_tokens] 61 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 62 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 63 | {:ok, completion} 64 | end 65 | end 66 | 67 | def to_context(thread, model, options) do 68 | with {:ok, eos_token} <- ExLLama.Model.__eos__(model), 69 | {:ok, bos_token} <- ExLLama.Model.__bos__(model) do 70 | system_message_offset = if (Enum.at(thread, 0)[:role] == :system), do: 1, else: 0 71 | lines = thread 72 | |> Enum.with_index() 73 | |> Enum.map( 74 | fn 75 | {msg = %{role: :system = role, content: content}, 0} -> 76 | format_line(msg, eos_token) 77 | {msg = %{role: :system = role, content: content}, index} -> 78 | unless options[:strict] == false do 79 | raise ExLLama.ChatTemplate.Exception, message: "Only the first message may be from system. 
Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 80 | end 81 | format_line(msg, eos_token) 82 | 83 | {msg =%{role: :assistant = role, content: content}, index} -> 84 | unless options[:strict] == false or index <= (1 + system_message_offset) do 85 | if Enum.at(thread, index - 2)[:role] != role do 86 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 87 | end 88 | end 89 | format_line(msg, eos_token) 90 | 91 | {msg = %{role: :user = role, content: content}, index} -> 92 | unless options[:strict] == false or index <= (2 + system_message_offset) do 93 | if Enum.at(thread, index - 2)[:role] != role do 94 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 95 | end 96 | end 97 | format_line(msg, eos_token) 98 | 99 | {msg = %{role: role, content: content}, index} -> 100 | unless options[:strict] == false or options[:expanded_roles] do 101 | raise ExLLama.ChatTemplate.Exception, message: "Only the first user,assistant,system roles are supported. Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 102 | end 103 | format_line(msg, eos_token) 104 | end 105 | ) |> Enum.join("\n") 106 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 107 | {:ok, bos_token <> lines <> "ASSISTANT:"} 108 | else 109 | {:ok, bos_token <> lines} 110 | end 111 | end 112 | end 113 | 114 | end 115 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/zephyr.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Zephyr do 2 | @moduledoc """ 3 | based on: https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/zephyr.jinja 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set offset = 1 %} 7 | {% else %} 8 | {% set offset = 0 %} 9 | {% endif %} 10 | 11 | {% for message in messages %} 12 | {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %} 13 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 14 | {% endif %} 15 | 16 | {{ '<|' + message['role'] + '|>\n' + message['content'].strip() + eos_token + '\n' }} 17 | 18 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 19 | {{ '<|assistant|>\n' }} 20 | {% endif %} 21 | {% endfor %} 22 | ``` 23 | """ 24 | 25 | def support_list() do 26 | [ {~r"^tinyllama-1.1b.*$" , 1}] 27 | end 28 | 29 | defp format_line(message, eos_token) do 30 | "<|#{message.role}|>\n #{String.trim(message.content)}#{eos_token}\n" 31 | end 32 | 33 | def extract_response(responses, model, options) do 34 | with {:ok, eos_token} <- ExLLama.Model.__eos__(model), 35 | {:ok, model_name} <- ExLLama.Model.__model_name__(model) do 36 | 37 | choices = responses 38 | |> Enum.with_index() 39 | |> Enum.map( 40 | fn 41 | {{tokens, x}, index} -> 42 | x = x 43 | |> String.trim() 44 | |> String.trim_trailing(eos_token) 45 | x = GenAI.Message.assistant(x) 46 | # todo tool use extension and finish_reason support 47 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 48 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 49 | end) 50 | completion_tokens = Enum.map(responses, fn 
{tokens,_} -> tokens end) |> Enum.max() 51 | prompt_tokens = options[:prompt_tokens] 52 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 53 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 54 | {:ok, completion} 55 | 56 | end 57 | end 58 | 59 | def to_context(thread, model, options) do 60 | with {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 61 | system_message_offset = if (Enum.at(thread, 0)[:role] == :system), do: 1, else: 0 62 | lines = thread 63 | |> Enum.with_index() 64 | |> Enum.map( 65 | fn 66 | {msg = %{role: :system = role, content: content}, 0} -> 67 | format_line(msg, eos_token) 68 | {msg = %{role: :system = role, content: content}, index} -> 69 | unless options[:strict] == false do 70 | raise ExLLama.ChatTemplate.Exception, message: "Only the first message may be from system. Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 71 | end 72 | format_line(msg, eos_token) 73 | 74 | {msg =%{role: :assistant = role, content: content}, index} -> 75 | unless options[:strict] == false or index <= (1 + system_message_offset) do 76 | if Enum.at(thread, index - 2)[:role] != role do 77 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 78 | end 79 | end 80 | format_line(msg, eos_token) 81 | 82 | {msg = %{role: :user = role, content: content}, index} -> 83 | unless options[:strict] == false or index <= (2 + system_message_offset) do 84 | if Enum.at(thread, index - 2)[:role] != role do 85 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 86 | end 87 | end 88 | format_line(msg, eos_token) 89 | 90 | {msg = %{role: role, content: content}, index} -> 91 | unless options[:strict] == false or options[:expanded_roles] do 92 | raise ExLLama.ChatTemplate.Exception, message: "Only the first user,assistant,system roles are supported. 
Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 93 | end 94 | format_line(msg, eos_token) 95 | end 96 | ) |> Enum.join("\n") 97 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 98 | {:ok, lines <> "<|assistant|>\n"} 99 | else 100 | {:ok, lines} 101 | end 102 | end 103 | end 104 | end 105 | -------------------------------------------------------------------------------- /lib/ex_llama/context_params.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ContextParams do 2 | defstruct [ 3 | :seed, 4 | :n_ctx, 5 | :n_batch, 6 | :n_threads, 7 | :n_threads_batch, 8 | :rope_scaling_type, 9 | :rope_freq_base, 10 | :rope_freq_scale, 11 | :yarn_ext_factor, 12 | :yarn_attn_factor, 13 | :yarn_beta_fast, 14 | :yarn_beta_slow, 15 | :yarn_orig_ctx, 16 | :type_k, 17 | :type_v, 18 | :embedding, 19 | :offload_kqv, 20 | :pooling 21 | ] 22 | 23 | @type t :: %__MODULE__{ 24 | seed: non_neg_integer(), 25 | n_ctx: non_neg_integer(), 26 | n_batch: non_neg_integer(), 27 | n_threads: non_neg_integer(), 28 | n_threads_batch: non_neg_integer(), 29 | rope_scaling_type: integer(), 30 | rope_freq_base: float(), 31 | rope_freq_scale: float(), 32 | yarn_ext_factor: float(), 33 | yarn_attn_factor: float(), 34 | yarn_beta_fast: float(), 35 | yarn_beta_slow: float(), 36 | yarn_orig_ctx: non_neg_integer(), 37 | type_k: non_neg_integer(), 38 | type_v: non_neg_integer(), 39 | embedding: boolean(), 40 | offload_kqv: boolean(), 41 | pooling: boolean() 42 | } 43 | end 44 | -------------------------------------------------------------------------------- /lib/ex_llama/embedding_options.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.EmbeddingOptions do 2 | defstruct [ 3 | n_threads: 1, 4 | n_threads_batch: 1, 5 | ] 6 | end 7 | -------------------------------------------------------------------------------- /lib/ex_llama/model.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.Model do 2 | defstruct [ 3 | resource: nil, 4 | eos: nil, 5 | bos: nil, 6 | name: nil 7 | ] 8 | 9 | def __eos__(model) do 10 | {:ok, List.to_string(model.eos)} 11 | end 12 | 13 | def __bos__(model) do 14 | {:ok, List.to_string(model.bos)} 15 | end 16 | 17 | 18 | def __model_name__(model) do 19 | {:ok, model.name} 20 | end 21 | 22 | def load_from_file(path), do: ExLLama.Nif.__model_nif_load_from_file__(path, ExLLama.ModelOptions.new()) 23 | def load_from_file(path, %ExLLama.ModelOptions{} = opts), do: ExLLama.Nif.__model_nif_load_from_file__(path, opts) 24 | 25 | def detokenize(model, token), do: ExLLama.Nif.__model_nif_detokenize__(model, token) 26 | 27 | def token_to_byte_piece(model, token), do: ExLLama.Nif.__model_nif_token_to_byte_piece__(model, token) 28 | 29 | def token_to_piece(model, token), do: ExLLama.Nif.__model_nif_token_to_piece__(model, token) 30 | 31 | def decode_tokens(model, tokens), do: ExLLama.Nif.__model_nif_decode_tokens__(model, tokens) 32 | 33 | def create_session(model) do 34 | with {:ok, options} <- ExLLama.Session.default_options do 35 | create_session(model, options) 36 | end 37 | end 38 | def create_session(model, options), do: ExLLama.Nif.__model_nif_create_session__(model, options) 39 | 40 | def embeddings(model, inputs, options), do: ExLLama.Nif.__model_nif_embeddings__(model, inputs, options) 41 | 42 | def bos(model), do: ExLLama.Nif.__model_nif_bos__(model) 43 | 44 | def 
eos(model), do: ExLLama.Nif.__model_nif_eos__(model) 45 | 46 | def nl(model), do: ExLLama.Nif.__model_nif_nl__(model) 47 | 48 | def infill_prefix(model), do: ExLLama.Nif.__model_nif_infill_prefix__(model) 49 | 50 | def infill_middle(model), do: ExLLama.Nif.__model_nif_infill_middle__(model) 51 | 52 | def infill_suffix(model), do: ExLLama.Nif.__model_nif_infill_suffix__(model) 53 | 54 | def eot(model), do: ExLLama.Nif.__model_nif_eot__(model) 55 | 56 | def vocabulary_size(model), do: ExLLama.Nif.__model_nif_vocabulary_size__(model) 57 | 58 | def embed_len(model), do: ExLLama.Nif.__model_nif_embed_len__(model) 59 | 60 | def train_len(model), do: ExLLama.Nif.__model_nif_train_len__(model) 61 | end 62 | -------------------------------------------------------------------------------- /lib/ex_llama/model_options.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ModelOptions do 2 | defstruct [ 3 | :n_gpu_layers, 4 | :split_mode, 5 | :main_gpu, 6 | :vocab_only, 7 | :use_mmap, 8 | :use_mlock 9 | ] 10 | 11 | @type t :: %__MODULE__{ 12 | n_gpu_layers: non_neg_integer(), 13 | split_mode: String.t, # :none | :layer | :row, 14 | main_gpu: non_neg_integer(), 15 | vocab_only: boolean(), 16 | use_mmap: boolean(), 17 | use_mlock: boolean() 18 | } 19 | 20 | def new() do 21 | %__MODULE__{ 22 | n_gpu_layers: 0, 23 | split_mode: "none", 24 | main_gpu: 0, 25 | vocab_only: false, 26 | use_mmap: false, 27 | use_mlock: false 28 | } 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/ex_llama/nif.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.Nif do 2 | use Rustler, 3 | otp_app: :ex_llama, 4 | crate: :erlang_llama_cpp_nif 5 | 6 | defstruct [ 7 | resource: nil 8 | ] 9 | 10 | def __model_nif_load_from_file__(_,_), do: :erlang.nif_error(:nif_not_loaded) 11 | def __model_nif_detokenize__(_, _), do: :erlang.nif_error(:nif_not_loaded) 12 | def __model_nif_token_to_byte_piece__(_, _), do: :erlang.nif_error(:nif_not_loaded) 13 | def __model_nif_token_to_piece__(_, _), do: :erlang.nif_error(:nif_not_loaded) 14 | def __model_nif_decode_tokens__(_, _), do: :erlang.nif_error(:nif_not_loaded) 15 | def __model_nif_create_session__(_, _), do: :erlang.nif_error(:nif_not_loaded) 16 | def __model_nif_embeddings__(_, _, _), do: :erlang.nif_error(:nif_not_loaded) 17 | def __model_nif_bos__(_), do: :erlang.nif_error(:nif_not_loaded) 18 | def __model_nif_eos__(_), do: :erlang.nif_error(:nif_not_loaded) 19 | def __model_nif_nl__(_), do: :erlang.nif_error(:nif_not_loaded) 20 | def __model_nif_infill_prefix__(_), do: :erlang.nif_error(:nif_not_loaded) 21 | def __model_nif_infill_middle__(_), do: :erlang.nif_error(:nif_not_loaded) 22 | def __model_nif_infill_suffix__(_), do: :erlang.nif_error(:nif_not_loaded) 23 | def __model_nif_eot__(_), do: :erlang.nif_error(:nif_not_loaded) 24 | def __model_nif_vocabulary_size__(_), do: :erlang.nif_error(:nif_not_loaded) 25 | def __model_nif_embed_len__(_), do: :erlang.nif_error(:nif_not_loaded) 26 | def __model_nif_train_len__(_), do: :erlang.nif_error(:nif_not_loaded) 27 | 28 | def __context_nif_load_model__(_,_), do: :erlang.nif_error(:nif_not_loaded) 29 | def __context_nif_default_session_options__(), do: :erlang.nif_error(:nif_not_loaded) 30 | def __context_nif_create_session__(_, _), do: :erlang.nif_error(:nif_not_loaded) 31 | def __context_nif_advance_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 32 | def 
__context_nif_complete__(_,_), do: :erlang.nif_error(:nif_not_loaded) 33 | 34 | 35 | def __session_nif_default_session_options__(), do: :erlang.nif_error(:nif_not_loaded) 36 | def __session_nif_advance_context_with_tokens__(_, _), do: :erlang.nif_error(:nif_not_loaded) 37 | def __session_nif_advance_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 38 | def __session_nif_start_completing_with__(_pid, _session, _max_tokens), do: :erlang.nif_error(:nif_not_loaded) 39 | def __session_nif_completion__(_, _, _), do: :erlang.nif_error(:nif_not_loaded) 40 | def __session_nif_model__(_), do: :erlang.nif_error(:nif_not_loaded) 41 | def __session_nif_params__(_), do: :erlang.nif_error(:nif_not_loaded) 42 | def __session_nif_context_size__(_), do: :erlang.nif_error(:nif_not_loaded) 43 | def __session_nif_context__(_), do: :erlang.nif_error(:nif_not_loaded) 44 | def __session_nif_truncate_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 45 | def __session_nif_set_context_to_tokens__(_, _), do: :erlang.nif_error(:nif_not_loaded) 46 | def __session_nif_set_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 47 | def __session_deep_copy__(_), do: :erlang.nif_error(:nif_not_loaded) 48 | 49 | 50 | 51 | 52 | 53 | 54 | end 55 | -------------------------------------------------------------------------------- /lib/ex_llama/session.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.Session do 2 | defstruct [ 3 | seed: nil, 4 | model_name: nil, 5 | resource: nil 6 | ] 7 | 8 | def default_options(), do: ExLLama.Nif.__session_nif_default_session_options__() 9 | def advance_context_with_tokens(%__MODULE__{resource: _} = session, context), do: ExLLama.Nif.__session_nif_advance_context_with_tokens__(session.resource, context) 10 | def advance_context(%__MODULE__{resource: _} = session, context), do: ExLLama.Nif.__session_nif_advance_context__(session.resource, context) 11 | def start_completing_with(%__MODULE__{resource: _} = session, options) do 12 | # @TODO this is a little hacky, threading should be done in nif but passing env into the thread is unsupported. 
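# Note (descriptive, inferred from this function and the README streaming example): the receiving
# pid (options[:pid], defaulting to self()) is handed to the NIF so generated text can be streamed
# back to it as messages, and the spawned process below forwards the NIF's final result to the same pid.
# Callers typically drain the mailbox until a terminating {:ok, _}, {:error, _}, or :fin message
# arrives, as shown by receive_text/1 in the README.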
13 | max_tokens = options[:max_tokens] || 512 14 | pid = with nil <- options[:pid] do 15 | self() 16 | end 17 | spawn fn -> 18 | o = ExLLama.Nif.__session_nif_start_completing_with__(pid, session.resource, max_tokens) 19 | send(pid, o) 20 | end 21 | :ok 22 | end 23 | def completion(%__MODULE__{resource: _} = session, max_tokens, stop), do: ExLLama.Nif.__session_nif_completion__(session.resource, max_tokens, stop) 24 | def model(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_model__(session.resource) 25 | def params(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_params__(session.resource) 26 | def context_size(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_context_size__(session.resource) 27 | def context(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_context__(session.resource) 28 | def truncate_context(%__MODULE__{resource: _} = session, n_tokens), do: ExLLama.Nif.__session_nif_truncate_context__(session.resource, n_tokens) 29 | def set_context_to_tokens(%__MODULE__{resource: _} = session, tokens), do: ExLLama.Nif.__session_nif_set_context_to_tokens__(session.resource, tokens) 30 | def set_context(%__MODULE__{resource: _} = session, context), do: ExLLama.Nif.__session_nif_set_context__(session.resource, context) 31 | def deep_copy(%__MODULE__{resource: _} = session) do 32 | with {:ok, copy} <- ExLLama.Nif.__session_deep_copy__(session.resource) do 33 | {:ok, put_in(copy, [Access.key(:model_name)], session.model_name)} 34 | end 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /lib/ex_llama/session_options.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.SessionOptions do 2 | defstruct [ 3 | :seed, 4 | :n_ctx, 5 | :n_batch, 6 | :n_threads, 7 | :n_threads_batch, 8 | :rope_scaling_type, 9 | :rope_freq_base, 10 | :rope_freq_scale, 11 | :yarn_ext_factor, 12 | :yarn_attn_factor, 13 | :yarn_beta_fast, 14 | :yarn_beta_slow, 15 | :yarn_orig_ctx, 16 | :type_k, 17 | :type_v, 18 | :embedding, 19 | :offload_kqv, 20 | :pooling, 21 | ] 22 | 23 | def new() do 24 | {:ok, session_options} = ExLLama.Session.default_options() 25 | session_options 26 | end 27 | def new(nil), do: new() 28 | def new(%__MODULE__{} = x), do: x 29 | def new(params) when is_list(params), do: new(Map.new(params)) 30 | def new(params) when is_map(params) do 31 | {:ok, session_options} = ExLLama.Session.default_options() 32 | so = Map.from_struct(session_options) 33 | allowed_keys = Map.keys(so) 34 | po = Map.take(params, allowed_keys) 35 | unless po == %{} do 36 | ExLLama.SessionOptions.__struct__(Map.merge(so, po)) 37 | else 38 | session_options 39 | end 40 | end 41 | 42 | end 43 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :ex_llama, 7 | name: "LLama CPP Nif Wrapper", 8 | description: description(), 9 | package: package(), 10 | version: "0.1.0", 11 | elixir: "~> 1.16", 12 | start_permanent: Mix.env() == :prod, 13 | rustler_crates: rustler_crates(), 14 | docs: [ 15 | main: "ExLLama", 16 | extras: [ 17 | "README.md", 18 | "LICENSE" 19 | ] 20 | ], 21 | dialyzer: [ 22 | plt_file: {:no_warn, "priv/plts/project.plt"} 23 | ], 24 | deps: deps() 25 | ] 26 | end 27 | 28 | 29 | defp description() do 30 | "NIF Wrapper around the 
rust LLamaCPP client allowing elixir code to load/infer against gguf format models." 31 | end 32 | 33 | defp rustler_crates do 34 | [ 35 | erlang_llama_cpp_nif: [ 36 | path: "native/erlang_llama_cpp_nif", 37 | mode: rustc_mode(Mix.env()) 38 | ] 39 | ] 40 | end 41 | 42 | defp rustc_mode(:prod), do: :release 43 | defp rustc_mode(_), do: :debug 44 | 45 | defp package() do 46 | [ 47 | licenses: ["MIT"], 48 | links: %{ 49 | project: "https://github.com/noizu-labs-ml/ex_llama", 50 | developer_github: "https://github.com/noizu" 51 | }, 52 | files: ~w(lib native priv mix.exs README.md CHANGELOG.md LICENSE*), 53 | exclude_patterns: ["priv/models/local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"] 54 | ] 55 | end 56 | 57 | # Run "mix help compile.app" to learn about applications. 58 | def application do 59 | [ 60 | extra_applications: [:logger] 61 | ] 62 | end 63 | 64 | # Run "mix help deps" to learn about dependencies. 65 | defp deps do 66 | [ 67 | {:rustler, "~> 0.32.1", runtime: false}, 68 | {:ex_doc, "~> 0.28.3", only: [:dev, :test], optional: true, runtime: false}, # Documentation Provider 69 | {:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false}, 70 | {:genai_core, "~> 0.2"}, 71 | {:finch, "~> 0.15", optional: true}, 72 | {:elixir_uuid, "~> 1.2", optional: true}, 73 | {:shortuuid, "~> 3.0", optional: true}, 74 | 75 | 76 | # {:dep_from_hexpm, "~> 0.3.0"}, 77 | # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"} 78 | ] 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, 3 | "credo": {:hex, :credo, "1.7.12", "9e3c20463de4b5f3f23721527fcaf16722ec815e70ff6c60b86412c695d426c1", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8493d45c656c5427d9c729235b99d498bd133421f3e0a683e5c1b561471291e5"}, 4 | "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, 5 | "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, 6 | "elixir_uuid": {:hex, :elixir_uuid, "1.2.1", "dce506597acb7e6b0daeaff52ff6a9043f5919a4c3315abb4143f0b00378c097", [:mix], [], "hexpm", "f7eba2ea6c3555cea09706492716b0d87397b88946e6380898c2889d68585752"}, 7 | "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 8 | "ex_doc": {:hex, :ex_doc, "0.28.6", "2bbd7a143d3014fc26de9056793e97600ae8978af2ced82c2575f130b7c0d7d7", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: 
false]}], "hexpm", "bca1441614654710ba37a0e173079273d619f9160cbcc8cd04e6bd59f1ad0e29"}, 9 | "file_system": {:hex, :file_system, "1.1.0", "08d232062284546c6c34426997dd7ef6ec9f8bbd090eb91780283c9016840e8f", [:mix], [], "hexpm", "bfcf81244f416871f2a2e15c1b515287faa5db9c6bcf290222206d120b3d43f6"}, 10 | "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"}, 11 | "genai_core": {:hex, :genai_core, "0.2.0", "76324ac29a30b64543dd4ed15d7513902407e0dea0bf357c145da52712d25d9a", [:mix], [{:elixir_uuid, "~> 1.2", [hex: :elixir_uuid, repo: "hexpm", optional: true]}, {:finch, "~> 0.15", [hex: :finch, repo: "hexpm", optional: true]}, {:floki, ">= 0.30.0", [hex: :floki, repo: "hexpm", optional: true]}, {:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: true]}, {:noizu_labs_core, "~> 0.1", [hex: :noizu_labs_core, repo: "hexpm", optional: false]}, {:shortuuid, "~> 3.0", [hex: :shortuuid, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: true]}, {:yaml_elixir, "~> 2.9.0", [hex: :yaml_elixir, repo: "hexpm", optional: true]}, {:ymlr, "~> 4.0", [hex: :ymlr, repo: "hexpm", optional: true]}], "hexpm", "14317445578e2654e84fd647359f4e5366e7ba8d94b89bab50774993f20117b0"}, 12 | "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, 13 | "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, 14 | "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, 15 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 16 | "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"}, 17 | "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"}, 18 | "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", 
optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"}, 19 | "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, 20 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 21 | "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, 22 | "noizu_labs_core": {:hex, :noizu_labs_core, "0.1.5", "881988ee5c0492a9f5d00ecf228c8df1c0621e614ccacfb22431c1454da39016", [:mix], [{:credo, "~> 1.0", [hex: :credo, repo: "hexpm", optional: false]}], "hexpm", "887dcadfbc3fd176ba8d71fae9e29c3f825f2ee7354f54cb70af3a0e3de24567"}, 23 | "rustler": {:hex, :rustler, "0.32.1", "f4cf5a39f9e85d182c0a3f75fa15b5d0add6542ab0bf9ceac6b4023109ebd3fc", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "b96be75526784f86f6587f051bc8d6f4eaff23d6e0f88dbcfe4d5871f52946f7"}, 24 | "shortuuid": {:hex, :shortuuid, "3.0.0", "028684d9eeed0ad4b800e8481afd854e1a61c526f35952455b2ee4248601e7b8", [:mix], [], "hexpm", "dfd8f80f514cbb91622cb83f4ac0d6e2f06d98cc6d4aeba94444a212289d0d39"}, 25 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, 26 | "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, 27 | } 28 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.21.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "autocfg" 31 | version = "1.2.0" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" 34 | 35 | [[package]] 36 | name = "backtrace" 37 | version = "0.3.71" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" 40 | dependencies = [ 41 | "addr2line", 42 | "cc", 43 | "cfg-if", 44 | "libc", 45 | "miniz_oxide", 46 | "object", 47 | "rustc-demangle", 48 | ] 49 | 50 | [[package]] 51 | name = "bindgen" 52 | version = "0.69.4" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" 55 | dependencies = [ 56 | "bitflags", 57 | "cexpr", 58 | "clang-sys", 59 | "itertools", 60 | "lazy_static", 61 | "lazycell", 62 | "log", 63 | "prettyplease", 64 | "proc-macro2", 65 | "quote", 66 | "regex", 67 | "rustc-hash", 68 | "shlex", 69 | "syn 2.0.58", 70 | "which", 71 | ] 72 | 73 | [[package]] 74 | name = "bitflags" 75 | version = "2.5.0" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" 78 | 79 | [[package]] 80 | name = "cc" 81 | version = "1.0.92" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" 84 | dependencies = [ 85 | "jobserver", 86 | "libc", 87 | ] 88 | 89 | [[package]] 90 | name = "cexpr" 91 | version = "0.6.0" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 94 | dependencies = [ 95 | "nom", 96 | ] 97 | 98 | [[package]] 99 | name = "cfg-if" 100 | version = "1.0.0" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 103 | 104 | [[package]] 105 | name = "clang-sys" 106 | version = "1.7.0" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" 109 | dependencies = [ 110 | "glob", 111 | "libc", 112 | "libloading", 113 | ] 114 | 115 | [[package]] 116 | name = "convert_case" 117 | version = "0.4.0" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 120 | 121 | [[package]] 122 | name = "derive_more" 123 | version = "0.99.17" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = 
"4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" 126 | dependencies = [ 127 | "convert_case", 128 | "proc-macro2", 129 | "quote", 130 | "rustc_version", 131 | "syn 1.0.109", 132 | ] 133 | 134 | [[package]] 135 | name = "either" 136 | version = "1.10.0" 137 | source = "registry+https://github.com/rust-lang/crates.io-index" 138 | checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" 139 | 140 | [[package]] 141 | name = "erlang_llama_cpp_nif" 142 | version = "0.0.1" 143 | dependencies = [ 144 | "llama_cpp", 145 | "regex", 146 | "rustler", 147 | ] 148 | 149 | [[package]] 150 | name = "errno" 151 | version = "0.3.8" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" 154 | dependencies = [ 155 | "libc", 156 | "windows-sys", 157 | ] 158 | 159 | [[package]] 160 | name = "futures" 161 | version = "0.3.30" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" 164 | dependencies = [ 165 | "futures-channel", 166 | "futures-core", 167 | "futures-executor", 168 | "futures-io", 169 | "futures-sink", 170 | "futures-task", 171 | "futures-util", 172 | ] 173 | 174 | [[package]] 175 | name = "futures-channel" 176 | version = "0.3.30" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" 179 | dependencies = [ 180 | "futures-core", 181 | "futures-sink", 182 | ] 183 | 184 | [[package]] 185 | name = "futures-core" 186 | version = "0.3.30" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" 189 | 190 | [[package]] 191 | name = "futures-executor" 192 | version = "0.3.30" 193 | source = "registry+https://github.com/rust-lang/crates.io-index" 194 | checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" 195 | dependencies = [ 196 | "futures-core", 197 | "futures-task", 198 | "futures-util", 199 | ] 200 | 201 | [[package]] 202 | name = "futures-io" 203 | version = "0.3.30" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" 206 | 207 | [[package]] 208 | name = "futures-macro" 209 | version = "0.3.30" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" 212 | dependencies = [ 213 | "proc-macro2", 214 | "quote", 215 | "syn 2.0.58", 216 | ] 217 | 218 | [[package]] 219 | name = "futures-sink" 220 | version = "0.3.30" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" 223 | 224 | [[package]] 225 | name = "futures-task" 226 | version = "0.3.30" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" 229 | 230 | [[package]] 231 | name = "futures-util" 232 | version = "0.3.30" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" 235 | dependencies = [ 236 | "futures-channel", 237 | "futures-core", 238 | "futures-io", 239 | "futures-macro", 240 | "futures-sink", 
241 | "futures-task", 242 | "memchr", 243 | "pin-project-lite", 244 | "pin-utils", 245 | "slab", 246 | ] 247 | 248 | [[package]] 249 | name = "gimli" 250 | version = "0.28.1" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" 253 | 254 | [[package]] 255 | name = "glob" 256 | version = "0.3.1" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 259 | 260 | [[package]] 261 | name = "heck" 262 | version = "0.5.0" 263 | source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 265 | 266 | [[package]] 267 | name = "hermit-abi" 268 | version = "0.3.9" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 271 | 272 | [[package]] 273 | name = "home" 274 | version = "0.5.9" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" 277 | dependencies = [ 278 | "windows-sys", 279 | ] 280 | 281 | [[package]] 282 | name = "itertools" 283 | version = "0.12.1" 284 | source = "registry+https://github.com/rust-lang/crates.io-index" 285 | checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" 286 | dependencies = [ 287 | "either", 288 | ] 289 | 290 | [[package]] 291 | name = "jobserver" 292 | version = "0.1.29" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "f08474e32172238f2827bd160c67871cdb2801430f65c3979184dc362e3ca118" 295 | dependencies = [ 296 | "libc", 297 | ] 298 | 299 | [[package]] 300 | name = "lazy_static" 301 | version = "1.4.0" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 304 | 305 | [[package]] 306 | name = "lazycell" 307 | version = "1.3.0" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 310 | 311 | [[package]] 312 | name = "libc" 313 | version = "0.2.153" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" 316 | 317 | [[package]] 318 | name = "libloading" 319 | version = "0.8.3" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" 322 | dependencies = [ 323 | "cfg-if", 324 | "windows-targets", 325 | ] 326 | 327 | [[package]] 328 | name = "link-cplusplus" 329 | version = "1.0.9" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" 332 | dependencies = [ 333 | "cc", 334 | ] 335 | 336 | [[package]] 337 | name = "linux-raw-sys" 338 | version = "0.4.13" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" 341 | 342 | [[package]] 343 | name = "llama_cpp" 344 | version = "0.3.1" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "746afa27b852045c93cffefb459f883b3c0a62552101c929241dccc6563d8fe9" 347 
| dependencies = [ 348 | "derive_more", 349 | "futures", 350 | "llama_cpp_sys", 351 | "num_cpus", 352 | "thiserror", 353 | "tokio", 354 | "tracing", 355 | ] 356 | 357 | [[package]] 358 | name = "llama_cpp_sys" 359 | version = "0.3.1" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "b53030035eb5617fde2491c1607ff2b6107bc559e25e444163075e4281dfe43e" 362 | dependencies = [ 363 | "bindgen", 364 | "cc", 365 | "link-cplusplus", 366 | "once_cell", 367 | ] 368 | 369 | [[package]] 370 | name = "log" 371 | version = "0.4.21" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 374 | 375 | [[package]] 376 | name = "memchr" 377 | version = "2.7.2" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 380 | 381 | [[package]] 382 | name = "minimal-lexical" 383 | version = "0.2.1" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 386 | 387 | [[package]] 388 | name = "miniz_oxide" 389 | version = "0.7.2" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" 392 | dependencies = [ 393 | "adler", 394 | ] 395 | 396 | [[package]] 397 | name = "nom" 398 | version = "7.1.3" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 401 | dependencies = [ 402 | "memchr", 403 | "minimal-lexical", 404 | ] 405 | 406 | [[package]] 407 | name = "num_cpus" 408 | version = "1.16.0" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" 411 | dependencies = [ 412 | "hermit-abi", 413 | "libc", 414 | ] 415 | 416 | [[package]] 417 | name = "object" 418 | version = "0.32.2" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" 421 | dependencies = [ 422 | "memchr", 423 | ] 424 | 425 | [[package]] 426 | name = "once_cell" 427 | version = "1.19.0" 428 | source = "registry+https://github.com/rust-lang/crates.io-index" 429 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 430 | 431 | [[package]] 432 | name = "pin-project-lite" 433 | version = "0.2.14" 434 | source = "registry+https://github.com/rust-lang/crates.io-index" 435 | checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" 436 | 437 | [[package]] 438 | name = "pin-utils" 439 | version = "0.1.0" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 442 | 443 | [[package]] 444 | name = "prettyplease" 445 | version = "0.2.17" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" 448 | dependencies = [ 449 | "proc-macro2", 450 | "syn 2.0.58", 451 | ] 452 | 453 | [[package]] 454 | name = "proc-macro2" 455 | version = "1.0.79" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" 458 | 
dependencies = [ 459 | "unicode-ident", 460 | ] 461 | 462 | [[package]] 463 | name = "quote" 464 | version = "1.0.36" 465 | source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 467 | dependencies = [ 468 | "proc-macro2", 469 | ] 470 | 471 | [[package]] 472 | name = "regex" 473 | version = "1.10.4" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" 476 | dependencies = [ 477 | "aho-corasick", 478 | "memchr", 479 | "regex-automata", 480 | "regex-syntax", 481 | ] 482 | 483 | [[package]] 484 | name = "regex-automata" 485 | version = "0.4.6" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 488 | dependencies = [ 489 | "aho-corasick", 490 | "memchr", 491 | "regex-syntax", 492 | ] 493 | 494 | [[package]] 495 | name = "regex-syntax" 496 | version = "0.8.3" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" 499 | 500 | [[package]] 501 | name = "rustc-demangle" 502 | version = "0.1.23" 503 | source = "registry+https://github.com/rust-lang/crates.io-index" 504 | checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" 505 | 506 | [[package]] 507 | name = "rustc-hash" 508 | version = "1.1.0" 509 | source = "registry+https://github.com/rust-lang/crates.io-index" 510 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 511 | 512 | [[package]] 513 | name = "rustc_version" 514 | version = "0.4.0" 515 | source = "registry+https://github.com/rust-lang/crates.io-index" 516 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 517 | dependencies = [ 518 | "semver", 519 | ] 520 | 521 | [[package]] 522 | name = "rustix" 523 | version = "0.38.32" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" 526 | dependencies = [ 527 | "bitflags", 528 | "errno", 529 | "libc", 530 | "linux-raw-sys", 531 | "windows-sys", 532 | ] 533 | 534 | [[package]] 535 | name = "rustler" 536 | version = "0.32.1" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "83c330a01eaed3ebce4708e2f1052e0676a9155c1583b8afadc69acaf6105e33" 539 | dependencies = [ 540 | "lazy_static", 541 | "rustler_codegen", 542 | "rustler_sys", 543 | ] 544 | 545 | [[package]] 546 | name = "rustler_codegen" 547 | version = "0.32.1" 548 | source = "registry+https://github.com/rust-lang/crates.io-index" 549 | checksum = "28516544e4ab5fd4c6802343d9676540fbbac1489d36c0898ad8c19ac11f5be2" 550 | dependencies = [ 551 | "heck", 552 | "proc-macro2", 553 | "quote", 554 | "syn 2.0.58", 555 | ] 556 | 557 | [[package]] 558 | name = "rustler_sys" 559 | version = "2.4.0" 560 | source = "registry+https://github.com/rust-lang/crates.io-index" 561 | checksum = "39e21c0f1bc2458e29df0249e0b6a047af44303c73856c179098b6fc3700fd38" 562 | dependencies = [ 563 | "regex", 564 | "unreachable", 565 | ] 566 | 567 | [[package]] 568 | name = "semver" 569 | version = "1.0.22" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" 572 | 573 | [[package]] 574 | name = "shlex" 575 | version 
= "1.3.0" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 578 | 579 | [[package]] 580 | name = "slab" 581 | version = "0.4.9" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" 584 | dependencies = [ 585 | "autocfg", 586 | ] 587 | 588 | [[package]] 589 | name = "syn" 590 | version = "1.0.109" 591 | source = "registry+https://github.com/rust-lang/crates.io-index" 592 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 593 | dependencies = [ 594 | "proc-macro2", 595 | "quote", 596 | "unicode-ident", 597 | ] 598 | 599 | [[package]] 600 | name = "syn" 601 | version = "2.0.58" 602 | source = "registry+https://github.com/rust-lang/crates.io-index" 603 | checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" 604 | dependencies = [ 605 | "proc-macro2", 606 | "quote", 607 | "unicode-ident", 608 | ] 609 | 610 | [[package]] 611 | name = "thiserror" 612 | version = "1.0.58" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" 615 | dependencies = [ 616 | "thiserror-impl", 617 | ] 618 | 619 | [[package]] 620 | name = "thiserror-impl" 621 | version = "1.0.58" 622 | source = "registry+https://github.com/rust-lang/crates.io-index" 623 | checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" 624 | dependencies = [ 625 | "proc-macro2", 626 | "quote", 627 | "syn 2.0.58", 628 | ] 629 | 630 | [[package]] 631 | name = "tokio" 632 | version = "1.37.0" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" 635 | dependencies = [ 636 | "backtrace", 637 | "pin-project-lite", 638 | ] 639 | 640 | [[package]] 641 | name = "tracing" 642 | version = "0.1.40" 643 | source = "registry+https://github.com/rust-lang/crates.io-index" 644 | checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" 645 | dependencies = [ 646 | "pin-project-lite", 647 | "tracing-attributes", 648 | "tracing-core", 649 | ] 650 | 651 | [[package]] 652 | name = "tracing-attributes" 653 | version = "0.1.27" 654 | source = "registry+https://github.com/rust-lang/crates.io-index" 655 | checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" 656 | dependencies = [ 657 | "proc-macro2", 658 | "quote", 659 | "syn 2.0.58", 660 | ] 661 | 662 | [[package]] 663 | name = "tracing-core" 664 | version = "0.1.32" 665 | source = "registry+https://github.com/rust-lang/crates.io-index" 666 | checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 667 | dependencies = [ 668 | "once_cell", 669 | ] 670 | 671 | [[package]] 672 | name = "unicode-ident" 673 | version = "1.0.12" 674 | source = "registry+https://github.com/rust-lang/crates.io-index" 675 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 676 | 677 | [[package]] 678 | name = "unreachable" 679 | version = "1.0.0" 680 | source = "registry+https://github.com/rust-lang/crates.io-index" 681 | checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" 682 | dependencies = [ 683 | "void", 684 | ] 685 | 686 | [[package]] 687 | name = "void" 688 | version = "1.0.2" 689 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 690 | checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" 691 | 692 | [[package]] 693 | name = "which" 694 | version = "4.4.2" 695 | source = "registry+https://github.com/rust-lang/crates.io-index" 696 | checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" 697 | dependencies = [ 698 | "either", 699 | "home", 700 | "once_cell", 701 | "rustix", 702 | ] 703 | 704 | [[package]] 705 | name = "windows-sys" 706 | version = "0.52.0" 707 | source = "registry+https://github.com/rust-lang/crates.io-index" 708 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 709 | dependencies = [ 710 | "windows-targets", 711 | ] 712 | 713 | [[package]] 714 | name = "windows-targets" 715 | version = "0.52.4" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" 718 | dependencies = [ 719 | "windows_aarch64_gnullvm", 720 | "windows_aarch64_msvc", 721 | "windows_i686_gnu", 722 | "windows_i686_msvc", 723 | "windows_x86_64_gnu", 724 | "windows_x86_64_gnullvm", 725 | "windows_x86_64_msvc", 726 | ] 727 | 728 | [[package]] 729 | name = "windows_aarch64_gnullvm" 730 | version = "0.52.4" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" 733 | 734 | [[package]] 735 | name = "windows_aarch64_msvc" 736 | version = "0.52.4" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" 739 | 740 | [[package]] 741 | name = "windows_i686_gnu" 742 | version = "0.52.4" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" 745 | 746 | [[package]] 747 | name = "windows_i686_msvc" 748 | version = "0.52.4" 749 | source = "registry+https://github.com/rust-lang/crates.io-index" 750 | checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" 751 | 752 | [[package]] 753 | name = "windows_x86_64_gnu" 754 | version = "0.52.4" 755 | source = "registry+https://github.com/rust-lang/crates.io-index" 756 | checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" 757 | 758 | [[package]] 759 | name = "windows_x86_64_gnullvm" 760 | version = "0.52.4" 761 | source = "registry+https://github.com/rust-lang/crates.io-index" 762 | checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" 763 | 764 | [[package]] 765 | name = "windows_x86_64_msvc" 766 | version = "0.52.4" 767 | source = "registry+https://github.com/rust-lang/crates.io-index" 768 | checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" 769 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "erlang_llama_cpp_nif" 3 | version = "0.0.1" 4 | authors = [] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "erlang_llama_cpp_nif" 9 | path = "src/lib.rs" 10 | crate-type = ["cdylib"] 11 | 12 | [dependencies] 13 | rustler = "0.32.1" 14 | llama_cpp = {version = "0.3.1"} 15 | regex = "1" 16 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | mod nifs; 2 | mod structs; 3 | mod refs; 4 | use rustler::{Env, Term}; 5 | use crate::refs::model_ref::ExLLamaModelRef; 6 | use crate::refs::session_ref::ExLLamaSessionRef; 7 | use crate::structs::session::ExLLamaSession; 8 | 9 | fn on_load(env: Env, _info: Term) -> bool { 10 | rustler::resource!(ExLLamaModelRef, env); 11 | rustler::resource!(ExLLamaSessionRef, env); 12 | rustler::resource!(ExLLamaSession, env); 13 | 14 | true 15 | } 16 | 17 | rustler::init!( 18 | "Elixir.ExLLama.Nif", 19 | [ 20 | nifs::ex_llama_model::__model_nif_load_from_file__, 21 | nifs::ex_llama_model::__model_nif_detokenize__, 22 | nifs::ex_llama_model::__model_nif_token_to_byte_piece__, 23 | nifs::ex_llama_model::__model_nif_token_to_piece__, 24 | nifs::ex_llama_model::__model_nif_decode_tokens__, 25 | nifs::ex_llama_model::__model_nif_create_session__, 26 | nifs::ex_llama_model::__model_nif_embeddings__, 27 | nifs::ex_llama_model::__model_nif_bos__, 28 | nifs::ex_llama_model::__model_nif_eos__, 29 | nifs::ex_llama_model::__model_nif_nl__, 30 | nifs::ex_llama_model::__model_nif_infill_prefix__, 31 | nifs::ex_llama_model::__model_nif_infill_middle__, 32 | nifs::ex_llama_model::__model_nif_infill_suffix__, 33 | nifs::ex_llama_model::__model_nif_eot__, 34 | nifs::ex_llama_model::__model_nif_vocabulary_size__, 35 | nifs::ex_llama_model::__model_nif_embed_len__, 36 | nifs::ex_llama_model::__model_nif_train_len__, 37 | 38 | nifs::ex_llama_session::__session_nif_default_session_options__, 39 | nifs::ex_llama_session::__session_nif_advance_context_with_tokens__, 40 | nifs::ex_llama_session::__session_nif_advance_context__, 41 | nifs::ex_llama_session::__session_nif_start_completing_with__, 42 | nifs::ex_llama_session::__session_nif_completion__, 43 | nifs::ex_llama_session::__session_nif_model__, 44 | nifs::ex_llama_session::__session_nif_params__, 45 | nifs::ex_llama_session::__session_nif_context_size__, 46 | nifs::ex_llama_session::__session_nif_context__, 47 | nifs::ex_llama_session::__session_nif_truncate_context__, 48 | nifs::ex_llama_session::__session_nif_set_context_to_tokens__, 49 | nifs::ex_llama_session::__session_nif_set_context__, 50 | nifs::ex_llama_session::__session_deep_copy__, 51 | // 52 | // nifs::ex_llama::__context_nif_load_model__, 53 | // nifs::ex_llama::__context_nif_default_session_options__, 54 | // nifs::ex_llama::__context_nif_create_session__, 55 | // nifs::ex_llama::__context_nif_advance_context__, 56 | // nifs::ex_llama::__context_nif_complete__, 57 | 58 | ], 59 | load = on_load 60 | ); 61 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/src/nifs.rs: -------------------------------------------------------------------------------- 1 | pub mod ex_llama_model; 2 | pub mod ex_llama_session; -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/src/nifs/ex_llama_model.rs: -------------------------------------------------------------------------------- 1 | use llama_cpp::{EmbeddingsParams, LlamaModel, LlamaParams, SessionParams, Token}; 2 | use crate::structs::embedding_options::ExLLamaEmbeddingOptions; 3 | use crate::structs::model::ExLLamaModel; 4 | use crate::structs::model_options::ModelOptions; 5 | use crate::structs::session::ExLLamaSession; 6 | use crate::structs::session_options::ExLLamaSessionOptions; 7 | 8 | 9 | #[rustler::nif(schedule = "DirtyCpu")] 10 | // This function creates a new 
instance of the ExLLamaModel struct.
11 | pub fn __model_nif_load_from_file__(path: String, model_options: ModelOptions) -> Result<ExLLamaModel, String> {
12 |     let p = path.clone();
13 |     let params = LlamaParams::from(model_options);
14 |     let model = LlamaModel::load_from_file(path, params);
15 |     match model {
16 |         Ok(model) =>
17 |             Ok(ExLLamaModel::new(p, model)),
18 |         Err(e) =>
19 |             Err(e.to_string()),
20 |     }
21 | }
22 | 
23 | #[rustler::nif(schedule = "DirtyCpu")]
24 | pub fn __model_nif_detokenize__(model: ExLLamaModel, token: i32) -> Result<Vec<u8>, String> {
25 |     let t = Token(token);
26 |     let x = model.detokenize(t);
27 |     let vector = Vec::from(x);
28 |     Ok(vector)
29 | }
30 | 
31 | #[rustler::nif(schedule = "DirtyCpu")]
32 | pub fn __model_nif_token_to_byte_piece__(model: ExLLamaModel, token: i32) -> Result<Vec<u8>, String> {
33 |     let t = Token(token);
34 |     let x = model.token_to_byte_piece(t);
35 |     Ok(x)
36 | }
37 | 
38 | #[rustler::nif(schedule = "DirtyCpu")]
39 | pub fn __model_nif_token_to_piece__(model: ExLLamaModel, token: i32) -> Result<String, String> {
40 |     let t = Token(token);
41 |     let x = model.token_to_piece(t);
42 |     Ok(x)
43 | }
44 | 
45 | #[rustler::nif(schedule = "DirtyCpu")]
46 | pub fn __model_nif_decode_tokens__(model: ExLLamaModel, tokens: Vec<i32>) -> Result<String, String> {
47 |     let tokens: Vec<Token> = tokens.into_iter().map(Token).collect();
48 |     let x = model.decode_tokens(tokens);
49 |     Ok(x)
50 | }
51 | 
52 | 
53 | #[rustler::nif(schedule = "DirtyCpu")]
54 | pub fn __model_nif_create_session__(model: ExLLamaModel, options: ExLLamaSessionOptions) -> Result<ExLLamaSession, String> {
55 |     let seed = options.seed.clone();
56 |     let opts = SessionParams::from(options);
57 |     let ctx = model.create_session(opts);
58 |     match ctx {
59 |         Ok(session) =>
60 |             Ok(ExLLamaSession::new(model.name, seed, session)),
61 |         Err(e) => return Err(e.to_string()),
62 |     }
63 | }
64 | 
65 | 
66 | #[rustler::nif(schedule = "DirtyCpu")]
67 | pub fn __model_nif_embeddings__(model: ExLLamaModel, inputs: String, options: ExLLamaEmbeddingOptions) -> Result<Vec<Vec<f32>>, String> {
68 |     let options = EmbeddingsParams::from(options);
69 |     let vec_of_vec = vec![inputs.as_bytes()];
70 |     let response = model.embeddings(&vec_of_vec, options);
71 |     match response {
72 |         Ok(value) =>
73 |             Ok(value),
74 |         Err(e) => return Err(e.to_string()),
75 |     }
76 | }
77 | 
78 | // @TODO embeddings_async
79 | 
80 | #[rustler::nif(schedule = "DirtyCpu")]
81 | pub fn __model_nif_bos__(model: ExLLamaModel) -> Result<i32, String> {
82 |     let x = model.bos();
83 |     Ok(x.0)
84 | }
85 | 
86 | #[rustler::nif(schedule = "DirtyCpu")]
87 | pub fn __model_nif_eos__(model: ExLLamaModel) -> Result<i32, String> {
88 |     let x = model.eos();
89 |     Ok(x.0)
90 | }
91 | 
92 | #[rustler::nif(schedule = "DirtyCpu")]
93 | pub fn __model_nif_nl__(model: ExLLamaModel) -> Result<i32, String> {
94 |     let x = model.nl();
95 |     Ok(x.0)
96 | }
97 | 
98 | #[rustler::nif(schedule = "DirtyCpu")]
99 | pub fn __model_nif_infill_prefix__(model: ExLLamaModel) -> Result<i32, String> {
100 |     let x = model.infill_prefix();
101 |     Ok(x.0)
102 | }
103 | 
104 | 
105 | #[rustler::nif(schedule = "DirtyCpu")]
106 | pub fn __model_nif_infill_middle__(model: ExLLamaModel) -> Result<i32, String> {
107 |     let x = model.infill_middle();
108 |     Ok(x.0)
109 | }
110 | 
111 | 
112 | #[rustler::nif(schedule = "DirtyCpu")]
113 | pub fn __model_nif_infill_suffix__(model: ExLLamaModel) -> Result<i32, String> {
114 |     let x = model.infill_suffix();
115 |     Ok(x.0)
116 | }
117 | 
118 | 
119 | #[rustler::nif(schedule = "DirtyCpu")]
120 | pub fn __model_nif_eot__(model: ExLLamaModel) -> Result<i32, String> {
121 |     let x = model.eot();
122 |     Ok(x.0)
123 | }
124 | 
125 | 
126 | #[rustler::nif(schedule = "DirtyCpu")]
127 | pub fn __model_nif_vocabulary_size__(model: ExLLamaModel) -> Result<usize, String> {
128 |     let x = model.vocabulary_size();
129 |     Ok(x)
130 | }
131 | 
132 | 
133 | #[rustler::nif(schedule = "DirtyCpu")]
134 | pub fn __model_nif_embed_len__(model: ExLLamaModel) -> Result<usize, String> {
135 |     let x = model.embed_len();
136 |     Ok(x)
137 | }
138 | 
139 | #[rustler::nif(schedule = "DirtyCpu")]
140 | pub fn __model_nif_train_len__(model: ExLLamaModel) -> Result<usize, String> {
141 |     let x = model.train_len();
142 |     Ok(x)
143 | }
144 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/nifs/ex_llama_session.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::standard_sampler::StandardSampler;
2 | use llama_cpp::{SessionParams, Token};
3 | use regex::Regex;
4 | use rustler::{Env, ResourceArc};
5 | use rustler::types::Pid;
6 | use crate::refs::session_ref::ExLLamaSessionRef;
7 | use crate::structs::completion::ExLLamaCompletion;
8 | use crate::structs::model::ExLLamaModel;
9 | use crate::structs::session::ExLLamaSession;
10 | use crate::structs::session_options::ExLLamaSessionOptions;
11 | 
12 | 
13 | #[rustler::nif(schedule = "DirtyCpu")]
14 | pub fn __session_nif_default_session_options__() -> Result<ExLLamaSessionOptions, String> {
15 |     let r = ExLLamaSessionOptions::from(SessionParams::default());
16 |     Ok(r)
17 | }
18 | 
19 | #[rustler::nif(schedule = "DirtyCpu")]
20 | pub fn __session_nif_advance_context_with_tokens__(session: ResourceArc<ExLLamaSessionRef>, context: Vec<i32>) -> Result<&'static str, String> {
21 |     let mut ctx = session.0.lock().expect("Locking the session failed");
22 |     let tokens: Vec<Token> = context.into_iter().map(Token).collect();
23 |     let result = ctx.advance_context_with_tokens(tokens);
24 |     match result {
25 |         Ok(_) => Ok("OK"),
26 |         Err(e) => Err(e.to_string())
27 |     }
28 | }
29 | 
30 | #[rustler::nif(schedule = "DirtyCpu")]
31 | pub fn __session_nif_advance_context__(session: ResourceArc<ExLLamaSessionRef>, context: String) -> Result<&'static str, String> {
32 |     let mut ctx = session.0.lock().expect("Locking the session failed");
33 |     let result = ctx.advance_context(context);
34 |     match result {
35 |         Ok(_) => Ok("OK"),
36 |         Err(e) => Err(e.to_string())
37 |     }
38 | }
39 | 
40 | // start_completing
41 | 
42 | #[rustler::nif(schedule = "DirtyCpu")]
43 | pub fn __session_nif_start_completing_with__(env: Env, pid: Pid, session: ResourceArc<ExLLamaSessionRef>, max_predictions: usize) -> Result<&'static str, String> {
44 |     let lock = session.0.lock().expect("Locking the session failed");
45 |     let c = lock.deep_copy();
46 |     match c {
47 |         Ok(ctx) => {
48 |             let mut pid = pid;
49 |             let mut ctx = ctx;
50 |             let handle = ctx.start_completing_with(StandardSampler::default(), max_predictions);
51 |             let i = handle.into_strings();
52 |             for completion in i {
53 |                 //let gen_completion = rustler::types::tuple::make_tuple(&[rustler::types::atom::from_str("gen"), completion]);
54 |                 env.send(&pid, completion).expect("Encoding completion failed");
55 |             }
56 |             let fin = rustler::types::Atom::from_str(env, "fin").expect("Encoding completion failed");
57 |             env.send(&pid, fin).expect("Encoding completion failed");
58 |             Ok("OK")
59 |         },
60 |         Err(e) => Err(e.to_string())
61 |     }
62 | }
63 | 
64 | 
65 | #[rustler::nif(schedule = "DirtyCpu")]
66 | pub fn __session_nif_completion__(session: ResourceArc<ExLLamaSessionRef>, max_predictions: usize, stop: Option<String>) -> Result<ExLLamaCompletion, String> {
67 |     let lock = session.0.lock().expect("Locking the session failed");
68 |     let c = lock.deep_copy();
69 |     match c {
70 |         Ok(ctx) => {
71 |             let mut ctx = ctx;
72 |             let prompt_size = ctx.context_size();
73 |             let completions = ctx.start_completing_with(StandardSampler::default(), max_predictions).into_strings();
74 |             let mut completions_str = String::new();
75 | 
76 |             match stop {
77 |                 Some(x) => {
78 |                     let pattern = Regex::new(&x).unwrap(); // Compile the regex, handle errors as needed
79 |                     for completion in completions {
80 |                         completions_str.push_str(&completion);
81 |                         if let Some(mat) = pattern.find(&completions_str) {
82 |                             completions_str.truncate(mat.end());
83 |                             break;
84 |                         }
85 |                     }
86 |                 },
87 |                 None => {
88 |                     for completion in completions {
89 |                         completions_str.push_str(&completion);
90 |                     }
91 |                 }
92 |             }
93 |             Ok(ExLLamaCompletion::new(completions_str, ctx.context_size() - prompt_size))
94 |         },
95 |         Err(e) => Err(e.to_string())
96 |     }
97 | }
98 | 
99 | #[rustler::nif(schedule = "DirtyCpu")]
100 | pub fn __session_nif_model__(session: ResourceArc<ExLLamaSessionRef>) -> Result<ExLLamaModel, String> {
101 |     let ctx = session.0.lock().expect("Locking the session failed");
102 |     let model = ctx.model();
103 |     let wrapper = ExLLamaModel::new("...".to_string(), model);
104 |     Ok(wrapper)
105 | }
106 | 
107 | #[rustler::nif(schedule = "DirtyCpu")]
108 | pub fn __session_nif_params__(session: ResourceArc<ExLLamaSessionRef>) -> Result<ExLLamaSessionOptions, String> {
109 |     let ctx = session.0.lock().expect("Locking the session failed");
110 |     let params = ctx.params();
111 |     let wrapper = ExLLamaSessionOptions::from(params);
112 |     Ok(wrapper)
113 | }
114 | 
115 | #[rustler::nif(schedule = "DirtyCpu")]
116 | pub fn __session_nif_context_size__(session: ResourceArc<ExLLamaSessionRef>) -> Result<usize, String> {
117 |     let ctx = session.0.lock().expect("Locking the session failed");
118 |     let result = ctx.context_size();
119 |     Ok(result)
120 | }
121 | 
122 | #[rustler::nif(schedule = "DirtyCpu")]
123 | pub fn __session_nif_context__(session: ResourceArc<ExLLamaSessionRef>) -> Result<Vec<i32>, String> {
124 |     let ctx = session.0.lock().expect("Locking the session failed");
125 |     let result = ctx.context();
126 |     let tokens: Vec<i32> = result.into_iter().map(|x| x.0).collect();
127 |     Ok(tokens)
128 | }
129 | 
130 | #[rustler::nif(schedule = "DirtyCpu")]
131 | pub fn __session_nif_truncate_context__(session: ResourceArc<ExLLamaSessionRef>, n_tokens: usize) -> Result<&'static str, String> {
132 |     let ctx = session.0.lock().expect("Locking the session failed");
133 |     ctx.truncate_context(n_tokens);
134 |     Ok("OK")
135 | }
136 | 
137 | #[rustler::nif(schedule = "DirtyCpu")]
138 | pub fn __session_nif_set_context_to_tokens__(session: ResourceArc<ExLLamaSessionRef>, context: Vec<i32>) -> Result<&'static str, String> {
139 |     let mut ctx = session.0.lock().expect("Locking the session failed");
140 |     let tokens: Vec<Token> = context.into_iter().map(Token).collect();
141 |     let result = ctx.set_context_to_tokens(tokens);
142 |     match result {
143 |         Ok(_) => Ok("OK"),
144 |         Err(e) => Err(e.to_string())
145 |     }
146 | }
147 | 
148 | #[rustler::nif(schedule = "DirtyCpu")]
149 | pub fn __session_nif_set_context__(session: ResourceArc<ExLLamaSessionRef>, context: String) -> Result<&'static str, String> {
150 |     let mut ctx = session.0.lock().expect("Locking the session failed");
151 |     let result = ctx.set_context(context);
152 |     match result {
153 |         Ok(_) => Ok("OK"),
154 |         Err(e) => Err(e.to_string())
155 |     }
156 | }
157 | 
158 | #[rustler::nif(schedule = "DirtyCpu")]
159 | pub fn __session_deep_copy__(session: ResourceArc<ExLLamaSessionRef>) -> Result<ExLLamaSession, String> {
160 |     let ctx = session.0.lock().expect("Locking the session failed");
161 |     let result = ctx.deep_copy();
162 |     match result {
163 |         Ok(session) =>
164 |             Ok(ExLLamaSession::new("".to_string(), session.params().seed, session)),
165 |         Err(e) => return Err(e.to_string()),
166 |     }
167 | }
168 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/refs.rs:
--------------------------------------------------------------------------------
1 | pub mod model_ref;
2 | pub mod session_ref;
3 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/refs/model_ref.rs:
--------------------------------------------------------------------------------
1 | // This module contains the Ref Wrapper structs, which wrap the LLama structs from the llama_cpp crate.
2 | // These ref structs are used to ensure safe concurrent access to the LLama objects.
3 | // These ref structs are also marked as Send and Sync, allowing them to be shared across threads.
4 | 
5 | 
6 | use llama_cpp::{LlamaModel};
7 | 
8 | 
9 | // This struct is used to create a reference to the LLamaModel object.
10 | pub struct ExLLamaModelRef(pub LlamaModel);
11 | 
12 | 
13 | impl ExLLamaModelRef {
14 |     pub fn new(llama: LlamaModel) -> Self {
15 |         Self(llama)
16 |     }
17 | }
18 | 
19 | impl Drop for ExLLamaModelRef {
20 |     fn drop(&mut self) {
21 |         // Log or print a message indicating the resource is being dropped.
22 |         // println!("Dropping ExLLamaModelRef");
23 |     }
24 | }
25 | unsafe impl Send for ExLLamaModelRef {}
26 | unsafe impl Sync for ExLLamaModelRef {}
27 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/refs/session_ref.rs:
--------------------------------------------------------------------------------
1 | // This module contains the Ref Wrapper structs, which wrap the LLama structs from the llama_cpp crate.
2 | // These ref structs are used to ensure safe concurrent access to the LLama objects.
3 | // These ref structs are also marked as Send and Sync, allowing them to be shared across threads.
4 | 
5 | 
6 | use std::sync::Mutex;
7 | use llama_cpp::{LlamaSession};
8 | 
9 | pub struct ExLLamaSessionRef(pub Mutex<LlamaSession>);
10 | 
11 | impl ExLLamaSessionRef {
12 |     pub fn new(session: LlamaSession) -> Self {
13 |         Self(Mutex::new(session))
14 |     }
15 | }
16 | 
17 | impl Drop for ExLLamaSessionRef {
18 |     fn drop(&mut self) {
19 |         // println!("Dropping ExLLamaSessionRef");
20 |     }
21 | }
22 | 
23 | unsafe impl Send for ExLLamaSessionRef {}
24 | unsafe impl Sync for ExLLamaSessionRef {}
25 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs.rs:
--------------------------------------------------------------------------------
1 | pub mod model_options;
2 | pub mod model;
3 | pub mod session_options;
4 | pub mod session;
5 | 
6 | pub mod embedding_options;
7 | 
8 | pub mod completion;
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/completion.rs:
--------------------------------------------------------------------------------
1 | // This module contains the ExLLamaCompletion struct, the NIF-encodable result returned to Elixir after a completion run.
2 | // It carries the generated text together with the number of tokens produced beyond the prompt.
3 | // Unlike the model and session structs, it does not wrap any llama_cpp resource.
4 | 
5 | 
6 | use llama_cpp::{LlamaSession};
7 | use rustler::{NifStruct, ResourceArc};
8 | use crate::refs::session_ref::ExLLamaSessionRef;
9 | 
10 | 
11 | #[derive(NifStruct)]
12 | #[module = "ExLLama.Completion"]
13 | pub struct ExLLamaCompletion {
14 |     pub content: String,
15 |     pub token_length: usize,
16 | }
17 | 
18 | impl ExLLamaCompletion {
19 |     pub fn new(content: String, token_length: usize) -> Self {
20 |         Self {
21 |             content: content,
22 |             token_length: token_length
23 |         }
24 |     }
25 |     //
26 |     // // Provide a method to access the mutex protected session
27 |     // pub fn lock_session(&self) -> std::sync::MutexGuard<'_, LlamaSession> {
28 |     //     self.resource.0.lock().expect("Locking the session failed")
29 |     // }
30 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/embedding_options.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::EmbeddingsParams;
2 | use rustler::NifStruct;
3 | 
4 | #[derive(NifStruct)]
5 | #[module = "ExLLama.EmbeddingOptions"]
6 | pub struct ExLLamaEmbeddingOptions {
7 |     pub n_threads: u32,
8 |     pub n_threads_batch: u32,
9 | }
10 | 
11 | impl From<ExLLamaEmbeddingOptions> for EmbeddingsParams {
12 |     fn from(value: ExLLamaEmbeddingOptions) -> Self {
13 |         Self {
14 |             n_threads: value.n_threads,
15 |             n_threads_batch: value.n_threads_batch,
16 |         }
17 |     }
18 | }
19 | 
20 | 
21 | impl From<EmbeddingsParams> for ExLLamaEmbeddingOptions {
22 |     fn from(value: EmbeddingsParams) -> Self {
23 |         Self {
24 |             n_threads: value.n_threads,
25 |             n_threads_batch: value.n_threads_batch,
26 |         }
27 |     }
28 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/model.rs:
--------------------------------------------------------------------------------
1 | // This module contains the ExLLamaModel struct which is a wrapper around the LlamaModel struct from the llama_cpp crate.
2 | // The ExLLamaModel struct includes a ResourceArc to the ExLLamaModelRef struct, which is used to ensure safe concurrent access.
3 | // The ExLLamaModel struct also implements the Deref trait to allow it to be treated as a LlamaModel object.
4 | 
5 | use std::ops::Deref;
6 | use llama_cpp::{LlamaModel};
7 | use rustler::{NifStruct, ResourceArc};
8 | use crate::refs::model_ref::ExLLamaModelRef;
9 | 
10 | 
11 | #[derive(NifStruct)]
12 | #[module = "ExLLama.Model"]
13 | // This struct is used to create a resource that can be passed between Elixir and Rust.
14 | pub struct ExLLamaModel {
15 |     pub resource: ResourceArc<ExLLamaModelRef>,
16 |     pub name: String,
17 |     pub eos: Vec<u8>,
18 |     pub bos: Vec<u8>
19 | }
20 | 
21 | // This implementation creates a new instance of the ExLLamaModel struct.
22 | impl ExLLamaModel {
23 |     pub fn new(name: String, llama: LlamaModel) -> Self {
24 |         Self {
25 |             name: name,
26 |             eos: llama.detokenize(llama.eos()).to_vec(),
27 |             bos: llama.detokenize(llama.bos()).to_vec(),
28 |             resource: ResourceArc::new(ExLLamaModelRef::new(llama)),
29 |         }
30 |     }
31 | }
32 | 
33 | // This implementation of Deref allows the ExLLamaModel struct to be treated as a LlamaModel object.
34 | impl Deref for ExLLamaModel {
35 |     type Target = LlamaModel;
36 | 
37 |     fn deref(&self) -> &Self::Target {
38 |         &self.resource.0
39 |     }
40 | }
41 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/model_options.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::{LlamaParams, SplitMode};
2 | use rustler::{NifStruct};
3 | //use rustler::types::atom;
4 | 
5 | #[derive(NifStruct)]
6 | #[module = "ExLLama.ModelOptions"]
7 | pub struct ModelOptions {
8 |     pub n_gpu_layers: u32,
9 |     pub split_mode: String,
10 |     pub main_gpu: u32,
11 |     pub vocab_only: bool,
12 |     pub use_mmap: bool,
13 |     pub use_mlock: bool,
14 | }
15 | 
16 | impl From<ModelOptions> for LlamaParams {
17 |     fn from(value: ModelOptions) -> Self {
18 |         // Map the string values to the corresponding SplitMode enum values
19 |         let split_mode = match value.split_mode.as_str() {
20 |             "none" => SplitMode::None,
21 |             "layer" => SplitMode::Layer,
22 |             "row" => SplitMode::Row,
23 |             _ => panic!("Invalid split_mode value"),
24 |         };
25 | 
26 |         Self {
27 |             n_gpu_layers: value.n_gpu_layers,
28 |             split_mode: split_mode,
29 |             main_gpu: value.main_gpu,
30 |             vocab_only: value.vocab_only,
31 |             use_mmap: value.use_mmap,
32 |             use_mlock: value.use_mlock,
33 |         }
34 |     }
35 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/session.rs:
--------------------------------------------------------------------------------
1 | // This module contains the ExLLamaSession struct which is a wrapper around the LlamaSession struct from the llama_cpp crate.
2 | // The ExLLamaSession struct includes a ResourceArc to the ExLLamaSessionRef struct, which guards the underlying session behind a Mutex for safe concurrent access.
3 | // The model name and seed are carried alongside the resource so they can be read from the Elixir side.
4 | 
5 | 
6 | use llama_cpp::{LlamaSession};
7 | use rustler::{NifStruct, ResourceArc};
8 | use crate::refs::session_ref::ExLLamaSessionRef;
9 | 
10 | 
11 | #[derive(NifStruct)]
12 | #[module = "ExLLama.Session"]
13 | pub struct ExLLamaSession {
14 |     pub model_name: String,
15 |     pub seed: u32,
16 |     pub resource: ResourceArc<ExLLamaSessionRef>,
17 | }
18 | 
19 | impl ExLLamaSession {
20 |     pub fn new(model_name: String, seed: u32, session: LlamaSession) -> Self {
21 |         Self {
22 |             model_name: model_name,
23 |             seed: seed,
24 |             resource: ResourceArc::new(ExLLamaSessionRef::new(session)),
25 |         }
26 |     }
27 |     //
28 |     // // Provide a method to access the mutex protected session
29 |     // pub fn lock_session(&self) -> std::sync::MutexGuard<'_, LlamaSession> {
30 |     //     self.resource.0.lock().expect("Locking the session failed")
31 |     // }
32 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/session_options.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::SessionParams;
2 | use rustler::NifStruct;
3 | 
4 | #[derive(NifStruct)]
5 | #[module = "ExLLama.SessionOptions"]
6 | pub struct ExLLamaSessionOptions {
7 |     pub seed: u32,
8 |     pub n_ctx: u32,
9 |     pub n_batch: u32,
10 |     pub n_threads: u32,
11 |     pub n_threads_batch: u32,
12 |     pub rope_scaling_type: i32,
13 |     pub rope_freq_base: f32,
14 |     pub rope_freq_scale: f32,
15 |     pub yarn_ext_factor: f32,
16 |     pub yarn_attn_factor: f32,
17 |     pub yarn_beta_fast: f32,
18 |     pub yarn_beta_slow: f32,
19 |     pub yarn_orig_ctx: u32,
20 |     pub type_k: u32,
21 |     pub type_v: u32,
22 |     pub embedding: bool,
23 |     pub offload_kqv: bool,
24 |     pub pooling: bool,
25 | }
26 | 
27 | impl From<ExLLamaSessionOptions> for SessionParams {
28 |     fn from(value: ExLLamaSessionOptions) -> Self {
29 |         Self {
30 |             seed: value.seed,
31 |             n_ctx: value.n_ctx,
32 |             n_batch: value.n_batch,
33 |             n_threads: value.n_threads,
34 |             n_threads_batch: value.n_threads_batch,
35 |             rope_scaling_type: value.rope_scaling_type,
36 |             rope_freq_base: value.rope_freq_base,
37 |             rope_freq_scale: value.rope_freq_scale,
38 |             yarn_ext_factor: value.yarn_ext_factor,
39 |             yarn_attn_factor: value.yarn_attn_factor,
40 |             yarn_beta_fast: value.yarn_beta_fast,
41 |             yarn_beta_slow: value.yarn_beta_slow,
42 |             yarn_orig_ctx: value.yarn_orig_ctx,
43 |             type_k: value.type_k,
44 |             type_v: value.type_v,
45 |             embedding: value.embedding,
46 |             offload_kqv: value.offload_kqv,
47 |             pooling: value.pooling,
48 |         }
49 |     }
50 | }
51 | 
52 | 
53 | impl From<SessionParams> for ExLLamaSessionOptions {
54 |     fn from(value: SessionParams) -> Self {
55 |         Self {
56 |             seed: value.seed,
57 |             n_ctx: value.n_ctx,
58 |             n_batch: value.n_batch,
59 |             n_threads: value.n_threads,
60 |             n_threads_batch: value.n_threads_batch,
61 |             rope_scaling_type: value.rope_scaling_type,
62 |             rope_freq_base: value.rope_freq_base,
63 |             rope_freq_scale: value.rope_freq_scale,
64 |             yarn_ext_factor: value.yarn_ext_factor,
65 |             yarn_attn_factor: value.yarn_attn_factor,
66 |             yarn_beta_fast: value.yarn_beta_fast,
67 |             yarn_beta_slow: value.yarn_beta_slow,
68 |             yarn_orig_ctx: value.yarn_orig_ctx,
69 |             type_k: value.type_k,
70 |             type_v: value.type_v,
71 |             embedding: value.embedding,
72 |             offload_kqv: value.offload_kqv,
73 |             pooling: value.pooling,
74 |         }
75 |     }
76 | }
77 | 
78 | 
79 | impl From<&SessionParams> for ExLLamaSessionOptions {
80 |     fn from(value: &SessionParams) -> Self {
81 |         Self {
82 |             seed: value.seed,
83 |             n_ctx: value.n_ctx,
84 |             n_batch: value.n_batch,
85 |             n_threads: value.n_threads,
86 |             n_threads_batch: 
value.n_threads_batch, 87 | rope_scaling_type: value.rope_scaling_type, 88 | rope_freq_base: value.rope_freq_base, 89 | rope_freq_scale: value.rope_freq_scale, 90 | yarn_ext_factor: value.yarn_ext_factor, 91 | yarn_attn_factor: value.yarn_attn_factor, 92 | yarn_beta_fast: value.yarn_beta_fast, 93 | yarn_beta_slow: value.yarn_beta_slow, 94 | yarn_orig_ctx: value.yarn_orig_ctx, 95 | type_k: value.type_k, 96 | type_v: value.type_v, 97 | embedding: value.embedding, 98 | offload_kqv: value.offload_kqv, 99 | pooling: value.pooling, 100 | } 101 | } 102 | } -------------------------------------------------------------------------------- /priv/models/local_llama/tiny_llama/.gitignore: -------------------------------------------------------------------------------- 1 | *.gguf 2 | -------------------------------------------------------------------------------- /priv/models/local_llama/tiny_llama/init.sh: -------------------------------------------------------------------------------- 1 | wget -O tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf?download=true 2 | -------------------------------------------------------------------------------- /test/ex_llama_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExLLamaTest do 2 | use ExUnit.Case 3 | 4 | defp priv_dir() do 5 | :code.priv_dir(:ex_llama) 6 | |> List.to_string() 7 | end 8 | 9 | defp load_model(path) do 10 | file = priv_dir() <> "/models/" <> path 11 | ExLLama.load_model(file) 12 | end 13 | 14 | test "Default Session Options" do 15 | {:ok, sut} = ExLLama.Session.default_options() 16 | assert sut.__struct__ == ExLLama.SessionOptions 17 | end 18 | 19 | test "Create Session" do 20 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 21 | {:ok, session} = ExLLama.create_session(llama) 22 | assert session.__struct__ == ExLLama.Session 23 | end 24 | 25 | test "Load Model" do 26 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 27 | assert llama.__struct__ == ExLLama.Model 28 | end 29 | 30 | def receive_text(acc \\ []) do 31 | receive do 32 | x = {:ok, _} -> Enum.reverse([x|acc]) 33 | x = {:error, _} -> Enum.reverse([x|acc]) 34 | :fin -> 35 | Enum.reverse(acc) 36 | x -> 37 | receive_text([x | acc]) 38 | end 39 | end 40 | 41 | test "Async complete_with" do 42 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 43 | {:ok, options} = ExLLama.Session.default_options() 44 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 45 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 46 | ExLLama.Session.start_completing_with(session, %{max_tokens: 512}) 47 | r = receive_text() 48 | assert r == [" Good", "bye", "", ""] 49 | end 50 | 51 | test "Advance Context" do 52 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 53 | {:ok, options} = ExLLama.Session.default_options() 54 | 55 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 56 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello. 
Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 57 | {:ok, context} = ExLLama.Session.context(session) 58 | {:ok, as_str} = ExLLama.Model.decode_tokens(llama, context) 59 | # There is a bug in advance_context in llama_cpp that injects a space 60 | assert as_str == " <|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n" 61 | {:ok, %{content: response}} = ExLLama.completion(session, 512, "\n*") 62 | response = String.trim_leading(response) 63 | ExLLama.advance_context(session, response <> "\n<|user|>\n Say Apple.\n<|assistant|>\n") 64 | {:ok, %{content: response}} = ExLLama.completion(session, 512, "\n*") 65 | response = String.trim_leading(response) 66 | ExLLama.advance_context(session, response <> "\n<|user|>\n What did you just say?.\n<|assistant|>\n") 67 | {:ok, %{content: response}} = ExLLama.completion(session, 512, "\n*") 68 | response = String.trim_leading(response) 69 | assert response =~ "Apple" 70 | {:ok, context} = ExLLama.Session.context(session) 71 | {:ok, as_str} = ExLLama.Model.decode_tokens(llama, context) 72 | assert as_str == " <|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n Goodbye\n<|user|>\n Say Apple.\n<|assistant|>\n Apple\n<|user|>\n What did you just say?.\n<|assistant|>\n" 73 | end 74 | 75 | test "Chat Completion" do 76 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 77 | thread = [ 78 | %{role: :user, content: "Say Hello. And only hello. Example \"Hello\"."}, 79 | %{role: :assistant, content: "Hello"}, 80 | %{role: :user, content: "Repeat what you just said."}, 81 | %{role: :assistant, content: "Hello"}, 82 | %{role: :user, content: "Say Goodbye."}, 83 | %{role: :assistant, content: "Goodbye"}, 84 | %{role: :user, content: "Say Apple."}, 85 | %{role: :assistant, content: "Apple"}, 86 | %{role: :user, content: "What did you just say?."}, 87 | ] 88 | 89 | # After stripping completion_tokens are actually 3, although it's useful to know how many tokens were generated. 90 | {:ok, response} = ExLLama.chat_completion(llama, thread, [seed: 2, choices: 2]) 91 | expected_path = priv_dir() <> "/models/local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" 92 | assert = %GenAI.ChatCompletion{ 93 | choices: [ 94 | %GenAI.ChatCompletion.Choice{finish_reason: :stop, index: 0, message: choice_a}, 95 | %GenAI.ChatCompletion.Choice{finish_reason: :stop, index: 1, message: choice_b} 96 | ], 97 | id: nil, 98 | model: expected_path, 99 | seed: 2, 100 | usage: %GenAI.ChatCompletion.Usage{prompt_tokens: 143, total_tokens: 147, completion_tokens: 4}, 101 | vsn: 1.0 102 | } = response 103 | assert choice_a.content == "Apple" 104 | assert choice_b.content == "Apple" 105 | end 106 | 107 | 108 | end 109 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | # Download Required Models 2 | ExUnit.start() 3 | --------------------------------------------------------------------------------