├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── lib ├── ex_llama.ex └── ex_llama │ ├── chat_template.ex │ ├── chat_template │ ├── alpaca.ex │ ├── amber_chat.ex │ ├── chat_ml.ex │ ├── falcon_instruct.ex │ ├── gemma_instruct.ex │ ├── llama_2_chat.ex │ ├── mistral_instruct.ex │ ├── open_chat.ex │ ├── saiga.ex │ ├── solar_instruct.ex │ ├── vicuna.ex │ └── zephyr.ex │ ├── context_params.ex │ ├── embedding_options.ex │ ├── model.ex │ ├── model_options.ex │ ├── nif.ex │ ├── session.ex │ └── session_options.ex ├── mix.exs ├── mix.lock ├── native └── erlang_llama_cpp_nif │ ├── Cargo.lock │ ├── Cargo.toml │ └── src │ ├── lib.rs │ ├── nifs.rs │ ├── nifs │ ├── ex_llama_model.rs │ └── ex_llama_session.rs │ ├── refs.rs │ ├── refs │ ├── model_ref.rs │ └── session_ref.rs │ ├── structs.rs │ └── structs │ ├── completion.rs │ ├── embedding_options.rs │ ├── model.rs │ ├── model_options.rs │ ├── session.rs │ └── session_options.rs ├── priv └── models │ └── local_llama │ └── tiny_llama │ ├── .gitignore │ └── init.sh └── test ├── ex_llama_test.exs └── test_helper.exs /.gitignore: -------------------------------------------------------------------------------- 1 | # Intellij 2 | ex_llama.iml 3 | .idea/ 4 | 5 | # Test Models 6 | test/models/* 7 | 8 | # asdf 9 | .tool-versions 10 | 11 | # The directory Mix will write compiled artifacts to. 12 | /_build/ 13 | 14 | # If you run "mix test --cover", coverage assets end up here. 15 | /cover/ 16 | 17 | # The directory Mix downloads your dependencies sources to. 18 | /deps/ 19 | 20 | # Where third-party dependencies like ExDoc output generated docs. 21 | /doc/ 22 | 23 | # Ignore .fetch files in case you like to edit your project deps locally. 24 | /.fetch 25 | 26 | # If the VM crashes, it generates a dump, let's ignore it too. 27 | erl_crash.dump 28 | 29 | # Also ignore archive artifacts (built via "mix archive.build"). 30 | *.ez 31 | 32 | # Ignore package tarball (built via "mix hex.build"). 33 | ex_llama-*.tar 34 | 35 | # Temporary files, for example, from tests. 36 | /tmp/ 37 | 38 | # Static Libs for rust deps. 39 | /priv/native/* 40 | 41 | # Build artifacts 42 | /native/erlang_llama_cpp_nif/target/ 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Change Log 2 | ===== 3 | 4 | ## 0.1.0 5 | Update to use GenAI Core structs 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 noizu-labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ExLLama: LlamaCpp.rs NIF wrapper for Elixir/Erlang. 2 | ======= 3 | 4 | This is an alpha library for loading and interacting with models via the llama_cpp Rust client, exposed as NIF extensions. 5 | Inspired by [llama_cpp_ex](https://github.com/jeregrine/llama_cpp_ex) 6 | 7 | 8 | ## Getting Started 9 | 1. Add the `ex_llama` dependency to your `mix.exs` file: 10 | 11 | ```elixir 12 | def deps do 13 | [ 14 | {:ex_llama, "~> 0.0.1"} 15 | ] 16 | 17 | end 18 | ``` 19 | 20 | 21 | ## Chat Completion 22 | As of this build, only the `<|role|>message` chat-completion format is supported, such as the one used by TinyLlama. 23 | 24 | 25 | ```elixir 26 | 27 | {:ok, llama} = ExLLama.load_model("./test/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 28 | thread = [ 29 | %{role: :user, content: "Say Hello. And only hello. Example \"Hello\"."}, 30 | %{role: :assistant, content: "Hello"}, 31 | %{role: :user, content: "Repeat what you just said."}, 32 | %{role: :assistant, content: "Hello"}, 33 | %{role: :user, content: "Say Goodbye."}, 34 | %{role: :assistant, content: "Goodbye"}, 35 | %{role: :user, content: "Say Apple."}, 36 | %{role: :assistant, content: "Apple"}, 37 | %{role: :user, content: "What did you just say?."}, 38 | ] 39 | 40 | {:ok, response} = ExLLama.chat_completion(llama, thread, %{seed: 2}) 41 | # response = %{ 42 | # choices: [ 43 | # %{reason: :end, role: "assistant", content: "Apple"}, 44 | # %{reason: :end, role: "assistant", content: "Apple"}, 45 | # %{reason: :end, role: "assistant", content: "Apple"} 46 | # ] 47 | # } 48 | 49 | ``` 50 | 51 | 52 | ## Simple Completion (direct) 53 | ```elixir 54 | {:ok, llama} = ExLLama.load_model("./test/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 55 | {:ok, options} = ExLLama.Session.default_options() 56 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 57 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 58 | {:ok, response} = ExLLama.completion(session, 512, "\n*") 59 | response = String.trim_leading(response) 60 | # "Goodbye." 61 | ``` 62 | 63 | ## Streaming Completion (the final mechanism will be replaced with a Stream) 64 | ```elixir 65 | 66 | def receive_text(acc \\ []) do 67 | receive do 68 | x = {:ok, _} -> Enum.reverse([x|acc]) 69 | x = {:error, _} -> Enum.reverse([x|acc]) 70 | :fin -> 71 | Enum.reverse(acc) 72 | x -> 73 | receive_text([x | acc]) 74 | end 75 | end 76 | 77 | #... 78 | {:ok, llama} = ExLLama.load_model("./test/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 79 | {:ok, options} = ExLLama.Session.default_options() 80 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 81 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello.
Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 82 | ExLLama.Session.start_completing_with(session, %{max_tokens: 512}) 83 | receive_text() 84 | 85 | 86 | ``` 87 | 88 | -------------------------------------------------------------------------------- /lib/ex_llama.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama do 2 | def load_model(path), do: ExLLama.Model.load_from_file(path) 3 | def load_model(path, %ExLLama.ModelOptions{} = opts), do: ExLLama.Model.load_from_file(path, opts) 4 | 5 | def create_session(model), do: ExLLama.Model.create_session(model) 6 | def create_session(model, options), do: ExLLama.Model.create_session(model, options) 7 | 8 | def advance_context(session, content), do: ExLLama.Session.advance_context(session, content) 9 | def completion(session, max_tokens, stop), do: ExLLama.Session.completion(session, max_tokens, stop) 10 | 11 | 12 | @default_choices 1 13 | @default_max_tokens 512 14 | 15 | def chat_completion(model, thread, options) do 16 | 17 | so = cond do 18 | x = options[:session_options] -> put_in(x, [:seed], options[:seed]) 19 | x = options[:seed] -> [seed: x] 20 | :else -> nil 21 | end 22 | options = update_in(options || [], [:add_generation_prompt], 23 | fn 24 | x when is_nil(x) -> true 25 | x -> x 26 | end 27 | ) 28 | session_options = ExLLama.SessionOptions.new(so) 29 | seed = session_options.seed 30 | choices = options[:choices] || @default_choices 31 | max_tokens = options[:max_tokens] || @default_max_tokens 32 | with {:ok, session} <- ExLLama.create_session(model, session_options), 33 | {:ok, thread_context} <- ExLLama.ChatTemplate.to_context(thread, model, options), 34 | {:ok, _} <- ExLLama.Session.set_context(session, thread_context), 35 | {:ok, prompt_tokens} = ExLLama.Session.context_size(session) do 36 | choices = Enum.map(1..choices, 37 | fn(_) -> 38 | with {:ok, %{content: result, token_length: l}} <- ExLLama.Session.completion(session, max_tokens, nil) do 39 | {:ok, {l, result}} 40 | end 41 | end 42 | ) 43 | |> Enum.filter( 44 | fn 45 | {:ok, _} -> true 46 | _ -> false 47 | end) 48 | |> Enum.map(fn {:ok, x} -> x end) 49 | 50 | options = (options || []) 51 | |> put_in([:prompt_tokens], prompt_tokens) 52 | ExLLama.ChatTemplate.extract_response(choices, model, options) 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate do 2 | @type thread :: [map] 3 | @type model :: ExLLama.Model.t 4 | @type meta :: Keyword.t | nil 5 | @type model_response :: {tokens :: integer, String.t} 6 | 7 | @callback support_list() :: {:ok, MapSet.t} 8 | @callback to_context(thread, model, meta) :: {:ok, String.t} 9 | @callback extract_response(response :: [model_response], model, meta) :: {:ok, ExLLama.ChatResponse.t} 10 | 11 | def pick_handler(model, meta) do 12 | cond do 13 | x = meta[:template] -> x 14 | :else -> 15 | # wip 16 | ExLLama.ChatTemplate.Zephyr 17 | end 18 | end 19 | 20 | def to_context(thread, model, meta), do: apply(pick_handler(model, meta), :to_context, [thread, model, meta]) 21 | def extract_response(responses, model, meta), do: apply(pick_handler(model, meta), :extract_response, [responses, model, meta]) 22 | end 23 | 24 | defmodule ExLLama.ChatTemplate.Exception do 25 | defexception 
[:message, :handler, :entry, :row] 26 | def message(%{message: m, handler: h, entry: e, row: r}) do 27 | "#{h}@#{r}: #{m}\n#{inspect e}" 28 | end 29 | 30 | end 31 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/alpaca.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Alpaca do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/alpaca.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token + system_message }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {% if message['role'] == 'user' %} 20 | {{ '### Instruction:\n' + message['content'].strip() + '\n\n' }} 21 | {% elif message['role'] == 'assistant' %} 22 | {{ '### Response:\n' + message['content'].strip() + eos_token + '\n\n' }} 23 | {% endif %} 24 | 25 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 26 | {{ '### Instruction:\n' }} 27 | {% endif %} 28 | {% endfor %} 29 | ```` 30 | """ 31 | end 32 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/amber_chat.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.AmberChat do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/amberchat.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {{ bos_token + system_message }} 20 | {% endif %} 21 | 22 | {% if message['role'] == 'user' %} 23 | {{ '###Human: ' + message['content'].strip() + '\n' }} 24 | {% elif message['role'] == 'assistant' %} 25 | {{ '###Assistant: ' + message['content'].strip() + '\n' }} 26 | {% endif %} 27 | 28 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 29 | {{ '###Assistant:' }} 30 | {% endif %} 31 | {% endfor %} 32 | ```` 33 | """ 34 | end 35 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/chat_ml.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.ChatML do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/chatml.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set offset = 1 %} 7 | {% else %} 8 | {% set offset = 0 %} 9 | {% endif %} 10 | 11 | {{ bos_token }} 12 | {% for message in messages %} 13 | {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %} 14 | {{ 
raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 15 | {% endif %} 16 | 17 | {{ '<|im_start|>' + message['role'] + '\n' + message['content'].strip() + '<|im_end|>\n' }} 18 | 19 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 20 | {{ '<|im_start|>assistant\n' }} 21 | {% endif %} 22 | {% endfor %} 23 | ```` 24 | """ 25 | end 26 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/falcon_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.FalconInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/falcon-instruct.jinja]] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'] %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {{ system_message.strip() }} 20 | {% endif %} 21 | {{ '\n\n' + message['role'].title() + ': ' + message['content'].strip().replace('\r\n', '\n').replace('\n\n', '\n') }} 22 | 23 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 24 | {{ '\n\nAssistant:' }} 25 | {% endif %} 26 | {% endfor %} 27 | ```` 28 | """ 29 | end 30 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/gemma_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.GemmaInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/gemma-it.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {% set content = system_message + message['content'] %} 20 | {% else %} 21 | {% set content = message['content'] %} 22 | {% endif %} 23 | 24 | {% if (message['role'] == 'assistant') %} 25 | {% set role = 'model' %} 26 | {% else %} 27 | {% set role = message['role'] %} 28 | {% endif %} 29 | 30 | {{ '' + role + '\n' + content.strip() + '\n' }} 31 | 32 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 33 | {{'model\n'}} 34 | {% endif %} 35 | {% endfor %} 36 | ```` 37 | """ 38 | 39 | 40 | def support_list() do 41 | [ ] 42 | end 43 | 44 | defp format_line(message, eos_token) do 45 | role = case message.role do 46 | :assistant -> :model 47 | x -> x 48 | end 49 | "#{role}\n#{String.trim(message.content)}\n" 50 | end 51 | 52 | def extract_response(responses, model, options) do 53 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 54 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 55 | choices = 
responses 56 | |> Enum.with_index() 57 | |> Enum.map( 58 | fn 59 | {{tokens, x}, index} -> 60 | x = x 61 | |> String.trim() 62 | |> String.trim_trailing(eos_token) 63 | 64 | x = GenAI.Message.assistant(x) 65 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 66 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 67 | end) 68 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 69 | prompt_tokens = options[:prompt_tokens] 70 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 71 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 72 | {:ok, completion} 73 | end 74 | end 75 | 76 | def compact(thread, acc \\ []) 77 | def compact([], acc), do: acc 78 | def compact([h], acc), do: [h | acc] 79 | def compact([%{role: :system} = s, %{role: :user} = u|t], acc) do 80 | c = %{u| content: "#{String.trim(s.content)}\n\n#{String.trim(u.content)}"} 81 | compact(t, [c| acc]) 82 | end 83 | def compact([h|t], acc), do: compact(t, [h|acc]) 84 | 85 | def to_context(thread, model, options) do 86 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 87 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 88 | lines = thread 89 | |> compact() 90 | |> Enum.reverse() 91 | |> Enum.map(&format_line(&1, eos_token)) 92 | |> Enum.join("") 93 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 94 | {:ok, lines <> "model\n"} 95 | else 96 | {:ok, lines} 97 | end 98 | end 99 | end 100 | 101 | end 102 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/llama_2_chat.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.LLama2Chat do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/llama-2-chat.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = '<>\n' + messages[0]['content'].strip() + '\n<>\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {% set content = system_message + message['content'] %} 20 | {% else %} 21 | {% set content = message['content'] %} 22 | {% endif %} 23 | 24 | {% if message['role'] == 'user' %} 25 | {{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }} 26 | {% elif message['role'] == 'assistant' %} 27 | {{ ' ' + content.strip() + ' ' + eos_token }} 28 | {% endif %} 29 | {% endfor %} 30 | ```` 31 | """ 32 | 33 | 34 | 35 | def support_list() do 36 | [ ] 37 | end 38 | 39 | defp format_line(message, bos_token, eos_token) do 40 | case message.role do 41 | :user -> "#{bos_token}[INST] #{String.trim(message.content)} [/INST]" 42 | :assistant -> " #{String.trim(message.content)} #{eos_token}" 43 | end 44 | end 45 | 46 | def extract_response(responses, model, options) do 47 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 48 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 49 | choices = responses 50 | 
|> Enum.with_index() 51 | |> Enum.map( 52 | fn 53 | {{tokens, x}, index} -> 54 | x = x 55 | |> String.trim() 56 | |> String.trim_trailing(eos_token) 57 | x = GenAI.Message.assistant(x) 58 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 59 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 60 | end) 61 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 62 | prompt_tokens = options[:prompt_tokens] 63 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 64 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 65 | {:ok, completion} 66 | end 67 | end 68 | 69 | def compact(thread, acc \\ []) 70 | def compact([], acc), do: acc 71 | def compact([h], acc), do: [h | acc] 72 | def compact([%{role: :system} = s, %{role: :user} = u|t], acc) do 73 | c = %{u| content: "<>\n#{String.trim(s.content)}\n<>\n\n#{String.trim(u.content)}"} 74 | compact(t, [c| acc]) 75 | end 76 | def compact([h|t], acc), do: compact(t, [h|acc]) 77 | 78 | def to_context(thread, model, options) do 79 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 80 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 81 | lines = thread 82 | |> compact() 83 | |> Enum.reverse() 84 | |> Enum.map(&format_line(&1, bos_token, eos_token)) 85 | |> Enum.join("") 86 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 87 | {:ok, lines} 88 | else 89 | {:ok, lines} 90 | end 91 | end 92 | end 93 | 94 | end 95 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/mistral_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.MistralInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/mistral-instruct.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {% if loop.index0 == 0 %} 20 | {% set content = system_message + message['content'] %} 21 | {% else %} 22 | {% set content = message['content'] %} 23 | {% endif %} 24 | 25 | {% if message['role'] == 'user' %} 26 | {{ '[INST] ' + content.strip() + ' [/INST]' }} 27 | {% elif message['role'] == 'assistant' %} 28 | {{ ' ' + content.strip() + ' ' + eos_token }} 29 | {% endif %} 30 | {% endfor %} 31 | ```` 32 | """ 33 | 34 | 35 | 36 | def support_list() do 37 | [ ] 38 | end 39 | 40 | defp format_line(message, eos_token) do 41 | case message.role do 42 | :user -> "[INST] #{String.trim(message.content)} [/INST]" 43 | :assistant -> " #{String.trim(message.content)} #{eos_token}" 44 | end 45 | end 46 | 47 | def extract_response(responses, model, options) do 48 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 49 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 50 | choices = responses 51 | |> Enum.with_index() 52 | |> 
Enum.map( 53 | fn 54 | {{tokens, x}, index} -> 55 | x = x 56 | |> String.trim() 57 | |> String.trim_trailing(eos_token) 58 | x = GenAI.Message.assistant(x) 59 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 60 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 61 | end) 62 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 63 | prompt_tokens = options[:prompt_tokens] 64 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 65 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 66 | {:ok, completion} 67 | end 68 | end 69 | 70 | def compact(thread, acc \\ []) 71 | def compact([], acc), do: acc 72 | def compact([h], acc), do: [h | acc] 73 | def compact([%{role: :system} = s, %{role: :user} = u|t], acc) do 74 | c = %{u| content: "#{String.trim(s.content)}\n\n#{String.trim(u.content)}"} 75 | compact(t, [c| acc]) 76 | end 77 | def compact([h|t], acc), do: compact(t, [h|acc]) 78 | 79 | def to_context(thread, model, options) do 80 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 81 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 82 | lines = thread 83 | |> compact() 84 | |> Enum.reverse() 85 | |> Enum.map(&format_line(&1, eos_token)) 86 | |> Enum.join("") 87 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 88 | {:ok, bos_token <> lines} 89 | else 90 | {:ok, bos_token <> lines} 91 | end 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/open_chat.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.OpenChat do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/openchat.jinja]] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '<|end_of_turn|>' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token + system_message }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>' }} 20 | 21 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 22 | {{ 'GPT4 Correct Assistant:' }} 23 | {% endif %} 24 | {% endfor %} 25 | ```` 26 | """ 27 | 28 | 29 | def support_list() do 30 | [ ] 31 | end 32 | 33 | 34 | defp format_line(%{role: :system} = message) do 35 | "#{String.trim(message.content)}<|end_of_turn|>" 36 | end 37 | defp format_line(message) do 38 | "GPT4 Correct #{String.capitalize(to_string(message.role))}: #{String.trim(message.content)}<|end_of_turn|>" 39 | end 40 | 41 | def extract_response(responses, model, options) do 42 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 43 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 44 | choices = responses 45 | |> Enum.with_index() 46 | |> Enum.map( 47 | fn 48 | {{tokens, x}, index} -> 49 | x = x 50 | |> String.trim() 51 | |> 
String.trim_trailing(eos_token) 52 | x = GenAI.Message.assistant(x) 53 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 54 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 55 | end) 56 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 57 | prompt_tokens = options[:prompt_tokens] 58 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 59 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 60 | {:ok, completion} 61 | end 62 | end 63 | 64 | def to_context(thread, model, options) do 65 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model) do 66 | lines = thread 67 | |> Enum.map(&format_line/1) 68 | |> Enum.join("") 69 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 70 | {:ok, bos_token <> lines <> "GPT4 Correct Assistant:"} 71 | else 72 | {:ok, bos_token <> lines} 73 | end 74 | end 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/saiga.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Saiga do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/saiga.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = bos_token + 'system' + '\n' + messages[0]['content'].strip() + eos_token %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {% for message in loop_messages %} 14 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 15 | {{ raise_exception('Conversation roles must alternate user/bot/user/bot/...') }} 16 | {% endif %} 17 | 18 | {% if loop.index0 == 0 %} 19 | {{ system_message }} 20 | {% endif %} 21 | 22 | {{ bos_token + message['role'] + '\n' + message['content'].strip() + eos_token }} 23 | 24 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 25 | {{ bos_token + 'bot\n' }} 26 | {% endif %} 27 | {% endfor %} 28 | ```` 29 | """ 30 | 31 | def support_list() do 32 | [ ] 33 | end 34 | 35 | defp format_line(message, bos_token, eos_token) do 36 | "#{bos_token}#{message.role}\n#{String.trim(message.content)}#{eos_token}" 37 | end 38 | 39 | def extract_response(responses, model, options) do 40 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 41 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 42 | choices = responses 43 | |> Enum.with_index() 44 | |> Enum.map( 45 | fn 46 | {{tokens, x}, index} -> 47 | x = x 48 | |> String.trim() 49 | |> String.trim_trailing(eos_token) 50 | x = GenAI.Message.assistant(x) 51 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 52 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 53 | end) 54 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 55 | prompt_tokens = options[:prompt_tokens] 56 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 57 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: 
usage} 58 | {:ok, completion} 59 | end 60 | end 61 | 62 | def to_context(thread, model, options) do 63 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model), 64 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 65 | lines = thread 66 | |> Enum.map(&format_line(&1, bos_token, eos_token)) 67 | |> Enum.join("") 68 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 69 | {:ok, lines <> "#{bos_token}bot\n"} 70 | else 71 | {:ok, lines} 72 | end 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/solar_instruct.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.SolarInstruct do 2 | @moduledoc """ 3 | based on: [https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/solar-instruct.jinja] 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set offset = 1 %} 7 | {% else %} 8 | {% set offset = 0 %} 9 | {% endif %} 10 | 11 | {{ bos_token }} 12 | {% for message in messages %} 13 | {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %} 14 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 15 | {% endif %} 16 | 17 | {{ '### ' + message['role'].title() + ':\n' + message['content'].strip() + '\n\n' }} 18 | 19 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 20 | {{ '### Assistant:\n' }} 21 | {% endif %} 22 | {% endfor %} 23 | ```` 24 | """ 25 | 26 | 27 | def support_list() do 28 | [ ] 29 | end 30 | 31 | defp format_line(message) do 32 | "### #{String.capitalize(to_string(message.role))}:\n#{String.trim(message.content)}\n\n" 33 | end 34 | 35 | def extract_response(responses, model, options) do 36 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 37 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 38 | choices = responses 39 | |> Enum.with_index() 40 | |> Enum.map( 41 | fn 42 | {{tokens, x}, index} -> 43 | x = x 44 | |> String.trim() 45 | |> String.trim_trailing(eos_token) 46 | x = GenAI.Message.assistant(x) 47 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 48 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 49 | end) 50 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 51 | prompt_tokens = options[:prompt_tokens] 52 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 53 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 54 | {:ok, completion} 55 | end 56 | end 57 | 58 | def to_context(thread, model, options) do 59 | with {:ok, bos_token} <- ExLLama.Model.__bos__(model) do 60 | lines = thread 61 | |> Enum.map(&format_line/1) 62 | |> Enum.join("") 63 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 64 | {:ok, bos_token <> lines <> "### Assistant:\n"} 65 | else 66 | {:ok, bos_token <> lines} 67 | end 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/vicuna.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Vicuna do 2 | @moduledoc """ 3 | based on: 
https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/zephyr.jinja 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set loop_messages = messages[1:] %} 7 | {% set system_message = messages[0]['content'].strip() + '\n\n' %} 8 | {% else %} 9 | {% set loop_messages = messages %} 10 | {% set system_message = '' %} 11 | {% endif %} 12 | 13 | {{ bos_token + system_message }} 14 | {% for message in loop_messages %} 15 | {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} 16 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 17 | {% endif %} 18 | 19 | {% if message['role'] == 'user' %} 20 | {{ 'USER: ' + message['content'].strip() + '\n' }} 21 | {% elif message['role'] == 'assistant' %} 22 | {{ 'ASSISTANT: ' + message['content'].strip() + eos_token + '\n' }} 23 | {% endif %} 24 | 25 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 26 | {{ 'ASSISTANT:' }} 27 | {% endif %} 28 | {% endfor %} 29 | ```` 30 | """ 31 | 32 | 33 | def support_list() do 34 | [] 35 | end 36 | 37 | defp format_line(message = %{role: :system}, eos_token) do 38 | "#{String.trim(message.content)}\n\n" 39 | end 40 | defp format_line(message, eos_token) do 41 | "#{message.role |> String.upcase()}: #{String.trim(message.content)}#{eos_token}\n" 42 | end 43 | 44 | def extract_response(responses, model, options) do 45 | with {:ok, model_name} <- ExLLama.Model.__model_name__(model), 46 | {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 47 | choices = responses 48 | |> Enum.with_index() 49 | |> Enum.map( 50 | fn 51 | {{tokens, x}, index} -> 52 | x = x 53 | |> String.trim() 54 | |> String.trim_trailing(eos_token) 55 | x = GenAI.Message.assistant(x) 56 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 57 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 58 | end) 59 | completion_tokens = Enum.map(responses, fn {tokens,_} -> tokens end) |> Enum.max() 60 | prompt_tokens = options[:prompt_tokens] 61 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 62 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 63 | {:ok, completion} 64 | end 65 | end 66 | 67 | def to_context(thread, model, options) do 68 | with {:ok, eos_token} <- ExLLama.Model.__eos__(model), 69 | {:ok, bos_token} <- ExLLama.Model.__bos__(model) do 70 | system_message_offset = if (Enum.at(thread, 0)[:role] == :system), do: 1, else: 0 71 | lines = thread 72 | |> Enum.with_index() 73 | |> Enum.map( 74 | fn 75 | {msg = %{role: :system = role, content: content}, 0} -> 76 | format_line(msg, eos_token) 77 | {msg = %{role: :system = role, content: content}, index} -> 78 | unless options[:strict] == false do 79 | raise ExLLama.ChatTemplate.Exception, message: "Only the first message may be from system. 
Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 80 | end 81 | format_line(msg, eos_token) 82 | 83 | {msg =%{role: :assistant = role, content: content}, index} -> 84 | unless options[:strict] == false or index <= (1 + system_message_offset) do 85 | if Enum.at(thread, index - 2)[:role] != role do 86 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 87 | end 88 | end 89 | format_line(msg, eos_token) 90 | 91 | {msg = %{role: :user = role, content: content}, index} -> 92 | unless options[:strict] == false or index <= (2 + system_message_offset) do 93 | if Enum.at(thread, index - 2)[:role] != role do 94 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 95 | end 96 | end 97 | format_line(msg, eos_token) 98 | 99 | {msg = %{role: role, content: content}, index} -> 100 | unless options[:strict] == false or options[:expanded_roles] do 101 | raise ExLLama.ChatTemplate.Exception, message: "Only the first user,assistant,system roles are supported. Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 102 | end 103 | format_line(msg, eos_token) 104 | end 105 | ) |> Enum.join("\n") 106 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 107 | {:ok, bos_token <> lines <> "ASSISTANT:"} 108 | else 109 | {:ok, bos_token <> lines} 110 | end 111 | end 112 | end 113 | 114 | end 115 | -------------------------------------------------------------------------------- /lib/ex_llama/chat_template/zephyr.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ChatTemplate.Zephyr do 2 | @moduledoc """ 3 | based on: https://github.com/chujiezheng/chat_templates/blob/main/chat_templates/zephyr.jinja 4 | ```jinja 5 | {% if messages[0]['role'] == 'system' %} 6 | {% set offset = 1 %} 7 | {% else %} 8 | {% set offset = 0 %} 9 | {% endif %} 10 | 11 | {% for message in messages %} 12 | {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %} 13 | {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }} 14 | {% endif %} 15 | 16 | {{ '<|' + message['role'] + '|>\n' + message['content'].strip() + eos_token + '\n' }} 17 | 18 | {% if loop.last and message['role'] == 'user' and add_generation_prompt %} 19 | {{ '<|assistant|>\n' }} 20 | {% endif %} 21 | {% endfor %} 22 | ``` 23 | """ 24 | 25 | def support_list() do 26 | [ {~r"^tinyllama-1.1b.*$" , 1}] 27 | end 28 | 29 | defp format_line(message, eos_token) do 30 | "<|#{message.role}|>\n #{String.trim(message.content)}#{eos_token}\n" 31 | end 32 | 33 | def extract_response(responses, model, options) do 34 | with {:ok, eos_token} <- ExLLama.Model.__eos__(model), 35 | {:ok, model_name} <- ExLLama.Model.__model_name__(model) do 36 | 37 | choices = responses 38 | |> Enum.with_index() 39 | |> Enum.map( 40 | fn 41 | {{tokens, x}, index} -> 42 | x = x 43 | |> String.trim() 44 | |> String.trim_trailing(eos_token) 45 | x = GenAI.Message.assistant(x) 46 | # todo tool use extension and finish_reason support 47 | finish_reason = if (tokens < options[:max_tokens]), do: :stop, else: :max_tokens 48 | %GenAI.ChatCompletion.Choice{index: index, message: x, finish_reason: finish_reason} 49 | end) 50 | completion_tokens = Enum.map(responses, fn 
{tokens,_} -> tokens end) |> Enum.max() 51 | prompt_tokens = options[:prompt_tokens] 52 | usage = %GenAI.ChatCompletion.Usage{prompt_tokens: prompt_tokens, total_tokens: completion_tokens + prompt_tokens, completion_tokens: completion_tokens} 53 | completion = %GenAI.ChatCompletion{id: nil, model: model_name, seed: options[:seed], choices: choices, usage: usage} 54 | {:ok, completion} 55 | 56 | end 57 | end 58 | 59 | def to_context(thread, model, options) do 60 | with {:ok, eos_token} <- ExLLama.Model.__eos__(model) do 61 | system_message_offset = if (Enum.at(thread, 0)[:role] == :system), do: 1, else: 0 62 | lines = thread 63 | |> Enum.with_index() 64 | |> Enum.map( 65 | fn 66 | {msg = %{role: :system = role, content: content}, 0} -> 67 | format_line(msg, eos_token) 68 | {msg = %{role: :system = role, content: content}, index} -> 69 | unless options[:strict] == false do 70 | raise ExLLama.ChatTemplate.Exception, message: "Only the first message may be from system. Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 71 | end 72 | format_line(msg, eos_token) 73 | 74 | {msg =%{role: :assistant = role, content: content}, index} -> 75 | unless options[:strict] == false or index <= (1 + system_message_offset) do 76 | if Enum.at(thread, index - 2)[:role] != role do 77 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 78 | end 79 | end 80 | format_line(msg, eos_token) 81 | 82 | {msg = %{role: :user = role, content: content}, index} -> 83 | unless options[:strict] == false or index <= (2 + system_message_offset) do 84 | if Enum.at(thread, index - 2)[:role] != role do 85 | raise ExLLama.ChatTemplate.Exception, message: "Conversation roles must alternate user/assistant/user/assistant/...", handler: __MODULE__, entry: msg, row: index 86 | end 87 | end 88 | format_line(msg, eos_token) 89 | 90 | {msg = %{role: role, content: content}, index} -> 91 | unless options[:strict] == false or options[:expanded_roles] do 92 | raise ExLLama.ChatTemplate.Exception, message: "Only the first user,assistant,system roles are supported. 
Use a different handler or pass `strict: false` to allow", handler: __MODULE__, entry: msg, row: index 93 | end 94 | format_line(msg, eos_token) 95 | end 96 | ) |> Enum.join("\n") 97 | if options[:add_generation_prompt] && Enum.at(thread, -1)[:role] != :assistant do 98 | {:ok, lines <> "<|assistant|>\n"} 99 | else 100 | {:ok, lines} 101 | end 102 | end 103 | end 104 | end 105 | -------------------------------------------------------------------------------- /lib/ex_llama/context_params.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ContextParams do 2 | defstruct [ 3 | :seed, 4 | :n_ctx, 5 | :n_batch, 6 | :n_threads, 7 | :n_threads_batch, 8 | :rope_scaling_type, 9 | :rope_freq_base, 10 | :rope_freq_scale, 11 | :yarn_ext_factor, 12 | :yarn_attn_factor, 13 | :yarn_beta_fast, 14 | :yarn_beta_slow, 15 | :yarn_orig_ctx, 16 | :type_k, 17 | :type_v, 18 | :embedding, 19 | :offload_kqv, 20 | :pooling 21 | ] 22 | 23 | @type t :: %__MODULE__{ 24 | seed: non_neg_integer(), 25 | n_ctx: non_neg_integer(), 26 | n_batch: non_neg_integer(), 27 | n_threads: non_neg_integer(), 28 | n_threads_batch: non_neg_integer(), 29 | rope_scaling_type: integer(), 30 | rope_freq_base: float(), 31 | rope_freq_scale: float(), 32 | yarn_ext_factor: float(), 33 | yarn_attn_factor: float(), 34 | yarn_beta_fast: float(), 35 | yarn_beta_slow: float(), 36 | yarn_orig_ctx: non_neg_integer(), 37 | type_k: non_neg_integer(), 38 | type_v: non_neg_integer(), 39 | embedding: boolean(), 40 | offload_kqv: boolean(), 41 | pooling: boolean() 42 | } 43 | end 44 | -------------------------------------------------------------------------------- /lib/ex_llama/embedding_options.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.EmbeddingOptions do 2 | defstruct [ 3 | n_threads: 1, 4 | n_threads_batch: 1, 5 | ] 6 | end 7 | -------------------------------------------------------------------------------- /lib/ex_llama/model.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.Model do 2 | defstruct [ 3 | resource: nil, 4 | eos: nil, 5 | bos: nil, 6 | name: nil 7 | ] 8 | 9 | def __eos__(model) do 10 | {:ok, List.to_string(model.eos)} 11 | end 12 | 13 | def __bos__(model) do 14 | {:ok, List.to_string(model.bos)} 15 | end 16 | 17 | 18 | def __model_name__(model) do 19 | {:ok, model.name} 20 | end 21 | 22 | def load_from_file(path), do: ExLLama.Nif.__model_nif_load_from_file__(path, ExLLama.ModelOptions.new()) 23 | def load_from_file(path, %ExLLama.ModelOptions{} = opts), do: ExLLama.Nif.__model_nif_load_from_file__(path, opts) 24 | 25 | def detokenize(model, token), do: ExLLama.Nif.__model_nif_detokenize__(model, token) 26 | 27 | def token_to_byte_piece(model, token), do: ExLLama.Nif.__model_nif_token_to_byte_piece__(model, token) 28 | 29 | def token_to_piece(model, token), do: ExLLama.Nif.__model_nif_token_to_piece__(model, token) 30 | 31 | def decode_tokens(model, tokens), do: ExLLama.Nif.__model_nif_decode_tokens__(model, tokens) 32 | 33 | def create_session(model) do 34 | with {:ok, options} <- ExLLama.Session.default_options do 35 | create_session(model, options) 36 | end 37 | end 38 | def create_session(model, options), do: ExLLama.Nif.__model_nif_create_session__(model, options) 39 | 40 | def embeddings(model, inputs, options), do: ExLLama.Nif.__model_nif_embeddings__(model, inputs, options) 41 | 42 | def bos(model), do: ExLLama.Nif.__model_nif_bos__(model) 43 | 44 | def 
eos(model), do: ExLLama.Nif.__model_nif_eos__(model) 45 | 46 | def nl(model), do: ExLLama.Nif.__model_nif_nl__(model) 47 | 48 | def infill_prefix(model), do: ExLLama.Nif.__model_nif_infill_prefix__(model) 49 | 50 | def infill_middle(model), do: ExLLama.Nif.__model_nif_infill_middle__(model) 51 | 52 | def infill_suffix(model), do: ExLLama.Nif.__model_nif_infill_suffix__(model) 53 | 54 | def eot(model), do: ExLLama.Nif.__model_nif_eot__(model) 55 | 56 | def vocabulary_size(model), do: ExLLama.Nif.__model_nif_vocabulary_size__(model) 57 | 58 | def embed_len(model), do: ExLLama.Nif.__model_nif_embed_len__(model) 59 | 60 | def train_len(model), do: ExLLama.Nif.__model_nif_train_len__(model) 61 | end 62 | -------------------------------------------------------------------------------- /lib/ex_llama/model_options.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.ModelOptions do 2 | defstruct [ 3 | :n_gpu_layers, 4 | :split_mode, 5 | :main_gpu, 6 | :vocab_only, 7 | :use_mmap, 8 | :use_mlock 9 | ] 10 | 11 | @type t :: %__MODULE__{ 12 | n_gpu_layers: non_neg_integer(), 13 | split_mode: String.t, # :none | :layer | :row, 14 | main_gpu: non_neg_integer(), 15 | vocab_only: boolean(), 16 | use_mmap: boolean(), 17 | use_mlock: boolean() 18 | } 19 | 20 | def new() do 21 | %__MODULE__{ 22 | n_gpu_layers: 0, 23 | split_mode: "none", 24 | main_gpu: 0, 25 | vocab_only: false, 26 | use_mmap: false, 27 | use_mlock: false 28 | } 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/ex_llama/nif.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.Nif do 2 | use Rustler, 3 | otp_app: :ex_llama, 4 | crate: :erlang_llama_cpp_nif 5 | 6 | defstruct [ 7 | resource: nil 8 | ] 9 | 10 | def __model_nif_load_from_file__(_,_), do: :erlang.nif_error(:nif_not_loaded) 11 | def __model_nif_detokenize__(_, _), do: :erlang.nif_error(:nif_not_loaded) 12 | def __model_nif_token_to_byte_piece__(_, _), do: :erlang.nif_error(:nif_not_loaded) 13 | def __model_nif_token_to_piece__(_, _), do: :erlang.nif_error(:nif_not_loaded) 14 | def __model_nif_decode_tokens__(_, _), do: :erlang.nif_error(:nif_not_loaded) 15 | def __model_nif_create_session__(_, _), do: :erlang.nif_error(:nif_not_loaded) 16 | def __model_nif_embeddings__(_, _, _), do: :erlang.nif_error(:nif_not_loaded) 17 | def __model_nif_bos__(_), do: :erlang.nif_error(:nif_not_loaded) 18 | def __model_nif_eos__(_), do: :erlang.nif_error(:nif_not_loaded) 19 | def __model_nif_nl__(_), do: :erlang.nif_error(:nif_not_loaded) 20 | def __model_nif_infill_prefix__(_), do: :erlang.nif_error(:nif_not_loaded) 21 | def __model_nif_infill_middle__(_), do: :erlang.nif_error(:nif_not_loaded) 22 | def __model_nif_infill_suffix__(_), do: :erlang.nif_error(:nif_not_loaded) 23 | def __model_nif_eot__(_), do: :erlang.nif_error(:nif_not_loaded) 24 | def __model_nif_vocabulary_size__(_), do: :erlang.nif_error(:nif_not_loaded) 25 | def __model_nif_embed_len__(_), do: :erlang.nif_error(:nif_not_loaded) 26 | def __model_nif_train_len__(_), do: :erlang.nif_error(:nif_not_loaded) 27 | 28 | def __context_nif_load_model__(_,_), do: :erlang.nif_error(:nif_not_loaded) 29 | def __context_nif_default_session_options__(), do: :erlang.nif_error(:nif_not_loaded) 30 | def __context_nif_create_session__(_, _), do: :erlang.nif_error(:nif_not_loaded) 31 | def __context_nif_advance_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 32 | def 
__context_nif_complete__(_,_), do: :erlang.nif_error(:nif_not_loaded) 33 | 34 | 35 | def __session_nif_default_session_options__(), do: :erlang.nif_error(:nif_not_loaded) 36 | def __session_nif_advance_context_with_tokens__(_, _), do: :erlang.nif_error(:nif_not_loaded) 37 | def __session_nif_advance_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 38 | def __session_nif_start_completing_with__(_pid, _session, _max_tokens), do: :erlang.nif_error(:nif_not_loaded) 39 | def __session_nif_completion__(_, _, _), do: :erlang.nif_error(:nif_not_loaded) 40 | def __session_nif_model__(_), do: :erlang.nif_error(:nif_not_loaded) 41 | def __session_nif_params__(_), do: :erlang.nif_error(:nif_not_loaded) 42 | def __session_nif_context_size__(_), do: :erlang.nif_error(:nif_not_loaded) 43 | def __session_nif_context__(_), do: :erlang.nif_error(:nif_not_loaded) 44 | def __session_nif_truncate_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 45 | def __session_nif_set_context_to_tokens__(_, _), do: :erlang.nif_error(:nif_not_loaded) 46 | def __session_nif_set_context__(_, _), do: :erlang.nif_error(:nif_not_loaded) 47 | def __session_deep_copy__(_), do: :erlang.nif_error(:nif_not_loaded) 48 | 49 | 50 | 51 | 52 | 53 | 54 | end 55 | -------------------------------------------------------------------------------- /lib/ex_llama/session.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.Session do 2 | defstruct [ 3 | seed: nil, 4 | model_name: nil, 5 | resource: nil 6 | ] 7 | 8 | def default_options(), do: ExLLama.Nif.__session_nif_default_session_options__() 9 | def advance_context_with_tokens(%__MODULE__{resource: _} = session, context), do: ExLLama.Nif.__session_nif_advance_context_with_tokens__(session.resource, context) 10 | def advance_context(%__MODULE__{resource: _} = session, context), do: ExLLama.Nif.__session_nif_advance_context__(session.resource, context) 11 | def start_completing_with(%__MODULE__{resource: _} = session, options) do 12 | # @TODO this is a little hacky, threading should be done in nif but passing env into the thread is unsupported. 
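# Note (descriptive, inferred from this function and the README streaming example): the receiving
# pid (options[:pid], defaulting to self()) is handed to the NIF so generated text can be streamed
# back to it as messages, and the spawned process below forwards the NIF's final result to the same pid.
# Callers typically drain the mailbox until a terminating {:ok, _}, {:error, _}, or :fin message
# arrives, as shown by receive_text/1 in the README.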
13 | max_tokens = options[:max_tokens] || 512 14 | pid = with nil <- options[:pid] do 15 | self() 16 | end 17 | spawn fn -> 18 | o = ExLLama.Nif.__session_nif_start_completing_with__(pid, session.resource, max_tokens) 19 | send(pid, o) 20 | end 21 | :ok 22 | end 23 | def completion(%__MODULE__{resource: _} = session, max_tokens, stop), do: ExLLama.Nif.__session_nif_completion__(session.resource, max_tokens, stop) 24 | def model(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_model__(session.resource) 25 | def params(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_params__(session.resource) 26 | def context_size(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_context_size__(session.resource) 27 | def context(%__MODULE__{resource: _} = session), do: ExLLama.Nif.__session_nif_context__(session.resource) 28 | def truncate_context(%__MODULE__{resource: _} = session, n_tokens), do: ExLLama.Nif.__session_nif_truncate_context__(session.resource, n_tokens) 29 | def set_context_to_tokens(%__MODULE__{resource: _} = session, tokens), do: ExLLama.Nif.__session_nif_set_context_to_tokens__(session.resource, tokens) 30 | def set_context(%__MODULE__{resource: _} = session, context), do: ExLLama.Nif.__session_nif_set_context__(session.resource, context) 31 | def deep_copy(%__MODULE__{resource: _} = session) do 32 | with {:ok, copy} <- ExLLama.Nif.__session_deep_copy__(session.resource) do 33 | {:ok, put_in(copy, [Access.key(:model_name)], session.model_name)} 34 | end 35 | end 36 | 37 | end 38 | -------------------------------------------------------------------------------- /lib/ex_llama/session_options.ex: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.SessionOptions do 2 | defstruct [ 3 | :seed, 4 | :n_ctx, 5 | :n_batch, 6 | :n_threads, 7 | :n_threads_batch, 8 | :rope_scaling_type, 9 | :rope_freq_base, 10 | :rope_freq_scale, 11 | :yarn_ext_factor, 12 | :yarn_attn_factor, 13 | :yarn_beta_fast, 14 | :yarn_beta_slow, 15 | :yarn_orig_ctx, 16 | :type_k, 17 | :type_v, 18 | :embedding, 19 | :offload_kqv, 20 | :pooling, 21 | ] 22 | 23 | def new() do 24 | {:ok, session_options} = ExLLama.Session.default_options() 25 | session_options 26 | end 27 | def new(nil), do: new() 28 | def new(%__MODULE__{} = x), do: x 29 | def new(params) when is_list(params), do: new(Map.new(params)) 30 | def new(params) when is_map(params) do 31 | {:ok, session_options} = ExLLama.Session.default_options() 32 | so = Map.from_struct(session_options) 33 | allowed_keys = Map.keys(so) 34 | po = Map.take(params, allowed_keys) 35 | unless po == %{} do 36 | ExLLama.SessionOptions.__struct__(Map.merge(so, po)) 37 | else 38 | session_options 39 | end 40 | end 41 | 42 | end 43 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule ExLLama.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :ex_llama, 7 | name: "LLama CPP Nif Wrapper", 8 | description: description(), 9 | package: package(), 10 | version: "0.1.0", 11 | elixir: "~> 1.16", 12 | start_permanent: Mix.env() == :prod, 13 | rustler_crates: rustler_crates(), 14 | docs: [ 15 | main: "ExLLama", 16 | extras: [ 17 | "README.md", 18 | "LICENSE" 19 | ] 20 | ], 21 | dialyzer: [ 22 | plt_file: {:no_warn, "priv/plts/project.plt"} 23 | ], 24 | deps: deps() 25 | ] 26 | end 27 | 28 | 29 | defp description() do 30 | "NIF Wrapper around the 
rust LLamaCPP client allowing elixir code to load/infer against gguf format models." 31 | end 32 | 33 | defp rustler_crates do 34 | [ 35 | erlang_llama_cpp_nif: [ 36 | path: "native/erlang_llama_cpp_nif", 37 | mode: rustc_mode(Mix.env()) 38 | ] 39 | ] 40 | end 41 | 42 | defp rustc_mode(:prod), do: :release 43 | defp rustc_mode(_), do: :debug 44 | 45 | defp package() do 46 | [ 47 | licenses: ["MIT"], 48 | links: %{ 49 | project: "https://github.com/noizu-labs-ml/ex_llama", 50 | developer_github: "https://github.com/noizu" 51 | }, 52 | files: ~w(lib native priv mix.exs README.md CHANGELOG.md LICENSE*), 53 | exclude_patterns: ["priv/models/local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"] 54 | ] 55 | end 56 | 57 | # Run "mix help compile.app" to learn about applications. 58 | def application do 59 | [ 60 | extra_applications: [:logger] 61 | ] 62 | end 63 | 64 | # Run "mix help deps" to learn about dependencies. 65 | defp deps do 66 | [ 67 | {:rustler, "~> 0.32.1", runtime: false}, 68 | {:ex_doc, "~> 0.28.3", only: [:dev, :test], optional: true, runtime: false}, # Documentation Provider 69 | {:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false}, 70 | {:genai_core, "~> 0.2"}, 71 | {:finch, "~> 0.15", optional: true}, 72 | {:elixir_uuid, "~> 1.2", optional: true}, 73 | {:shortuuid, "~> 3.0", optional: true}, 74 | 75 | 76 | # {:dep_from_hexpm, "~> 0.3.0"}, 77 | # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"} 78 | ] 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, 3 | "credo": {:hex, :credo, "1.7.12", "9e3c20463de4b5f3f23721527fcaf16722ec815e70ff6c60b86412c695d426c1", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8493d45c656c5427d9c729235b99d498bd133421f3e0a683e5c1b561471291e5"}, 4 | "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, 5 | "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, 6 | "elixir_uuid": {:hex, :elixir_uuid, "1.2.1", "dce506597acb7e6b0daeaff52ff6a9043f5919a4c3315abb4143f0b00378c097", [:mix], [], "hexpm", "f7eba2ea6c3555cea09706492716b0d87397b88946e6380898c2889d68585752"}, 7 | "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 8 | "ex_doc": {:hex, :ex_doc, "0.28.6", "2bbd7a143d3014fc26de9056793e97600ae8978af2ced82c2575f130b7c0d7d7", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: 
false]}], "hexpm", "bca1441614654710ba37a0e173079273d619f9160cbcc8cd04e6bd59f1ad0e29"}, 9 | "file_system": {:hex, :file_system, "1.1.0", "08d232062284546c6c34426997dd7ef6ec9f8bbd090eb91780283c9016840e8f", [:mix], [], "hexpm", "bfcf81244f416871f2a2e15c1b515287faa5db9c6bcf290222206d120b3d43f6"}, 10 | "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"}, 11 | "genai_core": {:hex, :genai_core, "0.2.0", "76324ac29a30b64543dd4ed15d7513902407e0dea0bf357c145da52712d25d9a", [:mix], [{:elixir_uuid, "~> 1.2", [hex: :elixir_uuid, repo: "hexpm", optional: true]}, {:finch, "~> 0.15", [hex: :finch, repo: "hexpm", optional: true]}, {:floki, ">= 0.30.0", [hex: :floki, repo: "hexpm", optional: true]}, {:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: true]}, {:noizu_labs_core, "~> 0.1", [hex: :noizu_labs_core, repo: "hexpm", optional: false]}, {:shortuuid, "~> 3.0", [hex: :shortuuid, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: true]}, {:yaml_elixir, "~> 2.9.0", [hex: :yaml_elixir, repo: "hexpm", optional: true]}, {:ymlr, "~> 4.0", [hex: :ymlr, repo: "hexpm", optional: true]}], "hexpm", "14317445578e2654e84fd647359f4e5366e7ba8d94b89bab50774993f20117b0"}, 12 | "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, 13 | "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, 14 | "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, 15 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 16 | "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"}, 17 | "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"}, 18 | "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", 
optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"}, 19 | "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, 20 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 21 | "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, 22 | "noizu_labs_core": {:hex, :noizu_labs_core, "0.1.5", "881988ee5c0492a9f5d00ecf228c8df1c0621e614ccacfb22431c1454da39016", [:mix], [{:credo, "~> 1.0", [hex: :credo, repo: "hexpm", optional: false]}], "hexpm", "887dcadfbc3fd176ba8d71fae9e29c3f825f2ee7354f54cb70af3a0e3de24567"}, 23 | "rustler": {:hex, :rustler, "0.32.1", "f4cf5a39f9e85d182c0a3f75fa15b5d0add6542ab0bf9ceac6b4023109ebd3fc", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "b96be75526784f86f6587f051bc8d6f4eaff23d6e0f88dbcfe4d5871f52946f7"}, 24 | "shortuuid": {:hex, :shortuuid, "3.0.0", "028684d9eeed0ad4b800e8481afd854e1a61c526f35952455b2ee4248601e7b8", [:mix], [], "hexpm", "dfd8f80f514cbb91622cb83f4ac0d6e2f06d98cc6d4aeba94444a212289d0d39"}, 25 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, 26 | "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, 27 | } 28 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.21.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "autocfg" 31 | version = "1.2.0" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" 34 | 35 | [[package]] 36 | name = "backtrace" 37 | version = "0.3.71" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" 40 | dependencies = [ 41 | "addr2line", 42 | "cc", 43 | "cfg-if", 44 | "libc", 45 | "miniz_oxide", 46 | "object", 47 | "rustc-demangle", 48 | ] 49 | 50 | [[package]] 51 | name = "bindgen" 52 | version = "0.69.4" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" 55 | dependencies = [ 56 | "bitflags", 57 | "cexpr", 58 | "clang-sys", 59 | "itertools", 60 | "lazy_static", 61 | "lazycell", 62 | "log", 63 | "prettyplease", 64 | "proc-macro2", 65 | "quote", 66 | "regex", 67 | "rustc-hash", 68 | "shlex", 69 | "syn 2.0.58", 70 | "which", 71 | ] 72 | 73 | [[package]] 74 | name = "bitflags" 75 | version = "2.5.0" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" 78 | 79 | [[package]] 80 | name = "cc" 81 | version = "1.0.92" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" 84 | dependencies = [ 85 | "jobserver", 86 | "libc", 87 | ] 88 | 89 | [[package]] 90 | name = "cexpr" 91 | version = "0.6.0" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 94 | dependencies = [ 95 | "nom", 96 | ] 97 | 98 | [[package]] 99 | name = "cfg-if" 100 | version = "1.0.0" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 103 | 104 | [[package]] 105 | name = "clang-sys" 106 | version = "1.7.0" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" 109 | dependencies = [ 110 | "glob", 111 | "libc", 112 | "libloading", 113 | ] 114 | 115 | [[package]] 116 | name = "convert_case" 117 | version = "0.4.0" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 120 | 121 | [[package]] 122 | name = "derive_more" 123 | version = "0.99.17" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = 
"4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" 126 | dependencies = [ 127 | "convert_case", 128 | "proc-macro2", 129 | "quote", 130 | "rustc_version", 131 | "syn 1.0.109", 132 | ] 133 | 134 | [[package]] 135 | name = "either" 136 | version = "1.10.0" 137 | source = "registry+https://github.com/rust-lang/crates.io-index" 138 | checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" 139 | 140 | [[package]] 141 | name = "erlang_llama_cpp_nif" 142 | version = "0.0.1" 143 | dependencies = [ 144 | "llama_cpp", 145 | "regex", 146 | "rustler", 147 | ] 148 | 149 | [[package]] 150 | name = "errno" 151 | version = "0.3.8" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" 154 | dependencies = [ 155 | "libc", 156 | "windows-sys", 157 | ] 158 | 159 | [[package]] 160 | name = "futures" 161 | version = "0.3.30" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" 164 | dependencies = [ 165 | "futures-channel", 166 | "futures-core", 167 | "futures-executor", 168 | "futures-io", 169 | "futures-sink", 170 | "futures-task", 171 | "futures-util", 172 | ] 173 | 174 | [[package]] 175 | name = "futures-channel" 176 | version = "0.3.30" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" 179 | dependencies = [ 180 | "futures-core", 181 | "futures-sink", 182 | ] 183 | 184 | [[package]] 185 | name = "futures-core" 186 | version = "0.3.30" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" 189 | 190 | [[package]] 191 | name = "futures-executor" 192 | version = "0.3.30" 193 | source = "registry+https://github.com/rust-lang/crates.io-index" 194 | checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" 195 | dependencies = [ 196 | "futures-core", 197 | "futures-task", 198 | "futures-util", 199 | ] 200 | 201 | [[package]] 202 | name = "futures-io" 203 | version = "0.3.30" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" 206 | 207 | [[package]] 208 | name = "futures-macro" 209 | version = "0.3.30" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" 212 | dependencies = [ 213 | "proc-macro2", 214 | "quote", 215 | "syn 2.0.58", 216 | ] 217 | 218 | [[package]] 219 | name = "futures-sink" 220 | version = "0.3.30" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" 223 | 224 | [[package]] 225 | name = "futures-task" 226 | version = "0.3.30" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" 229 | 230 | [[package]] 231 | name = "futures-util" 232 | version = "0.3.30" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" 235 | dependencies = [ 236 | "futures-channel", 237 | "futures-core", 238 | "futures-io", 239 | "futures-macro", 240 | "futures-sink", 
241 | "futures-task", 242 | "memchr", 243 | "pin-project-lite", 244 | "pin-utils", 245 | "slab", 246 | ] 247 | 248 | [[package]] 249 | name = "gimli" 250 | version = "0.28.1" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" 253 | 254 | [[package]] 255 | name = "glob" 256 | version = "0.3.1" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 259 | 260 | [[package]] 261 | name = "heck" 262 | version = "0.5.0" 263 | source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 265 | 266 | [[package]] 267 | name = "hermit-abi" 268 | version = "0.3.9" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 271 | 272 | [[package]] 273 | name = "home" 274 | version = "0.5.9" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" 277 | dependencies = [ 278 | "windows-sys", 279 | ] 280 | 281 | [[package]] 282 | name = "itertools" 283 | version = "0.12.1" 284 | source = "registry+https://github.com/rust-lang/crates.io-index" 285 | checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" 286 | dependencies = [ 287 | "either", 288 | ] 289 | 290 | [[package]] 291 | name = "jobserver" 292 | version = "0.1.29" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "f08474e32172238f2827bd160c67871cdb2801430f65c3979184dc362e3ca118" 295 | dependencies = [ 296 | "libc", 297 | ] 298 | 299 | [[package]] 300 | name = "lazy_static" 301 | version = "1.4.0" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 304 | 305 | [[package]] 306 | name = "lazycell" 307 | version = "1.3.0" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 310 | 311 | [[package]] 312 | name = "libc" 313 | version = "0.2.153" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" 316 | 317 | [[package]] 318 | name = "libloading" 319 | version = "0.8.3" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" 322 | dependencies = [ 323 | "cfg-if", 324 | "windows-targets", 325 | ] 326 | 327 | [[package]] 328 | name = "link-cplusplus" 329 | version = "1.0.9" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" 332 | dependencies = [ 333 | "cc", 334 | ] 335 | 336 | [[package]] 337 | name = "linux-raw-sys" 338 | version = "0.4.13" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" 341 | 342 | [[package]] 343 | name = "llama_cpp" 344 | version = "0.3.1" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "746afa27b852045c93cffefb459f883b3c0a62552101c929241dccc6563d8fe9" 347 
| dependencies = [ 348 | "derive_more", 349 | "futures", 350 | "llama_cpp_sys", 351 | "num_cpus", 352 | "thiserror", 353 | "tokio", 354 | "tracing", 355 | ] 356 | 357 | [[package]] 358 | name = "llama_cpp_sys" 359 | version = "0.3.1" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "b53030035eb5617fde2491c1607ff2b6107bc559e25e444163075e4281dfe43e" 362 | dependencies = [ 363 | "bindgen", 364 | "cc", 365 | "link-cplusplus", 366 | "once_cell", 367 | ] 368 | 369 | [[package]] 370 | name = "log" 371 | version = "0.4.21" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 374 | 375 | [[package]] 376 | name = "memchr" 377 | version = "2.7.2" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 380 | 381 | [[package]] 382 | name = "minimal-lexical" 383 | version = "0.2.1" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 386 | 387 | [[package]] 388 | name = "miniz_oxide" 389 | version = "0.7.2" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" 392 | dependencies = [ 393 | "adler", 394 | ] 395 | 396 | [[package]] 397 | name = "nom" 398 | version = "7.1.3" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 401 | dependencies = [ 402 | "memchr", 403 | "minimal-lexical", 404 | ] 405 | 406 | [[package]] 407 | name = "num_cpus" 408 | version = "1.16.0" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" 411 | dependencies = [ 412 | "hermit-abi", 413 | "libc", 414 | ] 415 | 416 | [[package]] 417 | name = "object" 418 | version = "0.32.2" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" 421 | dependencies = [ 422 | "memchr", 423 | ] 424 | 425 | [[package]] 426 | name = "once_cell" 427 | version = "1.19.0" 428 | source = "registry+https://github.com/rust-lang/crates.io-index" 429 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 430 | 431 | [[package]] 432 | name = "pin-project-lite" 433 | version = "0.2.14" 434 | source = "registry+https://github.com/rust-lang/crates.io-index" 435 | checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" 436 | 437 | [[package]] 438 | name = "pin-utils" 439 | version = "0.1.0" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 442 | 443 | [[package]] 444 | name = "prettyplease" 445 | version = "0.2.17" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" 448 | dependencies = [ 449 | "proc-macro2", 450 | "syn 2.0.58", 451 | ] 452 | 453 | [[package]] 454 | name = "proc-macro2" 455 | version = "1.0.79" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" 458 | 
dependencies = [ 459 | "unicode-ident", 460 | ] 461 | 462 | [[package]] 463 | name = "quote" 464 | version = "1.0.36" 465 | source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 467 | dependencies = [ 468 | "proc-macro2", 469 | ] 470 | 471 | [[package]] 472 | name = "regex" 473 | version = "1.10.4" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" 476 | dependencies = [ 477 | "aho-corasick", 478 | "memchr", 479 | "regex-automata", 480 | "regex-syntax", 481 | ] 482 | 483 | [[package]] 484 | name = "regex-automata" 485 | version = "0.4.6" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 488 | dependencies = [ 489 | "aho-corasick", 490 | "memchr", 491 | "regex-syntax", 492 | ] 493 | 494 | [[package]] 495 | name = "regex-syntax" 496 | version = "0.8.3" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" 499 | 500 | [[package]] 501 | name = "rustc-demangle" 502 | version = "0.1.23" 503 | source = "registry+https://github.com/rust-lang/crates.io-index" 504 | checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" 505 | 506 | [[package]] 507 | name = "rustc-hash" 508 | version = "1.1.0" 509 | source = "registry+https://github.com/rust-lang/crates.io-index" 510 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 511 | 512 | [[package]] 513 | name = "rustc_version" 514 | version = "0.4.0" 515 | source = "registry+https://github.com/rust-lang/crates.io-index" 516 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 517 | dependencies = [ 518 | "semver", 519 | ] 520 | 521 | [[package]] 522 | name = "rustix" 523 | version = "0.38.32" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" 526 | dependencies = [ 527 | "bitflags", 528 | "errno", 529 | "libc", 530 | "linux-raw-sys", 531 | "windows-sys", 532 | ] 533 | 534 | [[package]] 535 | name = "rustler" 536 | version = "0.32.1" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "83c330a01eaed3ebce4708e2f1052e0676a9155c1583b8afadc69acaf6105e33" 539 | dependencies = [ 540 | "lazy_static", 541 | "rustler_codegen", 542 | "rustler_sys", 543 | ] 544 | 545 | [[package]] 546 | name = "rustler_codegen" 547 | version = "0.32.1" 548 | source = "registry+https://github.com/rust-lang/crates.io-index" 549 | checksum = "28516544e4ab5fd4c6802343d9676540fbbac1489d36c0898ad8c19ac11f5be2" 550 | dependencies = [ 551 | "heck", 552 | "proc-macro2", 553 | "quote", 554 | "syn 2.0.58", 555 | ] 556 | 557 | [[package]] 558 | name = "rustler_sys" 559 | version = "2.4.0" 560 | source = "registry+https://github.com/rust-lang/crates.io-index" 561 | checksum = "39e21c0f1bc2458e29df0249e0b6a047af44303c73856c179098b6fc3700fd38" 562 | dependencies = [ 563 | "regex", 564 | "unreachable", 565 | ] 566 | 567 | [[package]] 568 | name = "semver" 569 | version = "1.0.22" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" 572 | 573 | [[package]] 574 | name = "shlex" 575 | version 
= "1.3.0" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 578 | 579 | [[package]] 580 | name = "slab" 581 | version = "0.4.9" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" 584 | dependencies = [ 585 | "autocfg", 586 | ] 587 | 588 | [[package]] 589 | name = "syn" 590 | version = "1.0.109" 591 | source = "registry+https://github.com/rust-lang/crates.io-index" 592 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 593 | dependencies = [ 594 | "proc-macro2", 595 | "quote", 596 | "unicode-ident", 597 | ] 598 | 599 | [[package]] 600 | name = "syn" 601 | version = "2.0.58" 602 | source = "registry+https://github.com/rust-lang/crates.io-index" 603 | checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" 604 | dependencies = [ 605 | "proc-macro2", 606 | "quote", 607 | "unicode-ident", 608 | ] 609 | 610 | [[package]] 611 | name = "thiserror" 612 | version = "1.0.58" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" 615 | dependencies = [ 616 | "thiserror-impl", 617 | ] 618 | 619 | [[package]] 620 | name = "thiserror-impl" 621 | version = "1.0.58" 622 | source = "registry+https://github.com/rust-lang/crates.io-index" 623 | checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" 624 | dependencies = [ 625 | "proc-macro2", 626 | "quote", 627 | "syn 2.0.58", 628 | ] 629 | 630 | [[package]] 631 | name = "tokio" 632 | version = "1.37.0" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" 635 | dependencies = [ 636 | "backtrace", 637 | "pin-project-lite", 638 | ] 639 | 640 | [[package]] 641 | name = "tracing" 642 | version = "0.1.40" 643 | source = "registry+https://github.com/rust-lang/crates.io-index" 644 | checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" 645 | dependencies = [ 646 | "pin-project-lite", 647 | "tracing-attributes", 648 | "tracing-core", 649 | ] 650 | 651 | [[package]] 652 | name = "tracing-attributes" 653 | version = "0.1.27" 654 | source = "registry+https://github.com/rust-lang/crates.io-index" 655 | checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" 656 | dependencies = [ 657 | "proc-macro2", 658 | "quote", 659 | "syn 2.0.58", 660 | ] 661 | 662 | [[package]] 663 | name = "tracing-core" 664 | version = "0.1.32" 665 | source = "registry+https://github.com/rust-lang/crates.io-index" 666 | checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 667 | dependencies = [ 668 | "once_cell", 669 | ] 670 | 671 | [[package]] 672 | name = "unicode-ident" 673 | version = "1.0.12" 674 | source = "registry+https://github.com/rust-lang/crates.io-index" 675 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 676 | 677 | [[package]] 678 | name = "unreachable" 679 | version = "1.0.0" 680 | source = "registry+https://github.com/rust-lang/crates.io-index" 681 | checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" 682 | dependencies = [ 683 | "void", 684 | ] 685 | 686 | [[package]] 687 | name = "void" 688 | version = "1.0.2" 689 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 690 | checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" 691 | 692 | [[package]] 693 | name = "which" 694 | version = "4.4.2" 695 | source = "registry+https://github.com/rust-lang/crates.io-index" 696 | checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" 697 | dependencies = [ 698 | "either", 699 | "home", 700 | "once_cell", 701 | "rustix", 702 | ] 703 | 704 | [[package]] 705 | name = "windows-sys" 706 | version = "0.52.0" 707 | source = "registry+https://github.com/rust-lang/crates.io-index" 708 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 709 | dependencies = [ 710 | "windows-targets", 711 | ] 712 | 713 | [[package]] 714 | name = "windows-targets" 715 | version = "0.52.4" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" 718 | dependencies = [ 719 | "windows_aarch64_gnullvm", 720 | "windows_aarch64_msvc", 721 | "windows_i686_gnu", 722 | "windows_i686_msvc", 723 | "windows_x86_64_gnu", 724 | "windows_x86_64_gnullvm", 725 | "windows_x86_64_msvc", 726 | ] 727 | 728 | [[package]] 729 | name = "windows_aarch64_gnullvm" 730 | version = "0.52.4" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" 733 | 734 | [[package]] 735 | name = "windows_aarch64_msvc" 736 | version = "0.52.4" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" 739 | 740 | [[package]] 741 | name = "windows_i686_gnu" 742 | version = "0.52.4" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" 745 | 746 | [[package]] 747 | name = "windows_i686_msvc" 748 | version = "0.52.4" 749 | source = "registry+https://github.com/rust-lang/crates.io-index" 750 | checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" 751 | 752 | [[package]] 753 | name = "windows_x86_64_gnu" 754 | version = "0.52.4" 755 | source = "registry+https://github.com/rust-lang/crates.io-index" 756 | checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" 757 | 758 | [[package]] 759 | name = "windows_x86_64_gnullvm" 760 | version = "0.52.4" 761 | source = "registry+https://github.com/rust-lang/crates.io-index" 762 | checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" 763 | 764 | [[package]] 765 | name = "windows_x86_64_msvc" 766 | version = "0.52.4" 767 | source = "registry+https://github.com/rust-lang/crates.io-index" 768 | checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" 769 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "erlang_llama_cpp_nif" 3 | version = "0.0.1" 4 | authors = [] 5 | edition = "2021" 6 | 7 | [lib] 8 | name = "erlang_llama_cpp_nif" 9 | path = "src/lib.rs" 10 | crate-type = ["cdylib"] 11 | 12 | [dependencies] 13 | rustler = "0.32.1" 14 | llama_cpp = {version = "0.3.1"} 15 | regex = "1" 16 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | mod nifs; 2 | mod structs; 3 | mod refs; 4 | use rustler::{Env, Term}; 5 | use crate::refs::model_ref::ExLLamaModelRef; 6 | use crate::refs::session_ref::ExLLamaSessionRef; 7 | use crate::structs::session::ExLLamaSession; 8 | 9 | fn on_load(env: Env, _info: Term) -> bool { 10 | rustler::resource!(ExLLamaModelRef, env); 11 | rustler::resource!(ExLLamaSessionRef, env); 12 | rustler::resource!(ExLLamaSession, env); 13 | 14 | true 15 | } 16 | 17 | rustler::init!( 18 | "Elixir.ExLLama.Nif", 19 | [ 20 | nifs::ex_llama_model::__model_nif_load_from_file__, 21 | nifs::ex_llama_model::__model_nif_detokenize__, 22 | nifs::ex_llama_model::__model_nif_token_to_byte_piece__, 23 | nifs::ex_llama_model::__model_nif_token_to_piece__, 24 | nifs::ex_llama_model::__model_nif_decode_tokens__, 25 | nifs::ex_llama_model::__model_nif_create_session__, 26 | nifs::ex_llama_model::__model_nif_embeddings__, 27 | nifs::ex_llama_model::__model_nif_bos__, 28 | nifs::ex_llama_model::__model_nif_eos__, 29 | nifs::ex_llama_model::__model_nif_nl__, 30 | nifs::ex_llama_model::__model_nif_infill_prefix__, 31 | nifs::ex_llama_model::__model_nif_infill_middle__, 32 | nifs::ex_llama_model::__model_nif_infill_suffix__, 33 | nifs::ex_llama_model::__model_nif_eot__, 34 | nifs::ex_llama_model::__model_nif_vocabulary_size__, 35 | nifs::ex_llama_model::__model_nif_embed_len__, 36 | nifs::ex_llama_model::__model_nif_train_len__, 37 | 38 | nifs::ex_llama_session::__session_nif_default_session_options__, 39 | nifs::ex_llama_session::__session_nif_advance_context_with_tokens__, 40 | nifs::ex_llama_session::__session_nif_advance_context__, 41 | nifs::ex_llama_session::__session_nif_start_completing_with__, 42 | nifs::ex_llama_session::__session_nif_completion__, 43 | nifs::ex_llama_session::__session_nif_model__, 44 | nifs::ex_llama_session::__session_nif_params__, 45 | nifs::ex_llama_session::__session_nif_context_size__, 46 | nifs::ex_llama_session::__session_nif_context__, 47 | nifs::ex_llama_session::__session_nif_truncate_context__, 48 | nifs::ex_llama_session::__session_nif_set_context_to_tokens__, 49 | nifs::ex_llama_session::__session_nif_set_context__, 50 | nifs::ex_llama_session::__session_deep_copy__, 51 | // 52 | // nifs::ex_llama::__context_nif_load_model__, 53 | // nifs::ex_llama::__context_nif_default_session_options__, 54 | // nifs::ex_llama::__context_nif_create_session__, 55 | // nifs::ex_llama::__context_nif_advance_context__, 56 | // nifs::ex_llama::__context_nif_complete__, 57 | 58 | ], 59 | load = on_load 60 | ); 61 | -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/src/nifs.rs: -------------------------------------------------------------------------------- 1 | pub mod ex_llama_model; 2 | pub mod ex_llama_session; -------------------------------------------------------------------------------- /native/erlang_llama_cpp_nif/src/nifs/ex_llama_model.rs: -------------------------------------------------------------------------------- 1 | use llama_cpp::{EmbeddingsParams, LlamaModel, LlamaParams, SessionParams, Token}; 2 | use crate::structs::embedding_options::ExLLamaEmbeddingOptions; 3 | use crate::structs::model::ExLLamaModel; 4 | use crate::structs::model_options::ModelOptions; 5 | use crate::structs::session::ExLLamaSession; 6 | use crate::structs::session_options::ExLLamaSessionOptions; 7 | 8 | 9 | #[rustler::nif(schedule = "DirtyCpu")] 10 | // This function creates a new 
instance of the ExLLamaModel struct.
11 | pub fn __model_nif_load_from_file__(path: String, model_options: ModelOptions) -> Result<ExLLamaModel, String> {
12 |     let p = path.clone();
13 |     let params = LlamaParams::from(model_options);
14 |     let model = LlamaModel::load_from_file(path, params);
15 |     match model {
16 |         Ok(model) =>
17 |             Ok(ExLLamaModel::new(p, model)),
18 |         Err(e) =>
19 |             Err(e.to_string()),
20 |     }
21 | }
22 | 
23 | #[rustler::nif(schedule = "DirtyCpu")]
24 | pub fn __model_nif_detokenize__(model: ExLLamaModel, token: i32) -> Result<Vec<u8>, String> {
25 |     let t = Token(token);
26 |     let x = model.detokenize(t);
27 |     let vector = Vec::from(x);
28 |     Ok(vector)
29 | }
30 | 
31 | #[rustler::nif(schedule = "DirtyCpu")]
32 | pub fn __model_nif_token_to_byte_piece__(model: ExLLamaModel, token: i32) -> Result<Vec<u8>, String> {
33 |     let t = Token(token);
34 |     let x = model.token_to_byte_piece(t);
35 |     Ok(x)
36 | }
37 | 
38 | #[rustler::nif(schedule = "DirtyCpu")]
39 | pub fn __model_nif_token_to_piece__(model: ExLLamaModel, token: i32) -> Result<String, String> {
40 |     let t = Token(token);
41 |     let x = model.token_to_piece(t);
42 |     Ok(x)
43 | }
44 | 
45 | #[rustler::nif(schedule = "DirtyCpu")]
46 | pub fn __model_nif_decode_tokens__(model: ExLLamaModel, tokens: Vec<i32>) -> Result<String, String> {
47 |     let tokens: Vec<Token> = tokens.into_iter().map(Token).collect();
48 |     let x = model.decode_tokens(tokens);
49 |     Ok(x)
50 | }
51 | 
52 | 
53 | #[rustler::nif(schedule = "DirtyCpu")]
54 | pub fn __model_nif_create_session__(model: ExLLamaModel, options: ExLLamaSessionOptions) -> Result<ExLLamaSession, String> {
55 |     let seed = options.seed.clone();
56 |     let opts = SessionParams::from(options);
57 |     let ctx = model.create_session(opts);
58 |     match ctx {
59 |         Ok(session) =>
60 |             Ok(ExLLamaSession::new(model.name, seed, session)),
61 |         Err(e) => return Err(e.to_string()),
62 |     }
63 | }
64 | 
65 | 
66 | #[rustler::nif(schedule = "DirtyCpu")]
67 | pub fn __model_nif_embeddings__(model: ExLLamaModel, inputs: String, options: ExLLamaEmbeddingOptions) -> Result<Vec<Vec<f32>>, String> {
68 |     let options = EmbeddingsParams::from(options);
69 |     let vec_of_vec = vec![inputs.as_bytes()];
70 |     let response = model.embeddings(&vec_of_vec, options);
71 |     match response {
72 |         Ok(value) =>
73 |             Ok(value),
74 |         Err(e) => return Err(e.to_string()),
75 |     }
76 | }
77 | 
78 | // @TODO embeddings_async
79 | 
80 | #[rustler::nif(schedule = "DirtyCpu")]
81 | pub fn __model_nif_bos__(model: ExLLamaModel) -> Result<i32, String> {
82 |     let x = model.bos();
83 |     Ok(x.0)
84 | }
85 | 
86 | #[rustler::nif(schedule = "DirtyCpu")]
87 | pub fn __model_nif_eos__(model: ExLLamaModel) -> Result<i32, String> {
88 |     let x = model.eos();
89 |     Ok(x.0)
90 | }
91 | 
92 | #[rustler::nif(schedule = "DirtyCpu")]
93 | pub fn __model_nif_nl__(model: ExLLamaModel) -> Result<i32, String> {
94 |     let x = model.nl();
95 |     Ok(x.0)
96 | }
97 | 
98 | #[rustler::nif(schedule = "DirtyCpu")]
99 | pub fn __model_nif_infill_prefix__(model: ExLLamaModel) -> Result<i32, String> {
100 |     let x = model.infill_prefix();
101 |     Ok(x.0)
102 | }
103 | 
104 | 
105 | #[rustler::nif(schedule = "DirtyCpu")]
106 | pub fn __model_nif_infill_middle__(model: ExLLamaModel) -> Result<i32, String> {
107 |     let x = model.infill_middle();
108 |     Ok(x.0)
109 | }
110 | 
111 | 
112 | #[rustler::nif(schedule = "DirtyCpu")]
113 | pub fn __model_nif_infill_suffix__(model: ExLLamaModel) -> Result<i32, String> {
114 |     let x = model.infill_suffix();
115 |     Ok(x.0)
116 | }
117 | 
118 | 
119 | #[rustler::nif(schedule = "DirtyCpu")]
120 | pub fn __model_nif_eot__(model: ExLLamaModel) -> Result<i32, String> {
121 |     let x = model.eot();
122 |     Ok(x.0)
123 | }
124 | 
125 | 
126 | #[rustler::nif(schedule = "DirtyCpu")]
127 | pub fn __model_nif_vocabulary_size__(model: ExLLamaModel) -> Result<usize, String> {
128 |     let x = model.vocabulary_size();
129 |     Ok(x)
130 | }
131 | 
132 | 
133 | #[rustler::nif(schedule = "DirtyCpu")]
134 | pub fn __model_nif_embed_len__(model: ExLLamaModel) -> Result<usize, String> {
135 |     let x = model.embed_len();
136 |     Ok(x)
137 | }
138 | 
139 | #[rustler::nif(schedule = "DirtyCpu")]
140 | pub fn __model_nif_train_len__(model: ExLLamaModel) -> Result<usize, String> {
141 |     let x = model.train_len();
142 |     Ok(x)
143 | }
144 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/nifs/ex_llama_session.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::standard_sampler::StandardSampler;
2 | use llama_cpp::{SessionParams, Token};
3 | use regex::Regex;
4 | use rustler::{Env, ResourceArc};
5 | use rustler::types::Pid;
6 | use crate::refs::session_ref::ExLLamaSessionRef;
7 | use crate::structs::completion::ExLLamaCompletion;
8 | use crate::structs::model::ExLLamaModel;
9 | use crate::structs::session::ExLLamaSession;
10 | use crate::structs::session_options::ExLLamaSessionOptions;
11 | 
12 | 
13 | #[rustler::nif(schedule = "DirtyCpu")]
14 | pub fn __session_nif_default_session_options__() -> Result<ExLLamaSessionOptions, String> {
15 |     let r = ExLLamaSessionOptions::from(SessionParams::default());
16 |     Ok(r)
17 | }
18 | 
19 | #[rustler::nif(schedule = "DirtyCpu")]
20 | pub fn __session_nif_advance_context_with_tokens__(session: ResourceArc<ExLLamaSessionRef>, context: Vec<i32>) -> Result<&'static str, String> {
21 |     let mut ctx = session.0.lock().expect("Locking the session failed");
22 |     let tokens: Vec<Token> = context.into_iter().map(Token).collect();
23 |     let result = ctx.advance_context_with_tokens(tokens);
24 |     match result {
25 |         Ok(_) => Ok("OK"),
26 |         Err(e) => Err(e.to_string())
27 |     }
28 | }
29 | 
30 | #[rustler::nif(schedule = "DirtyCpu")]
31 | pub fn __session_nif_advance_context__(session: ResourceArc<ExLLamaSessionRef>, context: String) -> Result<&'static str, String> {
32 |     let mut ctx = session.0.lock().expect("Locking the session failed");
33 |     let result = ctx.advance_context(context);
34 |     match result {
35 |         Ok(_) => Ok("OK"),
36 |         Err(e) => Err(e.to_string())
37 |     }
38 | }
39 | 
40 | // start_completing
41 | 
42 | #[rustler::nif(schedule = "DirtyCpu")]
43 | pub fn __session_nif_start_completing_with__(env: Env, pid: Pid, session: ResourceArc<ExLLamaSessionRef>, max_predictions: usize) -> Result<&'static str, String> {
44 |     let lock = session.0.lock().expect("Locking the session failed");
45 |     let c = lock.deep_copy();
46 |     match c {
47 |         Ok(ctx) => {
48 |             let mut pid = pid;
49 |             let mut ctx = ctx;
50 |             let handle = ctx.start_completing_with(StandardSampler::default(), max_predictions);
51 |             let i = handle.into_strings();
52 |             for completion in i {
53 |                 //let gen_completion = rustler::types::tuple::make_tuple(&[rustler::types::atom::from_str("gen"), completion]);
54 |                 env.send(&pid, completion).expect("Encoding completion failed");
55 |             }
56 |             let fin = rustler::types::Atom::from_str(env, "fin").expect("Encoding completion failed");
57 |             env.send(&pid, fin).expect("Encoding completion failed");
58 |             Ok("OK")
59 |         },
60 |         Err(e) => Err(e.to_string())
61 |     }
62 | }
63 | 
64 | 
65 | #[rustler::nif(schedule = "DirtyCpu")]
66 | pub fn __session_nif_completion__(session: ResourceArc<ExLLamaSessionRef>, max_predictions: usize, stop: Option<String>) -> Result<ExLLamaCompletion, String> {
67 |     let lock = session.0.lock().expect("Locking the session failed");
68 |     let c = lock.deep_copy();
69 |     match c {
70 |         Ok(ctx) => {
71 |             let mut ctx = ctx;
72 |             let prompt_size = ctx.context_size();
73 |             let completions = ctx.start_completing_with(StandardSampler::default(), max_predictions).into_strings();
74 |             let mut completions_str = String::new();
75 | 
76 |             match stop {
77 |                 Some(x) => {
78 |                     let pattern = Regex::new(&x).unwrap(); // Compile the regex, handle errors as needed
79 |                     for completion in completions {
80 |                         completions_str.push_str(&completion);
81 |                         if let Some(mat) = pattern.find(&completions_str) {
82 |                             completions_str.truncate(mat.end());
83 |                             break;
84 |                         }
85 |                     }
86 |                 },
87 |                 None => {
88 |                     for completion in completions {
89 |                         completions_str.push_str(&completion);
90 |                     }
91 |                 }
92 |             }
93 |             Ok(ExLLamaCompletion::new(completions_str, ctx.context_size() - prompt_size))
94 |         },
95 |         Err(e) => Err(e.to_string())
96 |     }
97 | }
98 | 
99 | #[rustler::nif(schedule = "DirtyCpu")]
100 | pub fn __session_nif_model__(session: ResourceArc<ExLLamaSessionRef>) -> Result<ExLLamaModel, String> {
101 |     let ctx = session.0.lock().expect("Locking the session failed");
102 |     let model = ctx.model();
103 |     let wrapper = ExLLamaModel::new("...".to_string(), model);
104 |     Ok(wrapper)
105 | }
106 | 
107 | #[rustler::nif(schedule = "DirtyCpu")]
108 | pub fn __session_nif_params__(session: ResourceArc<ExLLamaSessionRef>) -> Result<ExLLamaSessionOptions, String> {
109 |     let ctx = session.0.lock().expect("Locking the session failed");
110 |     let params = ctx.params();
111 |     let wrapper = ExLLamaSessionOptions::from(params);
112 |     Ok(wrapper)
113 | }
114 | 
115 | #[rustler::nif(schedule = "DirtyCpu")]
116 | pub fn __session_nif_context_size__(session: ResourceArc<ExLLamaSessionRef>) -> Result<usize, String> {
117 |     let ctx = session.0.lock().expect("Locking the session failed");
118 |     let result = ctx.context_size();
119 |     Ok(result)
120 | }
121 | 
122 | #[rustler::nif(schedule = "DirtyCpu")]
123 | pub fn __session_nif_context__(session: ResourceArc<ExLLamaSessionRef>) -> Result<Vec<i32>, String> {
124 |     let ctx = session.0.lock().expect("Locking the session failed");
125 |     let result = ctx.context();
126 |     let tokens: Vec<i32> = result.into_iter().map(|x| x.0).collect();
127 |     Ok(tokens)
128 | }
129 | 
130 | #[rustler::nif(schedule = "DirtyCpu")]
131 | pub fn __session_nif_truncate_context__(session: ResourceArc<ExLLamaSessionRef>, n_tokens: usize) -> Result<&'static str, String> {
132 |     let ctx = session.0.lock().expect("Locking the session failed");
133 |     ctx.truncate_context(n_tokens);
134 |     Ok("OK")
135 | }
136 | 
137 | #[rustler::nif(schedule = "DirtyCpu")]
138 | pub fn __session_nif_set_context_to_tokens__(session: ResourceArc<ExLLamaSessionRef>, context: Vec<i32>) -> Result<&'static str, String> {
139 |     let mut ctx = session.0.lock().expect("Locking the session failed");
140 |     let tokens: Vec<Token> = context.into_iter().map(Token).collect();
141 |     let result = ctx.set_context_to_tokens(tokens);
142 |     match result {
143 |         Ok(_) => Ok("OK"),
144 |         Err(e) => Err(e.to_string())
145 |     }
146 | }
147 | 
148 | #[rustler::nif(schedule = "DirtyCpu")]
149 | pub fn __session_nif_set_context__(session: ResourceArc<ExLLamaSessionRef>, context: String) -> Result<&'static str, String> {
150 |     let mut ctx = session.0.lock().expect("Locking the session failed");
151 |     let result = ctx.set_context(context);
152 |     match result {
153 |         Ok(_) => Ok("OK"),
154 |         Err(e) => Err(e.to_string())
155 |     }
156 | }
157 | 
158 | #[rustler::nif(schedule = "DirtyCpu")]
159 | pub fn __session_deep_copy__(session: ResourceArc<ExLLamaSessionRef>) -> Result<ExLLamaSession, String> {
160 |     let ctx = session.0.lock().expect("Locking the session failed");
161 |     let result = ctx.deep_copy();
162 |     match result {
163 |         Ok(session) =>
164 |             Ok(ExLLamaSession::new("".to_string(), session.params().seed, session)),
165 |         Err(e) => return Err(e.to_string()),
166 |     }
167 | }
168 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/refs.rs:
--------------------------------------------------------------------------------
1 | pub mod model_ref;
2 | pub mod session_ref;
3 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/refs/model_ref.rs:
--------------------------------------------------------------------------------
1 | // This module contains the Ref Wrapper structs, which wrap the LLama structs from the llama_cpp crate.
2 | // These ref structs are used to ensure safe concurrent access to the LLama objects.
3 | // These ref structs are also marked as Send and Sync, allowing them to be shared across threads.
4 | 
5 | 
6 | use llama_cpp::{LlamaModel};
7 | 
8 | 
9 | // This struct is used to create a reference to the LLamaModel object.
10 | pub struct ExLLamaModelRef(pub LlamaModel);
11 | 
12 | 
13 | impl ExLLamaModelRef {
14 |     pub fn new(llama: LlamaModel) -> Self {
15 |         Self(llama)
16 |     }
17 | }
18 | 
19 | impl Drop for ExLLamaModelRef {
20 |     fn drop(&mut self) {
21 |         // Log or print a message indicating the resource is being dropped.
22 |         // println!("Dropping ExLLamaModelRef");
23 |     }
24 | }
25 | unsafe impl Send for ExLLamaModelRef {}
26 | unsafe impl Sync for ExLLamaModelRef {}
27 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/refs/session_ref.rs:
--------------------------------------------------------------------------------
1 | // This module contains the Ref Wrapper structs, which wrap the LLama structs from the llama_cpp crate.
2 | // These ref structs are used to ensure safe concurrent access to the LLama objects.
3 | // These ref structs are also marked as Send and Sync, allowing them to be shared across threads.
4 | 
5 | 
6 | use std::sync::Mutex;
7 | use llama_cpp::{LlamaSession};
8 | 
9 | pub struct ExLLamaSessionRef(pub Mutex<LlamaSession>);
10 | 
11 | impl ExLLamaSessionRef {
12 |     pub fn new(session: LlamaSession) -> Self {
13 |         Self(Mutex::new(session))
14 |     }
15 | }
16 | 
17 | impl Drop for ExLLamaSessionRef {
18 |     fn drop(&mut self) {
19 |         // println!("Dropping ExLLamaSessionRef");
20 |     }
21 | }
22 | 
23 | unsafe impl Send for ExLLamaSessionRef {}
24 | unsafe impl Sync for ExLLamaSessionRef {}
25 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs.rs:
--------------------------------------------------------------------------------
1 | pub mod model_options;
2 | pub mod model;
3 | pub mod session_options;
4 | pub mod session;
5 | 
6 | pub mod embedding_options;
7 | 
8 | pub mod completion;
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/completion.rs:
--------------------------------------------------------------------------------
1 | // This module contains the ExLLamaCompletion struct, the NIF-encodable result returned to Elixir after a completion run.
2 | // It carries the generated text together with the number of tokens produced beyond the prompt.
3 | // Unlike the model and session structs, it does not wrap any llama_cpp resource.
4 | 
5 | 
6 | use llama_cpp::{LlamaSession};
7 | use rustler::{NifStruct, ResourceArc};
8 | use crate::refs::session_ref::ExLLamaSessionRef;
9 | 
10 | 
11 | #[derive(NifStruct)]
12 | #[module = "ExLLama.Completion"]
13 | pub struct ExLLamaCompletion {
14 |     pub content: String,
15 |     pub token_length: usize,
16 | }
17 | 
18 | impl ExLLamaCompletion {
19 |     pub fn new(content: String, token_length: usize) -> Self {
20 |         Self {
21 |             content: content,
22 |             token_length: token_length
23 |         }
24 |     }
25 |     //
26 |     // // Provide a method to access the mutex protected session
27 |     // pub fn lock_session(&self) -> std::sync::MutexGuard<'_, LlamaSession> {
28 |     //     self.resource.0.lock().expect("Locking the session failed")
29 |     // }
30 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/embedding_options.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::EmbeddingsParams;
2 | use rustler::NifStruct;
3 | 
4 | #[derive(NifStruct)]
5 | #[module = "ExLLama.EmbeddingOptions"]
6 | pub struct ExLLamaEmbeddingOptions {
7 |     pub n_threads: u32,
8 |     pub n_threads_batch: u32,
9 | }
10 | 
11 | impl From<ExLLamaEmbeddingOptions> for EmbeddingsParams {
12 |     fn from(value: ExLLamaEmbeddingOptions) -> Self {
13 |         Self {
14 |             n_threads: value.n_threads,
15 |             n_threads_batch: value.n_threads_batch,
16 |         }
17 |     }
18 | }
19 | 
20 | 
21 | impl From<EmbeddingsParams> for ExLLamaEmbeddingOptions {
22 |     fn from(value: EmbeddingsParams) -> Self {
23 |         Self {
24 |             n_threads: value.n_threads,
25 |             n_threads_batch: value.n_threads_batch,
26 |         }
27 |     }
28 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/model.rs:
--------------------------------------------------------------------------------
1 | // This module contains the ExLLamaModel struct which is a wrapper around the LlamaModel struct from the llama_cpp crate.
2 | // The ExLLamaModel struct includes a ResourceArc to the ExLLamaModelRef struct, which is used to ensure safe concurrent access.
3 | // The ExLLamaModel struct also implements the Deref trait to allow it to be treated as a LlamaModel object.
4 | 
5 | use std::ops::Deref;
6 | use llama_cpp::{LlamaModel};
7 | use rustler::{NifStruct, ResourceArc};
8 | use crate::refs::model_ref::ExLLamaModelRef;
9 | 
10 | 
11 | #[derive(NifStruct)]
12 | #[module = "ExLLama.Model"]
13 | // This struct is used to create a resource that can be passed between Elixir and Rust.
14 | pub struct ExLLamaModel {
15 |     pub resource: ResourceArc<ExLLamaModelRef>,
16 |     pub name: String,
17 |     pub eos: Vec<u8>,
18 |     pub bos: Vec<u8>
19 | }
20 | 
21 | // This implementation creates a new instance of the ExLLamaModel struct.
22 | impl ExLLamaModel {
23 |     pub fn new(name: String, llama: LlamaModel) -> Self {
24 |         Self {
25 |             name: name,
26 |             eos: llama.detokenize(llama.eos()).to_vec(),
27 |             bos: llama.detokenize(llama.bos()).to_vec(),
28 |             resource: ResourceArc::new(ExLLamaModelRef::new(llama)),
29 |         }
30 |     }
31 | }
32 | 
33 | // This implementation of Deref allows the ExLLamaModel struct to be treated as a LlamaModel object.
34 | impl Deref for ExLLamaModel {
35 |     type Target = LlamaModel;
36 | 
37 |     fn deref(&self) -> &Self::Target {
38 |         &self.resource.0
39 |     }
40 | }
41 | 
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/model_options.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::{LlamaParams, SplitMode};
2 | use rustler::{NifStruct};
3 | //use rustler::types::atom;
4 | 
5 | #[derive(NifStruct)]
6 | #[module = "ExLLama.ModelOptions"]
7 | pub struct ModelOptions {
8 |     pub n_gpu_layers: u32,
9 |     pub split_mode: String,
10 |     pub main_gpu: u32,
11 |     pub vocab_only: bool,
12 |     pub use_mmap: bool,
13 |     pub use_mlock: bool,
14 | }
15 | 
16 | impl From<ModelOptions> for LlamaParams {
17 |     fn from(value: ModelOptions) -> Self {
18 |         // Map the string values to the corresponding SplitMode enum values
19 |         let split_mode = match value.split_mode.as_str() {
20 |             "none" => SplitMode::None,
21 |             "layer" => SplitMode::Layer,
22 |             "row" => SplitMode::Row,
23 |             _ => panic!("Invalid split_mode value"),
24 |         };
25 | 
26 |         Self {
27 |             n_gpu_layers: value.n_gpu_layers,
28 |             split_mode: split_mode,
29 |             main_gpu: value.main_gpu,
30 |             vocab_only: value.vocab_only,
31 |             use_mmap: value.use_mmap,
32 |             use_mlock: value.use_mlock,
33 |         }
34 |     }
35 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/session.rs:
--------------------------------------------------------------------------------
1 | // This module contains the ExLLamaSession struct which is a wrapper around the LlamaSession struct from the llama_cpp crate.
2 | // The ExLLamaSession struct includes a ResourceArc to the ExLLamaSessionRef struct, which guards the underlying session behind a Mutex for safe concurrent access.
3 | // The model name and seed are carried alongside the resource so they can be read from the Elixir side.
4 | 
5 | 
6 | use llama_cpp::{LlamaSession};
7 | use rustler::{NifStruct, ResourceArc};
8 | use crate::refs::session_ref::ExLLamaSessionRef;
9 | 
10 | 
11 | #[derive(NifStruct)]
12 | #[module = "ExLLama.Session"]
13 | pub struct ExLLamaSession {
14 |     pub model_name: String,
15 |     pub seed: u32,
16 |     pub resource: ResourceArc<ExLLamaSessionRef>,
17 | }
18 | 
19 | impl ExLLamaSession {
20 |     pub fn new(model_name: String, seed: u32, session: LlamaSession) -> Self {
21 |         Self {
22 |             model_name: model_name,
23 |             seed: seed,
24 |             resource: ResourceArc::new(ExLLamaSessionRef::new(session)),
25 |         }
26 |     }
27 |     //
28 |     // // Provide a method to access the mutex protected session
29 |     // pub fn lock_session(&self) -> std::sync::MutexGuard<'_, LlamaSession> {
30 |     //     self.resource.0.lock().expect("Locking the session failed")
31 |     // }
32 | }
--------------------------------------------------------------------------------
/native/erlang_llama_cpp_nif/src/structs/session_options.rs:
--------------------------------------------------------------------------------
1 | use llama_cpp::SessionParams;
2 | use rustler::NifStruct;
3 | 
4 | #[derive(NifStruct)]
5 | #[module = "ExLLama.SessionOptions"]
6 | pub struct ExLLamaSessionOptions {
7 |     pub seed: u32,
8 |     pub n_ctx: u32,
9 |     pub n_batch: u32,
10 |     pub n_threads: u32,
11 |     pub n_threads_batch: u32,
12 |     pub rope_scaling_type: i32,
13 |     pub rope_freq_base: f32,
14 |     pub rope_freq_scale: f32,
15 |     pub yarn_ext_factor: f32,
16 |     pub yarn_attn_factor: f32,
17 |     pub yarn_beta_fast: f32,
18 |     pub yarn_beta_slow: f32,
19 |     pub yarn_orig_ctx: u32,
20 |     pub type_k: u32,
21 |     pub type_v: u32,
22 |     pub embedding: bool,
23 |     pub offload_kqv: bool,
24 |     pub pooling: bool,
25 | }
26 | 
27 | impl From<ExLLamaSessionOptions> for SessionParams {
28 |     fn from(value: ExLLamaSessionOptions) -> Self {
29 |         Self {
30 |             seed: value.seed,
31 |             n_ctx: value.n_ctx,
32 |             n_batch: value.n_batch,
33 |             n_threads: value.n_threads,
34 |             n_threads_batch: value.n_threads_batch,
35 |             rope_scaling_type: value.rope_scaling_type,
36 |             rope_freq_base: value.rope_freq_base,
37 |             rope_freq_scale: value.rope_freq_scale,
38 |             yarn_ext_factor: value.yarn_ext_factor,
39 |             yarn_attn_factor: value.yarn_attn_factor,
40 |             yarn_beta_fast: value.yarn_beta_fast,
41 |             yarn_beta_slow: value.yarn_beta_slow,
42 |             yarn_orig_ctx: value.yarn_orig_ctx,
43 |             type_k: value.type_k,
44 |             type_v: value.type_v,
45 |             embedding: value.embedding,
46 |             offload_kqv: value.offload_kqv,
47 |             pooling: value.pooling,
48 |         }
49 |     }
50 | }
51 | 
52 | 
53 | impl From<SessionParams> for ExLLamaSessionOptions {
54 |     fn from(value: SessionParams) -> Self {
55 |         Self {
56 |             seed: value.seed,
57 |             n_ctx: value.n_ctx,
58 |             n_batch: value.n_batch,
59 |             n_threads: value.n_threads,
60 |             n_threads_batch: value.n_threads_batch,
61 |             rope_scaling_type: value.rope_scaling_type,
62 |             rope_freq_base: value.rope_freq_base,
63 |             rope_freq_scale: value.rope_freq_scale,
64 |             yarn_ext_factor: value.yarn_ext_factor,
65 |             yarn_attn_factor: value.yarn_attn_factor,
66 |             yarn_beta_fast: value.yarn_beta_fast,
67 |             yarn_beta_slow: value.yarn_beta_slow,
68 |             yarn_orig_ctx: value.yarn_orig_ctx,
69 |             type_k: value.type_k,
70 |             type_v: value.type_v,
71 |             embedding: value.embedding,
72 |             offload_kqv: value.offload_kqv,
73 |             pooling: value.pooling,
74 |         }
75 |     }
76 | }
77 | 
78 | 
79 | impl From<&SessionParams> for ExLLamaSessionOptions {
80 |     fn from(value: &SessionParams) -> Self {
81 |         Self {
82 |             seed: value.seed,
83 |             n_ctx: value.n_ctx,
84 |             n_batch: value.n_batch,
85 |             n_threads: value.n_threads,
86 |             n_threads_batch: 
value.n_threads_batch, 87 | rope_scaling_type: value.rope_scaling_type, 88 | rope_freq_base: value.rope_freq_base, 89 | rope_freq_scale: value.rope_freq_scale, 90 | yarn_ext_factor: value.yarn_ext_factor, 91 | yarn_attn_factor: value.yarn_attn_factor, 92 | yarn_beta_fast: value.yarn_beta_fast, 93 | yarn_beta_slow: value.yarn_beta_slow, 94 | yarn_orig_ctx: value.yarn_orig_ctx, 95 | type_k: value.type_k, 96 | type_v: value.type_v, 97 | embedding: value.embedding, 98 | offload_kqv: value.offload_kqv, 99 | pooling: value.pooling, 100 | } 101 | } 102 | } -------------------------------------------------------------------------------- /priv/models/local_llama/tiny_llama/.gitignore: -------------------------------------------------------------------------------- 1 | *.gguf 2 | -------------------------------------------------------------------------------- /priv/models/local_llama/tiny_llama/init.sh: -------------------------------------------------------------------------------- 1 | wget -O tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf?download=true 2 | -------------------------------------------------------------------------------- /test/ex_llama_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExLLamaTest do 2 | use ExUnit.Case 3 | 4 | defp priv_dir() do 5 | :code.priv_dir(:ex_llama) 6 | |> List.to_string() 7 | end 8 | 9 | defp load_model(path) do 10 | file = priv_dir() <> "/models/" <> path 11 | ExLLama.load_model(file) 12 | end 13 | 14 | test "Default Session Options" do 15 | {:ok, sut} = ExLLama.Session.default_options() 16 | assert sut.__struct__ == ExLLama.SessionOptions 17 | end 18 | 19 | test "Create Session" do 20 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 21 | {:ok, session} = ExLLama.create_session(llama) 22 | assert session.__struct__ == ExLLama.Session 23 | end 24 | 25 | test "Load Model" do 26 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 27 | assert llama.__struct__ == ExLLama.Model 28 | end 29 | 30 | def receive_text(acc \\ []) do 31 | receive do 32 | x = {:ok, _} -> Enum.reverse([x|acc]) 33 | x = {:error, _} -> Enum.reverse([x|acc]) 34 | :fin -> 35 | Enum.reverse(acc) 36 | x -> 37 | receive_text([x | acc]) 38 | end 39 | end 40 | 41 | test "Async complete_with" do 42 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 43 | {:ok, options} = ExLLama.Session.default_options() 44 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 45 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 46 | ExLLama.Session.start_completing_with(session, %{max_tokens: 512}) 47 | r = receive_text() 48 | assert r == [" Good", "bye", "", ""] 49 | end 50 | 51 | test "Advance Context" do 52 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 53 | {:ok, options} = ExLLama.Session.default_options() 54 | 55 | {:ok, session} = ExLLama.create_session(llama, %{options| seed: 2}) 56 | ExLLama.advance_context(session, "<|user|>\n Say Hello. And only hello. 
Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n") 57 | {:ok, context} = ExLLama.Session.context(session) 58 | {:ok, as_str} = ExLLama.Model.decode_tokens(llama, context) 59 | # There is a bug in advance_context in llama_cpp that injects a space 60 | assert as_str == " <|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n" 61 | {:ok, %{content: response}} = ExLLama.completion(session, 512, "\n*") 62 | response = String.trim_leading(response) 63 | ExLLama.advance_context(session, response <> "\n<|user|>\n Say Apple.\n<|assistant|>\n") 64 | {:ok, %{content: response}} = ExLLama.completion(session, 512, "\n*") 65 | response = String.trim_leading(response) 66 | ExLLama.advance_context(session, response <> "\n<|user|>\n What did you just say?.\n<|assistant|>\n") 67 | {:ok, %{content: response}} = ExLLama.completion(session, 512, "\n*") 68 | response = String.trim_leading(response) 69 | assert response =~ "Apple" 70 | {:ok, context} = ExLLama.Session.context(session) 71 | {:ok, as_str} = ExLLama.Model.decode_tokens(llama, context) 72 | assert as_str == " <|user|>\n Say Hello. And only hello. Example \"Hello\".\n<|assistant|>\n Hello\n<|user|>\n Repeat what you just said.\n<|assistant|>\n Hello\n<|user|>\n Say Goodbye.\n<|assistant|>\n Goodbye\n<|user|>\n Say Apple.\n<|assistant|>\n Apple\n<|user|>\n What did you just say?.\n<|assistant|>\n" 73 | end 74 | 75 | test "Chat Completion" do 76 | {:ok, llama} = load_model("local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf") 77 | thread = [ 78 | %{role: :user, content: "Say Hello. And only hello. Example \"Hello\"."}, 79 | %{role: :assistant, content: "Hello"}, 80 | %{role: :user, content: "Repeat what you just said."}, 81 | %{role: :assistant, content: "Hello"}, 82 | %{role: :user, content: "Say Goodbye."}, 83 | %{role: :assistant, content: "Goodbye"}, 84 | %{role: :user, content: "Say Apple."}, 85 | %{role: :assistant, content: "Apple"}, 86 | %{role: :user, content: "What did you just say?."}, 87 | ] 88 | 89 | # After stripping completion_tokens are actually 3, although it's useful to know how many tokens were generated. 90 | {:ok, response} = ExLLama.chat_completion(llama, thread, [seed: 2, choices: 2]) 91 | expected_path = priv_dir() <> "/models/local_llama/tiny_llama/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" 92 | assert = %GenAI.ChatCompletion{ 93 | choices: [ 94 | %GenAI.ChatCompletion.Choice{finish_reason: :stop, index: 0, message: choice_a}, 95 | %GenAI.ChatCompletion.Choice{finish_reason: :stop, index: 1, message: choice_b} 96 | ], 97 | id: nil, 98 | model: expected_path, 99 | seed: 2, 100 | usage: %GenAI.ChatCompletion.Usage{prompt_tokens: 143, total_tokens: 147, completion_tokens: 4}, 101 | vsn: 1.0 102 | } = response 103 | assert choice_a.content == "Apple" 104 | assert choice_b.content == "Apple" 105 | end 106 | 107 | 108 | end 109 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | # Download Required Models 2 | ExUnit.start() 3 | --------------------------------------------------------------------------------