├── llama_cli ├── resource │ └── llama_cli ├── llama_cli │ ├── __init__.py │ ├── verb │ │ ├── __init__.py │ │ ├── launch.py │ │ └── prompt.py │ ├── command │ │ ├── __init__.py │ │ └── llama.py │ └── api │ │ └── __init__.py ├── setup.cfg ├── package.xml ├── test │ ├── test_pep257.py │ ├── test_flake8.py │ └── test_copyright.py └── setup.py ├── llama_ros ├── llama_ros │ ├── __init__.py │ └── langchain │ │ ├── __init__.py │ │ ├── llama_ros_embeddings.py │ │ ├── llama_ros_reranker.py │ │ └── llama_ros.py ├── package.xml ├── src │ ├── llava_main.cpp │ ├── llama_main.cpp │ ├── llama_utils │ │ └── logs.cpp │ └── llava_ros │ │ └── llava_node.cpp ├── include │ ├── llama_utils │ │ ├── spinner.hpp │ │ └── llama_params.hpp │ └── llava_ros │ │ └── llava.hpp └── CMakeLists.txt ├── llama_demos ├── llama_demos │ ├── __init__.py │ ├── llama_embeddings_demo_node.py │ ├── llama_demo_node.py │ ├── chatllama_langgraph_demo_node.py │ ├── chatllama_reasoning_demo_node.py │ ├── chatllama_audio_demo_node.py │ ├── chatllama_demo_node.py │ ├── chatllama_structured_demo_node.py │ ├── llama_rerank_demo_node.py │ ├── chatllama_reasoning_tools_demo_node.py │ ├── llava_demo_node.py │ ├── chatllama_multi_audio_demo_node.py │ ├── chatllama_multi_image_user_demo_node.py │ ├── chatllama_streaming_tools_demo_node.py │ ├── chatllama_tools_demo_node.py │ ├── chatllama_multi_image_demo_node.py │ ├── mtmd_audio_demo_node.py │ ├── llama_rag_demo_node.py │ └── chatllama_pddl_demo_node.py ├── package.xml └── CMakeLists.txt ├── llama_bringup ├── llama_bringup │ ├── __init__.py │ └── utils.py ├── prompts │ ├── DeepSeek-R1.yaml │ ├── Mistral.yaml │ ├── Vicuna.yaml │ ├── Zephyr.yaml │ ├── Phi-3.yaml │ ├── user-assistant-hashes.yaml │ ├── Alpaca.yaml │ ├── StableLM-Zephyr.yaml │ ├── system-user-assistant-hashes.yaml │ ├── GPT.yaml │ ├── Gemma-2.yaml │ ├── Gemma-3.yaml │ ├── ChatML.yaml │ ├── Phi-4.yaml │ ├── gorilla.yaml │ └── Llama-3.yaml ├── models │ ├── GPT-OSS.yaml │ ├── Qwen3.yaml │ ├── Spaetzle.yaml │ ├── bge-base-en-v1.5.yaml │ ├── Falcon3.yaml │ ├── Marcoroni.yaml │ ├── bge-reranker-v2-m3.yaml │ ├── mxbai-embed.yaml │ ├── Gemma-2.yaml │ ├── InternLM.yaml │ ├── Phi-3.yaml │ ├── jina-reranker.yaml │ ├── multilingual-e5.yaml │ ├── Gemma-3.yaml │ ├── SmolLM2.yaml │ ├── All-MiniLM-L6-v2.yaml │ ├── Hermes.yaml │ ├── Mistral.yaml │ ├── gorilla.yaml │ ├── Phi-4.yaml │ ├── Qwen2.yaml │ ├── Llama-3.yaml │ ├── StableLM-Zephyr.yaml │ ├── localmentor.yaml │ ├── DeepSeek-R1.yaml │ ├── MiniCPM-v4.yaml │ ├── MiniCPM-2.6.yaml │ ├── MiniCPM-o-2.6.yaml │ ├── InternVL3.yaml │ ├── MiniCPM-2.5.yaml │ ├── llava-phi-3.yaml │ ├── llava-mistral.yaml │ ├── Qwen2-VL.yaml │ ├── Qwen2-Audio.yaml │ └── Phi-3-adapters.yaml ├── CMakeLists.txt ├── package.xml └── launch │ ├── spaetzle.launch.py │ └── minicpm-2.6.launch.py ├── .gitignore ├── llama_msgs ├── srv │ ├── GetMetadata.srv │ ├── Detokenize.srv │ ├── ListLoRAs.srv │ ├── Tokenize.srv │ ├── UpdateLoRAs.srv │ ├── RerankDocuments.srv │ └── GenerateEmbeddings.srv ├── msg │ ├── LogitBiasArray.msg │ ├── Metadata.msg │ ├── LogitBias.msg │ ├── ChatContent.msg │ ├── ChatReqTool.msg │ ├── TokenProbArray.msg │ ├── ChatReasoningFormat.msg │ ├── LoRA.msg │ ├── ChatToolCall.msg │ ├── TokenProb.msg │ ├── ChatDeltaChunk.msg │ ├── Response.msg │ ├── PartialResponse.msg │ ├── ChatChoice.msg │ ├── UsageStats.msg │ ├── ChatChoiceChunk.msg │ ├── ChatTool.msg │ ├── GrammarTrigger.msg │ ├── ChatMessage.msg │ ├── TokenizerInfo.msg │ ├── RoPEInfo.msg │ ├── AttentionInfo.msg │ ├── ModelInfo.msg │ ├── GeneralInfo.msg │ └── 
SamplingConfig.msg ├── action │ ├── GenerateResponse.action │ └── GenerateChatCompletions.action ├── package.xml └── CMakeLists.txt ├── docs └── ROSCon_Spain_2023.pdf ├── requirements.txt ├── CITATION.cff ├── .github └── workflows │ ├── python-formatter.yml │ ├── cpp-formatter.yml │ ├── iron-docker-build.yml │ ├── jazzy-docker-build.yml │ ├── humble-docker-build.yml │ ├── kilted-docker-build.yml │ ├── rolling-docker-build.yml │ ├── close-inactive-issues.yml │ ├── iron-docker-push.yml │ ├── humble-docker-push.yml │ ├── jazzy-docker-push.yml │ ├── kilted-docker-push.yml │ ├── rolling-docker-push.yml │ ├── doxygen-deployment.yml │ └── create-release.yml ├── llama_bt ├── llama_tree_nodes.xml ├── test │ ├── action │ │ └── CMakeLists.txt │ ├── CMakeLists.txt │ ├── test_register.cpp │ └── utils │ │ └── test_action_server.hpp ├── package.xml ├── src │ └── action │ │ ├── generate_response_action.cpp │ │ └── generate_chat_completions_action.cpp └── include │ └── llama_bt │ └── action │ ├── generate_response_action.hpp │ └── generate_chat_completions_action.hpp ├── llama_cpp_vendor ├── package.xml └── CMakeLists.txt ├── llama_hfhub_vendor ├── package.xml └── CMakeLists.txt ├── LICENSE └── Dockerfile /llama_cli/resource/llama_cli: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_cli/llama_cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_ros/llama_ros/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_cli/llama_cli/verb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_bringup/llama_bringup/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_cli/llama_cli/command/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | __pycache__ 3 | .cache 4 | compile_commands.json -------------------------------------------------------------------------------- /llama_msgs/srv/GetMetadata.srv: -------------------------------------------------------------------------------- 1 | --- 2 | Metadata metadata # Metadata info -------------------------------------------------------------------------------- /llama_msgs/srv/Detokenize.srv: -------------------------------------------------------------------------------- 1 | int32[] tokens # tokens 2 | --- 3 | string text # prompt -------------------------------------------------------------------------------- /llama_msgs/srv/ListLoRAs.srv: -------------------------------------------------------------------------------- 1 | --- 2 | LoRA[] loras # LoRAs loaded when launching llama_ros
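The Detokenize service above and the Tokenize service that follows convert between text and token IDs. A minimal rclpy client sketch, illustrative only and not a file in the repository; the service names /llama/tokenize and /llama/detokenize are assumptions, so check the names exposed by the running llama_ros node:

```python
# Illustrative sketch: round-trip text through the llama_msgs Tokenize and
# Detokenize services. The service names below are assumptions; check the
# names actually exposed by the running llama_ros node.
import rclpy
from rclpy.node import Node
from llama_msgs.srv import Detokenize, Tokenize


def main():
    rclpy.init()
    node = Node("tokenize_demo")

    tokenize = node.create_client(Tokenize, "/llama/tokenize")
    detokenize = node.create_client(Detokenize, "/llama/detokenize")
    tokenize.wait_for_service()
    detokenize.wait_for_service()

    # Text -> tokens
    future = tokenize.call_async(Tokenize.Request(text="Hello llama_ros"))
    rclpy.spin_until_future_complete(node, future)
    tokens = list(future.result().tokens)

    # Tokens -> text
    future = detokenize.call_async(Detokenize.Request(tokens=tokens))
    rclpy.spin_until_future_complete(node, future)
    node.get_logger().info(f"{tokens} -> {future.result().text!r}")

    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```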
-------------------------------------------------------------------------------- /llama_msgs/srv/Tokenize.srv: -------------------------------------------------------------------------------- 1 | string text # prompt 2 | --- 3 | int32[] tokens # tokens -------------------------------------------------------------------------------- /llama_msgs/msg/LogitBiasArray.msg: -------------------------------------------------------------------------------- 1 | LogitBias[] data # The array of logit biases -------------------------------------------------------------------------------- /docs/ROSCon_Spain_2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mgonzs13/llama_ros/HEAD/docs/ROSCon_Spain_2023.pdf -------------------------------------------------------------------------------- /llama_cli/setup.cfg: -------------------------------------------------------------------------------- 1 | [develop] 2 | script_dir=$base/lib/llama_cli 3 | [install] 4 | install_scripts=$base/lib/llama_cli 5 | -------------------------------------------------------------------------------- /llama_msgs/srv/UpdateLoRAs.srv: -------------------------------------------------------------------------------- 1 | LoRA[] loras # loaded LoRAs to update. A LoRA with scale 0.0 means deactivating it. 2 | --- -------------------------------------------------------------------------------- /llama_msgs/msg/Metadata.msg: -------------------------------------------------------------------------------- 1 | GeneralInfo general # General info 2 | ModelInfo model # Model info 3 | TokenizerInfo tokenizer # Tokenizer info -------------------------------------------------------------------------------- /llama_msgs/msg/LogitBias.msg: -------------------------------------------------------------------------------- 1 | int32 token # The token to apply the bias 2 | float32 bias # The bias to influence the token -------------------------------------------------------------------------------- /llama_msgs/msg/ChatContent.msg: -------------------------------------------------------------------------------- 1 | string type # The type of the message (text, image, etc.) 2 | string text # The text of the message -------------------------------------------------------------------------------- /llama_msgs/msg/ChatReqTool.msg: -------------------------------------------------------------------------------- 1 | string type "function" # The type of the tool (only function is supported) 2 | ChatTool function # The Tool object -------------------------------------------------------------------------------- /llama_msgs/msg/TokenProbArray.msg: -------------------------------------------------------------------------------- 1 | TokenProb[] data # The probabilities of n most probable tokens 2 | int32 chosen_token # The chosen token in the sampling -------------------------------------------------------------------------------- /llama_bringup/prompts/DeepSeek-R1.yaml: -------------------------------------------------------------------------------- 1 | prefix: "<|User|>" 2 | suffix: "<|Assistant|>" 3 | stopping_words: ["<|User|>"] 4 | 5 | system_prompt: You are an AI assistant that follows instruction. 
6 | -------------------------------------------------------------------------------- /llama_msgs/msg/ChatReasoningFormat.msg: -------------------------------------------------------------------------------- 1 | int32 COMMON_REASONING_FORMAT_NONE = 0 2 | int32 COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY = 1 3 | int32 COMMON_REASONING_FORMAT_DEEPSEEK = 2 4 | 5 | int32 value 0 -------------------------------------------------------------------------------- /llama_msgs/msg/LoRA.msg: -------------------------------------------------------------------------------- 1 | int32 id # The ID of the LoRA 2 | string path # The path of the LoRA file 3 | float32 scale # Scale of the LoRA to be applied -------------------------------------------------------------------------------- /llama_msgs/srv/RerankDocuments.srv: -------------------------------------------------------------------------------- 1 | string query # query to calculate score 2 | string[] documents # documents to rerank 3 | --- 4 | float32[] scores # scores for the documents -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.3.27 2 | langchain-chroma==0.2.2 3 | langchain-community==0.3.27 4 | langchain-core==0.3.75 5 | langchain-openai==0.3.8 6 | langchain-text-splitters==0.3.11 7 | langgraph==0.4.5 8 | pydantic==2.9.2 9 | -------------------------------------------------------------------------------- /llama_msgs/msg/ChatToolCall.msg: -------------------------------------------------------------------------------- 1 | string name # The name of the tool 2 | string arguments # The arguments of the tool 3 | string id # The ID of the tool call 4 | int32 index 0 -------------------------------------------------------------------------------- /llama_msgs/msg/TokenProb.msg: -------------------------------------------------------------------------------- 1 | int32 token # The possible token 2 | float32 probability # The probability of the token on the sampling 3 | string token_text # The representation of the token in text -------------------------------------------------------------------------------- /llama_msgs/msg/ChatDeltaChunk.msg: -------------------------------------------------------------------------------- 1 | string content # The content of the delta message 2 | string role # The role of the delta message (user, system, etc.) 
3 | ChatToolCall[] tool_calls 4 | string reasoning_content -------------------------------------------------------------------------------- /llama_bringup/models/GPT-OSS.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 4096 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 4096 6 | 7 | model_repo: "ggml-org/gpt-oss-20b-GGUF" 8 | model_filename: "gpt-oss-20b-mxfp4.gguf" 9 | 10 | system_prompt_type: "GPT" 11 | -------------------------------------------------------------------------------- /llama_msgs/msg/Response.msg: -------------------------------------------------------------------------------- 1 | string text # The text of the response 2 | int32[] tokens # The tokens of the response 3 | TokenProbArray[] probs # The probabilities for each selected and more probable tokens -------------------------------------------------------------------------------- /llama_bringup/models/Qwen3.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "bartowski/Qwen_Qwen3-0.6B-GGUF" 8 | model_filename: "Qwen_Qwen3-0.6B-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "ChatML" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/Spaetzle.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "cstr/Spaetzle-v60-7b-GGUF" 8 | model_filename: "Spaetzle-v60-7b-q4-k-m.gguf" 9 | 10 | system_prompt_type: "Alpaca" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/bge-base-en-v1.5.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 1024 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | embedding: true 7 | 8 | model_repo: "CompendiumLabs/bge-base-en-v1.5-gguf" 9 | model_filename: "bge-base-en-v1.5-f16.gguf" 10 | -------------------------------------------------------------------------------- /llama_bringup/models/Falcon3.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "tiiuae/Falcon3-1B-Instruct-GGUF" 8 | model_filename: "Falcon3-1B-Instruct-q4_k_m.gguf" 9 | 10 | system_prompt_type: "Phi-3" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/Marcoroni.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "TheBloke/Marcoroni-7B-v3-GGUF" 8 | model_filename: "marcoroni-7b-v3.Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Alpaca" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/bge-reranker-v2-m3.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 1024 3 | n_gpu_layers: 0 4 | n_threads: -1 5 | n_predict: 2048 6 | reranking: true 7 | 8 | model_repo: "gpustack/bge-reranker-v2-m3-GGUF" 9 | model_filename: "bge-reranker-v2-m3-Q4_K_M.gguf" 10 | -------------------------------------------------------------------------------- 
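bge-reranker-v2-m3 above is a reranking model (reranking: true); it backs the RerankDocuments service defined earlier. A minimal sketch of calling it from rclpy, illustrative only; the service name /llama/rerank_documents is an assumption and a reranking model must be running:

```python
# Illustrative sketch: score documents against a query with the
# RerankDocuments service. The service name is an assumption; a reranking
# model such as bge-reranker-v2-m3 must be loaded in llama_ros.
import rclpy
from rclpy.node import Node
from llama_msgs.srv import RerankDocuments


def main():
    rclpy.init()
    node = Node("rerank_demo")

    client = node.create_client(RerankDocuments, "/llama/rerank_documents")
    client.wait_for_service()

    req = RerankDocuments.Request()
    req.query = "What is ROS 2?"
    req.documents = ["ROS 2 is a robotics middleware.", "Bananas are yellow."]

    future = client.call_async(req)
    rclpy.spin_until_future_complete(node, future)

    # Higher score means the document is more relevant to the query
    for doc, score in zip(req.documents, future.result().scores):
        node.get_logger().info(f"{score:.3f}  {doc}")

    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```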
/llama_bringup/models/mxbai-embed.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 1024 3 | n_gpu_layers: 0 4 | n_threads: -1 5 | n_predict: 2048 6 | embedding: true 7 | 8 | model_repo: "mixedbread-ai/mxbai-embed-large-v1" 9 | model_filename: "gguf/mxbai-embed-large-v1-f16.gguf" 10 | -------------------------------------------------------------------------------- /llama_bringup/models/Gemma-2.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "lmstudio-community/gemma-2-2b-it-GGUF" 8 | model_filename: "gemma-2-2b-it-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Gemma-2" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/InternLM.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "internlm/internlm2_5-7b-chat-gguf" 8 | model_filename: "internlm2_5-7b-chat-q4_k_m.gguf" 9 | 10 | system_prompt_type: "ChatML" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/Phi-3.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "bartowski/Phi-3.5-mini-instruct-GGUF" 8 | model_filename: "Phi-3.5-mini-instruct-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Phi-3" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/jina-reranker.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 1024 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | reranking: true 7 | 8 | model_repo: "gpustack/jina-reranker-v1-tiny-en-GGUF" 9 | model_filename: "jina-reranker-v1-tiny-en-FP16.gguf" 10 | -------------------------------------------------------------------------------- /llama_bringup/models/multilingual-e5.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 1024 3 | n_gpu_layers: 0 4 | n_threads: -1 5 | n_predict: 2048 6 | embedding: true 7 | 8 | model_repo: "nnch/multilingual-e5-large-Q4_K_M-GGUF" 9 | model_filename: "multilingual-e5-large-q4_k_m.gguf" 10 | -------------------------------------------------------------------------------- /llama_bringup/models/Gemma-3.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 256 3 | n_gpu_layers: 27 4 | n_threads: -1 5 | n_predict: -1 6 | 7 | model_repo: "vinimuchulski/gemma-3-1b-it-qat-q4_0-gguf" 8 | model_filename: "gemma-3-1b-it-q4_0.gguf" 9 | 10 | system_prompt_type: "Gemma-3" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/SmolLM2.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "bartowski/SmolLM2-1.7B-Instruct-GGUF" 8 | model_filename: "SmolLM2-1.7B-Instruct-Q4_K_L.gguf" 9 | 10 | system_prompt_type: "ChatML" 11 | -------------------------------------------------------------------------------- /llama_msgs/msg/PartialResponse.msg: 
-------------------------------------------------------------------------------- 1 | string text # Delta text of the streamed response 2 | int32 token # The token of the response 3 | TokenProbArray probs # The probabilities of the selected token and the more probable tokens -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "González-Santamarta" 5 | given-names: "Miguel Á." 6 | title: "llama_ros" 7 | date-released: 2023-04-03 8 | url: "https://github.com/mgonzs13/llama_ros" 9 | -------------------------------------------------------------------------------- /llama_bringup/models/All-MiniLM-L6-v2.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 1024 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | embedding: true 7 | 8 | model_repo: "second-state/All-MiniLM-L6-v2-Embedding-GGUF" 9 | model_filename: "all-MiniLM-L6-v2-ggml-model-f16.gguf" 10 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Mistral.yaml: -------------------------------------------------------------------------------- 1 | prefix: "[INST]" 2 | suffix: "[/INST]" 3 | stopping_words: ["[INST]"] 4 | 5 | system_prompt: |- 6 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 7 | 8 | [INST] Hello [/INST] Hello [INST] 9 | -------------------------------------------------------------------------------- /llama_bringup/models/Hermes.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "QuantFactory/Hermes-2-Theta-Llama-3-8B-GGUF" 8 | model_filename: "Hermes-2-Theta-Llama-3-8B.Q4_K_M.gguf" 9 | 10 | system_prompt_type: "ChatML" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/Mistral.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "bartowski/Ministral-8B-Instruct-2410-GGUF" 8 | model_filename: "Ministral-8B-Instruct-2410-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Mistral" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/gorilla.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "gorilla-llm/gorilla-openfunctions-v2-gguf" 8 | model_filename: "gorilla-openfunctions-v2-q4_K_M.gguf" 9 | 10 | system_prompt_type: "gorilla" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/Phi-4.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "bartowski/microsoft_Phi-4-mini-instruct-GGUF" 8 | model_filename: "microsoft_Phi-4-mini-instruct-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Phi-4" 11 | -------------------------------------------------------------------------------- 
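The model YAMLs in llama_bringup/models only declare parameters (n_ctx, model_repo, model_filename, system_prompt_type, ...); a launch file turns one of them into a running llama_ros node. An illustrative sketch of that pattern, assuming llama_bringup.utils exposes a helper along the lines of create_llama_launch_from_yaml (the pattern used by the launch files in llama_bringup/launch); verify the exact helper name in llama_bringup/utils.py:

```python
# Illustrative launch-file sketch: load one of the model YAMLs shipped in
# llama_bringup/models. The helper name create_llama_launch_from_yaml is an
# assumption; check llama_bringup/utils.py for the actual API.
import os

from ament_index_python.packages import get_package_share_directory
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch_from_yaml


def generate_launch_description():
    model_yaml = os.path.join(
        get_package_share_directory("llama_bringup"), "models", "Qwen3.yaml"
    )
    # The helper reads n_ctx, model_repo, model_filename, etc. from the YAML,
    # fetches the GGUF from Hugging Face and starts the llama_ros node.
    return LaunchDescription([create_llama_launch_from_yaml(model_yaml)])
```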
/llama_bringup/models/Qwen2.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF" 8 | model_filename: "qwen2.5-coder-7b-instruct-q4_k_m-00001-of-00002.gguf" 9 | 10 | system_prompt_type: "ChatML" 11 | -------------------------------------------------------------------------------- /llama_msgs/msg/ChatChoice.msg: -------------------------------------------------------------------------------- 1 | ChatMessage message # The message that was sent 2 | TokenProbArray[] logprobs # The log probabilities 3 | string finish_reason # The reason the chat completion ended 4 | int32 index # The index of the choice -------------------------------------------------------------------------------- /llama_msgs/msg/UsageStats.msg: -------------------------------------------------------------------------------- 1 | int32 completion_tokens # The number of tokens in the completion (output tokens) 2 | int32 prompt_tokens # The number of tokens in the prompt (input tokens) 3 | int32 total_tokens # The sum of the prompt and completion tokens -------------------------------------------------------------------------------- /llama_bringup/models/Llama-3.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF" 8 | model_filename: "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Llama-3" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/StableLM-Zephyr.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "duyntnet/stablelm-zephyr-3b-imatrix-GGUF" 8 | model_filename: "stablelm-zephyr-3b-Q4_K_M.gguf" 9 | 10 | system_prompt_type: "StableLM-Zephyr" 11 | -------------------------------------------------------------------------------- /llama_bringup/models/localmentor.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "mgonzs13/stablelm-zephyr-3B-localmentor-GGUF" 8 | model_filename: "stablelm-zephyr-3B-localmentor.Q4_K_M.gguf" 9 | 10 | system_prompt_type: "Zephyr" 11 | -------------------------------------------------------------------------------- /llama_msgs/msg/ChatChoiceChunk.msg: -------------------------------------------------------------------------------- 1 | ChatDeltaChunk delta # The delta to apply to the chat state 2 | TokenProbArray logprobs # The log probabilities 3 | string finish_reason # The reason the chat completion ended 4 | int32 index 0 # The index of the choice -------------------------------------------------------------------------------- /llama_bringup/prompts/Vicuna.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\nUSER: " 2 | suffix: "\nASSISTANT: " 3 | stopping_words: ["USER: "] 4 | 5 | system_prompt: |- 6 | SYSTEM: You are an AI assistant that follows instruction extremely well. Help as much as you can. 
7 | 8 | USER: Hello 9 | 10 | ASSISTANT: Hello 11 | 12 | USER: 13 | -------------------------------------------------------------------------------- /llama_msgs/msg/ChatTool.msg: -------------------------------------------------------------------------------- 1 | int32 TOOL_CHOICE_AUTO = 0 2 | int32 TOOL_CHOICE_REQUIRED = 1 3 | int32 TOOL_CHOICE_NONE = 2 4 | 5 | string name # The name of the tool 6 | string description # The description of the tool 7 | string parameters # The parameters of the tool -------------------------------------------------------------------------------- /llama_msgs/srv/GenerateEmbeddings.srv: -------------------------------------------------------------------------------- 1 | string prompt # prompt 2 | int32 normalization 2 # normalization (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm) 3 | --- 4 | float32[] embeddings # embeddings 5 | int32 n_tokens # tokens processed -------------------------------------------------------------------------------- /llama_bringup/prompts/Zephyr.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n<|user|>\n" 2 | suffix: "\n<|assistant|>\n" 3 | stopping_words: [""] 4 | 5 | system_prompt: |- 6 | <|system|> 7 | You are an AI assistant that follows instruction extremely well. Help as much as you can. 8 | <|user|> 9 | Hello 10 | <|assistant|> 11 | Hello 12 | -------------------------------------------------------------------------------- /llama_bringup/models/DeepSeek-R1.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF" 8 | model_filename: "DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf" 9 | 10 | # system_prompt_type: "DeepSeek-R1" 11 | chat_template_file: "llama-cpp-deepseek-r1.jinja" -------------------------------------------------------------------------------- /llama_bringup/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(llama_bringup) 3 | 4 | # find dependencies 5 | find_package(ament_cmake REQUIRED) 6 | 7 | install(DIRECTORY 8 | launch prompts models 9 | DESTINATION share/${PROJECT_NAME}/ 10 | ) 11 | 12 | # Python 13 | ament_python_install_package(${PROJECT_NAME}) 14 | 15 | ament_package() 16 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Phi-3.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n<|user|>\n" 2 | suffix: "<|end|>\n<|assistant|>\n" 3 | stopping_words: ["<|end|>"] 4 | 5 | system_prompt: |- 6 | <|system|> 7 | Below is an instruction that describes a task. Write a response that appropriately completes the request<|end|> 8 | <|user|> 9 | Hello<|end|> 10 | <|assistant|> 11 | Hello<|end|> 12 | -------------------------------------------------------------------------------- /llama_bringup/prompts/user-assistant-hashes.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n\n### User:\n" 2 | suffix: "\n\n### Assistant:\n" 3 | stopping_words: ["### User:\n"] 4 | 5 | system_prompt: | 6 | You are an AI assistant that follows instruction extremely well. Help as much as you can. 
7 | 8 | ### User: 9 | Hello 10 | 11 | ### Assistant: 12 | Hello 13 | 14 | ### User: 15 | -------------------------------------------------------------------------------- /llama_ros/llama_ros/langchain/__init__.py: -------------------------------------------------------------------------------- 1 | from llama_ros.langchain.llama_ros_common import LlamaROSCommon 2 | from llama_ros.langchain.llama_ros import LlamaROS 3 | from llama_ros.langchain.chat_llama_ros import ChatLlamaROS 4 | from llama_ros.langchain.llama_ros_embeddings import LlamaROSEmbeddings 5 | from llama_ros.langchain.llama_ros_reranker import LlamaROSReranker 6 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Alpaca.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n\n### Instruction:\n" 2 | suffix: "\n\n### Response:\n" 3 | stopping_words: ["### Instruction:\n"] 4 | 5 | system_prompt: | 6 | You are an AI assistant that follows instruction extremely well. Help as much as you can. 7 | 8 | ### Instruction: 9 | Hello 10 | 11 | ### Response: 12 | Hello 13 | 14 | ### Instruction: 15 | -------------------------------------------------------------------------------- /llama_bringup/models/MiniCPM-v4.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 20 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "openbmb/MiniCPM-V-4-gguf" 10 | model_filename: "ggml-model-Q4_K_M.gguf" 11 | 12 | mmproj_repo: "openbmb/MiniCPM-V-4-gguf" 13 | mmproj_filename: "mmproj-model-f16.gguf" 14 | 15 | system_prompt_type: "ChatML" 16 | -------------------------------------------------------------------------------- /llama_bringup/models/MiniCPM-2.6.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 20 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "openbmb/MiniCPM-V-2_6-gguf" 10 | model_filename: "ggml-model-Q4_K_M.gguf" 11 | 12 | mmproj_repo: "openbmb/MiniCPM-V-2_6-gguf" 13 | mmproj_filename: "mmproj-model-f16.gguf" 14 | 15 | system_prompt_type: "ChatML" 16 | -------------------------------------------------------------------------------- /llama_bringup/models/MiniCPM-o-2.6.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 20 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "openbmb/MiniCPM-o-2_6-gguf" 10 | model_filename: "Model-7.6B-Q4_K_M.gguf" 11 | 12 | mmproj_repo: "openbmb/MiniCPM-o-2_6-gguf" 13 | mmproj_filename: "mmproj-model-f16.gguf" 14 | 15 | system_prompt_type: "ChatML" 16 | -------------------------------------------------------------------------------- /llama_bringup/prompts/StableLM-Zephyr.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n<|user|>\n" 2 | suffix: "<|endoftext|>\n<|assistant|>\n" 3 | stopping_words: ["<|endoftext|>"] 4 | 5 | system_prompt: |- 6 | <|system|> 7 | You are an AI assistant that follows instruction extremely well. 
Help as much as you can.<|endoftext|> 8 | <|user|> 9 | Hello<|endoftext|> 10 | <|assistant|> 11 | Hello<|endoftext|> 12 | -------------------------------------------------------------------------------- /llama_bringup/prompts/system-user-assistant-hashes.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n\n### User:\n" 2 | suffix: "\n\n### Assistant:\n" 3 | stopping_words: ["### User:\n"] 4 | 5 | system_prompt: | 6 | ### System: 7 | You are an AI assistant that follows instruction extremely well. Help as much as you can. 8 | 9 | ### User: 10 | Hello 11 | 12 | ### Assistant: 13 | Hello 14 | 15 | ### User: 16 | -------------------------------------------------------------------------------- /llama_msgs/msg/GrammarTrigger.msg: -------------------------------------------------------------------------------- 1 | int32 GRAMMAR_TRIGGER_TYPE_TOKEN = 0 2 | int32 GRAMMAR_TRIGGER_TYPE_WORD = 1 3 | int32 GRAMMAR_TRIGGER_TYPE_PATTERN = 2 4 | int32 GRAMMAR_TRIGGER_TYPE_PATTERN_START = 3 5 | 6 | int32 type # The type of the trigger 7 | string value # The string of the trigger 8 | int32 token # The token of the trigger -------------------------------------------------------------------------------- /llama_bringup/models/InternVL3.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 256 5 | n_gpu_layers: 33 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "mradermacher/InternVL3-8B-GGUF" 10 | model_filename: "InternVL3-8B.Q4_K_M.gguf" 11 | 12 | mmproj_repo: "mradermacher/InternVL3-8B-GGUF" 13 | mmproj_filename: "InternVL3-8B.mmproj-Q8_0.gguf" 14 | 15 | system_prompt_type: "ChatML" 16 | -------------------------------------------------------------------------------- /llama_bringup/models/MiniCPM-2.5.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 20 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "openbmb/MiniCPM-Llama3-V-2_5-gguf" 10 | model_filename: "ggml-model-Q4_K_M.gguf" 11 | 12 | mmproj_repo: "openbmb/MiniCPM-Llama3-V-2_5-gguf" 13 | mmproj_filename: "mmproj-model-f16.gguf" 14 | 15 | system_prompt_type: "Llama-3" 16 | -------------------------------------------------------------------------------- /llama_bringup/models/llava-phi-3.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 33 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "xtuner/llava-phi-3-mini-gguf" 10 | model_filename: "llava-phi-3-mini-int4.gguf" 11 | 12 | mmproj_repo: "xtuner/llava-phi-3-mini-gguf" 13 | mmproj_filename: "llava-phi-3-mini-mmproj-f16.gguf" 14 | 15 | system_prompt_type: "Phi-3" 16 | -------------------------------------------------------------------------------- /llama_bringup/prompts/GPT.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n<|start|>user<|message|>" 2 | suffix: "<|end|>\n<|start|>assistant" 3 | stopping_words: ["<|end|>", "<|return|>"] 4 | 5 | system_prompt: |- 6 | <|start|>system<|message|>Below is an instruction that describes a task. 
Write a response that appropriately completes the request<|end|> 7 | <|start|>user<|message|>Hello<|end|> 8 | <|start|>assistant<|message|>Hello<|end|> 9 | -------------------------------------------------------------------------------- /llama_bringup/models/llava-mistral.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 33 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "cjpais/llava-1.6-mistral-7b-gguf" 10 | model_filename: "llava-v1.6-mistral-7b.Q4_K_M.gguf" 11 | 12 | mmproj_repo: "cjpais/llava-1.6-mistral-7b-gguf" 13 | mmproj_filename: "mmproj-model-f16.gguf" 14 | 15 | system_prompt_type: "Mistral" 16 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Gemma-2.yaml: -------------------------------------------------------------------------------- 1 | prefix: "user\n" 2 | suffix: "\nmodel\n" 3 | stopping_words: [""] 4 | 5 | system_prompt: |+ 6 | Below is an instruction that describes a task. Write a response that appropriately completes the request 7 | 8 | user 9 | Hello 10 | model 11 | Hello 12 | 13 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Gemma-3.yaml: -------------------------------------------------------------------------------- 1 | prefix: "user\n" 2 | suffix: "\nassistant\n" 3 | stopping_words: [""] 4 | 5 | system_prompt: | 6 | Below is an instruction that describes a task. Write a response that appropriately completes the request 7 | 8 | user 9 | Hello 10 | assistant 11 | Hello 12 | -------------------------------------------------------------------------------- /.github/workflows/python-formatter.yml: -------------------------------------------------------------------------------- 1 | name: Python Formatting Check 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | python_formatter: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout code 10 | uses: actions/checkout@v4 11 | 12 | - name: Black Formatter 13 | uses: lgeiger/black-action@master 14 | with: 15 | args: ". --check --diff --line-length 90" 16 | -------------------------------------------------------------------------------- /llama_bringup/prompts/ChatML.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n<|im_start|>user\n" 2 | suffix: "<|im_end|>\n<|im_start|>assistant\n" 3 | stopping_words: ["<|im_end|>"] 4 | 5 | system_prompt: |- 6 | <|im_start|>system 7 | Below is an instruction that describes a task. 
Write a response that appropriately completes the request.<|im_end|> 8 | <|im_start|>user 9 | Hello<|im_end|> 10 | <|im_start|>assistant 11 | Hello<|im_end|> 12 | -------------------------------------------------------------------------------- /llama_bringup/models/Qwen2-VL.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 29 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "bartowski/Qwen2-VL-2B-Instruct-GGUF" 10 | model_filename: "Qwen2-VL-2B-Instruct-Q4_K_M.gguf" 11 | 12 | mmproj_repo: "bartowski/Qwen2-VL-2B-Instruct-GGUF" 13 | mmproj_filename: "mmproj-Qwen2-VL-2B-Instruct-f16.gguf" 14 | 15 | system_prompt_type: "ChatML" 16 | -------------------------------------------------------------------------------- /.github/workflows/cpp-formatter.yml: -------------------------------------------------------------------------------- 1 | name: C++ Formatting Check 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | cpp_formatter: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout code 10 | uses: actions/checkout@v4 11 | 12 | - name: Clang Formatter 13 | uses: DoozyX/clang-format-lint-action@v0.18.1 14 | with: 15 | clangFormatVersion: 14 16 | source: "." 17 | -------------------------------------------------------------------------------- /llama_bringup/models/Qwen2-Audio.yaml: -------------------------------------------------------------------------------- 1 | use_llava: True 2 | 3 | n_ctx: 8192 4 | n_batch: 512 5 | n_gpu_layers: 20 6 | n_threads: -1 7 | n_predict: 8192 8 | 9 | model_repo: "mradermacher/Qwen2-Audio-7B-Instruct-GGUF" 10 | model_filename: "Qwen2-Audio-7B-Instruct.Q4_K_M.gguf" 11 | 12 | mmproj_repo: "mradermacher/Qwen2-Audio-7B-Instruct-GGUF" 13 | mmproj_filename: "Qwen2-Audio-7B-Instruct.mmproj-f16.gguf" 14 | 15 | system_prompt_type: "ChatML" 16 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Phi-4.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n<|im_start|>user<|im_sep|>\n" 2 | suffix: "<|im_end|>\n<|assistant|>\n" 3 | stopping_words: ["<|im_end|>"] 4 | 5 | system_prompt: |- 6 | <|im_start|>system<|im_sep|> 7 | Below is an instruction that describes a task. Write a response that appropriately completes the request<|im_end|> 8 | <|im_start|>user<|im_sep|> 9 | Hello<|im_end|> 10 | <|im_start|>assistant<|im_sep|> 11 | Hello<|im_end|> 12 | -------------------------------------------------------------------------------- /llama_bringup/prompts/gorilla.yaml: -------------------------------------------------------------------------------- 1 | prefix: "\n### Instruction:" 2 | suffix: "\n### Response:" 3 | stopping_words: ["### Instruction:"] 4 | 5 | system_prompt: |- 6 | You are an AI programming assistant, utilizing the Gorilla LLM model, developed by Gorilla LLM, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer. 
7 | ### Instruction: 8 | -------------------------------------------------------------------------------- /llama_msgs/action/GenerateResponse.action: -------------------------------------------------------------------------------- 1 | string prompt # prompt 2 | sensor_msgs/Image[] images # images for VLMs 3 | std_msgs/UInt8MultiArray[] audios # audios for mtmd 4 | string[] stop [] # stop list 5 | bool reset false # whether to reset the context 6 | SamplingConfig sampling_config # sampling config 7 | --- 8 | Response response # final response 9 | --- 10 | PartialResponse partial_response # partial response -------------------------------------------------------------------------------- /llama_msgs/msg/ChatMessage.msg: -------------------------------------------------------------------------------- 1 | string role # The role of the message (user, system, etc.) 2 | string content # The content of the message 3 | ChatContent[] content_parts # The content of the message if it is split into parts 4 | ChatToolCall[] tool_calls # The tool calls 5 | string reasoning_content # The reasoning content 6 | string tool_name # The name of the required tool 7 | string tool_call_id # The ID of the tool call -------------------------------------------------------------------------------- /llama_bt/llama_tree_nodes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Prompt 6 | Stop list 7 | Whether to reset the context 8 | Final response 9 | 10 | 11 | -------------------------------------------------------------------------------- /llama_msgs/msg/TokenizerInfo.msg: -------------------------------------------------------------------------------- 1 | string model # Tokenizer model name 2 | 3 | uint32 bos_token_id # bos token of the tokenizer 4 | uint32 eos_token_id # eos token of the tokenizer 5 | uint32 unknown_token_id # unknown token of the tokenizer 6 | uint32 padding_token_id # padding token of the tokenizer 7 | uint32 separator_token_id # separator token of the tokenizer 8 | bool add_bos_token # Whether to add bos 9 | 10 | string chat_template # Chat template -------------------------------------------------------------------------------- /llama_msgs/msg/RoPEInfo.msg: -------------------------------------------------------------------------------- 1 | uint64 dimension_count # The number of rotary dimensions for RoPE 2 | float32 freq_base # The base frequency for RoPE 3 | 4 | string scaling_type # Can be none, linear, or yarn. 5 | float32 scaling_factor # A scale factor for RoPE to adjust the context length. 6 | uint32 scaling_original_context_length # The original context length of the base model. 7 | bool scaling_finetuned # True if model has been finetuned with RoPE scaling. 
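The GenerateResponse action above is the main text-generation interface: the goal carries the prompt (plus optional images, audios, stop words and sampling config), the feedback streams PartialResponse messages, and the result wraps a Response. A minimal rclpy action-client sketch, illustrative only; the action name /llama/generate_response is an assumption, and the nodes in llama_demos contain the complete clients:

```python
# Illustrative sketch: send a goal to the GenerateResponse action defined
# above. The action name is an assumption; see llama_demos for full clients.
import rclpy
from rclpy.action import ActionClient
from rclpy.node import Node
from llama_msgs.action import GenerateResponse


def main():
    rclpy.init()
    node = Node("generate_response_demo")

    client = ActionClient(node, GenerateResponse, "/llama/generate_response")
    client.wait_for_server()

    goal = GenerateResponse.Goal()
    goal.prompt = "Write a one-sentence greeting for a robot."
    goal.reset = True  # start from a fresh context

    goal_future = client.send_goal_async(goal)
    rclpy.spin_until_future_complete(node, goal_future)

    result_future = goal_future.result().get_result_async()
    rclpy.spin_until_future_complete(node, result_future)

    # The result carries a Response message; its "text" field is the output
    node.get_logger().info(result_future.result().result.response.text)

    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```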
-------------------------------------------------------------------------------- /.github/workflows/iron-docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Iron Docker Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 5 * * 1" 8 | 9 | jobs: 10 | iron_docker_build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Build docker 20 | uses: docker/build-push-action@v6 21 | with: 22 | build-args: ROS_DISTRO=iron 23 | push: false 24 | -------------------------------------------------------------------------------- /.github/workflows/jazzy-docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Jazzy Docker Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 5 * * 1" 8 | 9 | jobs: 10 | jazzy_docker_build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Build docker 20 | uses: docker/build-push-action@v6 21 | with: 22 | build-args: ROS_DISTRO=jazzy 23 | push: false 24 | -------------------------------------------------------------------------------- /llama_bringup/prompts/Llama-3.yaml: -------------------------------------------------------------------------------- 1 | prefix: "<|start_header_id|>user<|end_header_id|>\n\n" 2 | suffix: "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" 3 | stopping_words: ["<|eot_id|>"] 4 | 5 | system_prompt: |+ 6 | <|start_header_id|>system<|end_header_id|> 7 | 8 | Below is an instruction that describes a task. 
Write a response that appropriately completes the request<|eot_id|><|start_header_id|>user<|end_header_id|> 9 | 10 | Hello<|eot_id|><|start_header_id|>assistant<|end_header_id|> 11 | 12 | Hello<|eot_id|><|start_header_id|>user<|end_header_id|> 13 | 14 | -------------------------------------------------------------------------------- /.github/workflows/humble-docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Humble Docker Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 5 * * 1" 8 | 9 | jobs: 10 | humble_docker_build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Build docker 20 | uses: docker/build-push-action@v6 21 | with: 22 | build-args: ROS_DISTRO=humble 23 | push: false 24 | -------------------------------------------------------------------------------- /.github/workflows/kilted-docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Kilted Docker Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 5 * * 1" 8 | 9 | jobs: 10 | kilted_docker_build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Build docker 20 | uses: docker/build-push-action@v6 21 | with: 22 | build-args: ROS_DISTRO=kilted 23 | push: false 24 | -------------------------------------------------------------------------------- /.github/workflows/rolling-docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Rolling Docker Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 5 * * 1" 8 | 9 | jobs: 10 | rolling_docker_build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Build docker 20 | uses: docker/build-push-action@v6 21 | with: 22 | build-args: ROS_DISTRO=rolling 23 | push: false 24 | -------------------------------------------------------------------------------- /llama_cpp_vendor/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_cpp_vendor 5 | 5.3.3 6 | Vendor package for llama.cpp. 
7 | Miguel Ángel González Santamarta 8 | MIT 9 | ament_cmake 10 | 11 | ament_cmake 12 | 13 | -------------------------------------------------------------------------------- /llama_bringup/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_bringup 5 | 5.3.3 6 | Bringup package for llama_ros 7 | Miguel Ángel González Santamarta 8 | MIT 9 | ament_cmake 10 | llama_ros 11 | 12 | ament_cmake 13 | 14 | -------------------------------------------------------------------------------- /llama_cli/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_cli 5 | 5.3.3 6 | Cli package for llama_ros 7 | Miguel Ángel González Santamarta 8 | MIT 9 | ros2cli 10 | llama_ros 11 | llama_bringup 12 | 13 | ament_python 14 | 15 | -------------------------------------------------------------------------------- /llama_bringup/models/Phi-3-adapters.yaml: -------------------------------------------------------------------------------- 1 | n_ctx: 2048 2 | n_batch: 8 3 | n_gpu_layers: 0 4 | n_threads: 1 5 | n_predict: 2048 6 | 7 | model_repo: "bartowski/Phi-3.5-mini-instruct-GGUF" 8 | model_filename: "Phi-3.5-mini-instruct-Q4_K_M.gguf" 9 | 10 | lora_adapters: 11 | - repo: "zhhan/adapter-Phi-3-mini-4k-instruct_code_writing" 12 | filename: "Phi-3-mini-4k-instruct-adaptor-f16-code_writer.gguf" 13 | scale: 0.5 14 | - repo: "zhhan/adapter-Phi-3-mini-4k-instruct_summarization" 15 | filename: "Phi-3-mini-4k-instruct-adaptor-f16-summarization.gguf" 16 | scale: 0.5 17 | 18 | system_prompt_type: "Phi-3" 19 | -------------------------------------------------------------------------------- /llama_msgs/msg/AttentionInfo.msg: -------------------------------------------------------------------------------- 1 | uint64 head_count # Number of attention heads (n_head) 2 | uint64 head_count_kv # Number of heads per group for GQA 3 | 4 | float32 max_alibi_bias # Maximum bias for ALiBI 5 | float32 clamp_kqv # Clamp value for Q, K, and V tensors 6 | 7 | float32 layer_norm_epsilon # Epsilon for layer normalization 8 | float32 layer_norm_rms_epsilon # Epsilon for RMS normalization 9 | 10 | uint32 key_length # Size of a key head (optional) 11 | uint32 value_length # Size of a value head (optional) -------------------------------------------------------------------------------- /llama_hfhub_vendor/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_hfhub_vendor 5 | 5.3.3 6 | huggingface-hub-cpp vendor package for llama_ros 7 | Miguel Ángel González Santamarta 8 | MIT 9 | ament_cmake 10 | curl 11 | 12 | ament_cmake 13 | 14 | -------------------------------------------------------------------------------- /llama_bt/test/action/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Test for the generate response action 2 | find_package(ament_index_cpp REQUIRED) 3 | 4 | # Test for the generate response action 5 | ament_add_gtest(test_generate_response_action 6 | test_generate_response_action.cpp 7 | ) 8 | target_link_libraries(test_generate_response_action 9 | ament_index_cpp::ament_index_cpp 10 | llama_generate_response_action_bt_node 11 | ) 12 | 13 | # Test for the generate chat action 14 | ament_add_gtest(test_generate_chat_action 15 | test_generate_chat_action.cpp 16 | ) 17 | 18 | target_link_libraries(test_generate_chat_action 19 | ament_index_cpp::ament_index_cpp 20 | 
llama_generate_chat_completions_action_bt_node 21 | ) 22 | -------------------------------------------------------------------------------- /llama_demos/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_demos 5 | 5.3.3 6 | Demos for llama_ros 7 | Miguel Ángel González Santamarta 8 | MIT 9 | ament_cmake 10 | rclcpp 11 | rclcpp_action 12 | cv_bridge 13 | llama_ros 14 | llama_bringup 15 | 16 | ament_cmake 17 | 18 | -------------------------------------------------------------------------------- /llama_msgs/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_msgs 5 | 5.3.3 6 | Msgs for llama_ros 7 | Miguel Ángel González Santamarta 8 | MIT 9 | ament_cmake 10 | sensor_msgs 11 | rosidl_interface_packages 12 | 13 | ament_cmake 14 | rosidl_default_generators 15 | rosidl_default_runtime 16 | 17 | -------------------------------------------------------------------------------- /.github/workflows/close-inactive-issues.yml: -------------------------------------------------------------------------------- 1 | name: Close Inactive Issues 2 | on: 3 | schedule: 4 | - cron: "30 1 * * *" 5 | 6 | jobs: 7 | close-issues: 8 | runs-on: ubuntu-latest 9 | 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | 14 | steps: 15 | - uses: actions/stale@v5 16 | with: 17 | days-before-issue-stale: 30 18 | days-before-issue-close: 14 19 | stale-issue-label: "stale" 20 | stale-issue-message: "This issue is stale because it has been open for 30 days with no activity." 21 | close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." 22 | days-before-pr-stale: -1 23 | days-before-pr-close: -1 24 | repo-token: ${{ secrets.GITHUB_TOKEN }} 25 | -------------------------------------------------------------------------------- /llama_bt/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if("$ENV{ROS_DISTRO}" STREQUAL "foxy" OR "$ENV{ROS_DISTRO}" STREQUAL "galactic" OR "$ENV{ROS_DISTRO}" STREQUAL "humble" OR "$ENV{ROS_DISTRO}" STREQUAL "iron") 2 | message(STATUS "Using BehaviorTree.CPP V3") 3 | set(BT_LIB_NAME behaviortree_cpp_v3) 4 | set(BT_TARGET ${behaviortree_cpp_v3_TARGETS}) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBTV3") 6 | else() 7 | message(STATUS "Using BehaviorTree.CPP V4") 8 | set(BT_LIB_NAME behaviortree_cpp) 9 | set(BT_TARGET behaviortree_cpp::behaviortree_cpp) 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBTV4") 11 | endif() 12 | 13 | include_directories(.) 
14 | 15 | add_subdirectory(action) 16 | 17 | # Test register 18 | ament_add_gtest(test_register 19 | test_register.cpp 20 | ) 21 | target_link_libraries(test_register 22 | ${BT_TARGET} 23 | rclcpp::rclcpp 24 | ) -------------------------------------------------------------------------------- /.github/workflows/iron-docker-push.yml: -------------------------------------------------------------------------------- 1 | name: Iron Docker Push 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | iron_docker_push: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Login to Docker Hub 20 | uses: docker/login-action@v3 21 | with: 22 | username: ${{ secrets.DOCKERHUB_USERNAME }} 23 | password: ${{ secrets.DOCKERHUB_TOKEN }} 24 | 25 | - name: Build and push docker 26 | uses: docker/build-push-action@v6 27 | with: 28 | build-args: ROS_DISTRO=iron 29 | push: true 30 | tags: mgons/llama_ros:iron-${{ github.event.release.tag_name }} 31 | -------------------------------------------------------------------------------- /.github/workflows/humble-docker-push.yml: -------------------------------------------------------------------------------- 1 | name: Humble Docker Push 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | humble_docker_push: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | 19 | - name: Login to Docker Hub 20 | uses: docker/login-action@v3 21 | with: 22 | username: ${{ secrets.DOCKERHUB_USERNAME }} 23 | password: ${{ secrets.DOCKERHUB_TOKEN }} 24 | 25 | - name: Build and push docker 26 | uses: docker/build-push-action@v6 27 | with: 28 | build-args: ROS_DISTRO=humble 29 | push: true 30 | tags: mgons/llama_ros:humble-${{ github.event.release.tag_name }} 31 | -------------------------------------------------------------------------------- /llama_cli/test/test_pep257.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from ament_pep257.main import main 16 | import pytest 17 | 18 | 19 | @pytest.mark.linter 20 | @pytest.mark.pep257 21 | def test_pep257(): 22 | rc = main(argv=[".", "test"]) 23 | assert rc == 0, "Found code style errors / warnings" 24 | -------------------------------------------------------------------------------- /llama_hfhub_vendor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(llama_hfhub_vendor) 3 | 4 | include(FetchContent) 5 | find_package(ament_cmake REQUIRED) 6 | find_package(CURL REQUIRED) # Add CURL as a required package 7 | 8 | # Declare hfhub from Git repository 9 | FetchContent_Declare( 10 | hfhub 11 | GIT_REPOSITORY https://github.com/agonzc34/huggingface-hub-cpp 12 | GIT_TAG 1.1.3 13 | GIT_SHALLOW TRUE 14 | ) 15 | 16 | FetchContent_MakeAvailable(hfhub) 17 | 18 | # Export targets and include directories 19 | install( 20 | TARGETS hfhub 21 | EXPORT export_hfhub 22 | LIBRARY DESTINATION lib 23 | INCLUDES DESTINATION include 24 | ) 25 | 26 | # Export include directories and dependencies (CURL) 27 | ament_export_include_directories(include) 28 | ament_export_dependencies(CURL) # Export CURL dependency 29 | ament_export_targets(export_hfhub HAS_LIBRARY_TARGET) 30 | 31 | ament_package() 32 | -------------------------------------------------------------------------------- /llama_ros/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_ros 5 | 5.3.3 6 | llama.cpp for ROS 2 7 | Miguel Ángel González Santamarta 8 | MIT 9 | ament_cmake 10 | ament_lint_auto 11 | ament_clang_format 12 | ament_cmake_clang_format 13 | rclcpp 14 | rclcpp_action 15 | rclcpp_lifecycle 16 | cv_bridge 17 | llama_msgs 18 | llama_cpp_vendor 19 | llama_hfhub_vendor 20 | 21 | ament_cmake 22 | 23 | -------------------------------------------------------------------------------- /llama_cli/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | setup( 5 | name="llama_cli", 6 | version="5.3.3", 7 | packages=find_packages(exclude=["test"]), 8 | zip_safe=True, 9 | author="Miguel Ángel González Santamarta", 10 | author_email="mgons@unileon.es", 11 | maintainer="Miguel Ángel González Santamarta", 12 | maintainer_email="mgons@unileon.es", 13 | description="Cli package for llama_ros", 14 | license="MIT", 15 | data_files=[ 16 | ("share/llama_cli", ["package.xml"]), 17 | ("share/ament_index/resource_index/packages", ["resource/llama_cli"]), 18 | ], 19 | entry_points={ 20 | "ros2cli.command": [ 21 | "llama = llama_cli.command.llama:LlamaCommand", 22 | ], 23 | "llama_cli.verb": [ 24 | "launch = llama_cli.verb.launch:LaunchVerb", 25 | "prompt = llama_cli.verb.prompt:PromptVerb", 26 | ], 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /llama_cli/test/test_flake8.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_flake8.main import main_with_errors 16 | import pytest 17 | 18 | 19 | @pytest.mark.flake8 20 | @pytest.mark.linter 21 | def test_flake8(): 22 | rc, errors = main_with_errors(argv=[]) 23 | assert rc == 0, "Found %d code style errors / warnings:\n" % len(errors) + "\n".join( 24 | errors 25 | ) 26 | -------------------------------------------------------------------------------- /.github/workflows/jazzy-docker-push.yml: -------------------------------------------------------------------------------- 1 | name: Jazzy Docker Push 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | jazzy_docker_push: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Get tag name 17 | run: | 18 | tag_name=$(git describe --tags --abbrev=0 HEAD^) 19 | echo "tag_name=$tag_name" >> $GITHUB_ENV 20 | 21 | - name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v3 23 | 24 | - name: Login to Docker Hub 25 | uses: docker/login-action@v3 26 | with: 27 | username: ${{ secrets.DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.DOCKERHUB_TOKEN }} 29 | 30 | - name: Build and push docker 31 | uses: docker/build-push-action@v6 32 | with: 33 | build-args: ROS_DISTRO=jazzy 34 | push: true 35 | tags: mgons/llama_ros:jazzy-${{ github.event.release.tag_name }} 36 | -------------------------------------------------------------------------------- /.github/workflows/kilted-docker-push.yml: -------------------------------------------------------------------------------- 1 | name: Kilted Docker Push 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | kilted_docker_push: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Get tag name 17 | run: | 18 | tag_name=$(git describe --tags --abbrev=0 HEAD^) 19 | echo "tag_name=$tag_name" >> $GITHUB_ENV 20 | 21 | - name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v3 23 | 24 | - name: Login to Docker Hub 25 | uses: docker/login-action@v3 26 | with: 27 | username: ${{ secrets.DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.DOCKERHUB_TOKEN }} 29 | 30 | - name: Build and push docker 31 | uses: docker/build-push-action@v6 32 | with: 33 | build-args: ROS_DISTRO=kilted 34 | push: true 35 | tags: mgons/llama_ros:kilted-${{ github.event.release.tag_name }} 36 | -------------------------------------------------------------------------------- /.github/workflows/rolling-docker-push.yml: -------------------------------------------------------------------------------- 1 | name: Rolling Docker Push 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | rolling_docker_push: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Get tag name 17 | run: | 18 | tag_name=$(git describe --tags --abbrev=0 HEAD^) 19 | echo "tag_name=$tag_name" >> $GITHUB_ENV 20 | 21 | - name: Set up Docker Buildx 22 | uses: 
docker/setup-buildx-action@v3 23 | 24 | - name: Login to Docker Hub 25 | uses: docker/login-action@v3 26 | with: 27 | username: ${{ secrets.DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.DOCKERHUB_TOKEN }} 29 | 30 | - name: Build and push docker 31 | uses: docker/build-push-action@v6 32 | with: 33 | build-args: ROS_DISTRO=rolling 34 | push: true 35 | tags: mgons/llama_ros:rolling-${{ github.event.release.tag_name }} 36 | -------------------------------------------------------------------------------- /llama_cli/test/test_copyright.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_copyright.main import main 16 | import pytest 17 | 18 | 19 | # Remove the `skip` decorator once the source file(s) have a copyright header 20 | @pytest.mark.skip( 21 | reason="No copyright header has been placed in the generated source file." 22 | ) 23 | @pytest.mark.copyright 24 | @pytest.mark.linter 25 | def test_copyright(): 26 | rc = main(argv=[".", "test"]) 27 | assert rc == 0, "Found errors" 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Miguel Ángel González Santamarta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /llama_msgs/msg/ModelInfo.msg: -------------------------------------------------------------------------------- 1 | uint64 context_length # Length of the context (in tokens) that the model was trained on. 
For most architectures, this is the hard limit on the length of the input 2 | uint64 embedding_length # Embedding layer size 3 | uint64 block_count # The number of blocks of attention+feed-forward layers (i.e. the bulk of the LLM). Does not include the input or embedding layers 4 | uint64 feed_forward_length # The length of the feed-forward layer 5 | 6 | bool use_parallel_residual # Whether or not the parallel residual logic should be used 7 | string tensor_data_layout # When a model is converted to GGUF, tensors may be rearranged to improve performance. This key describes the layout of the tensor data 8 | 9 | uint32 expert_count # Number of experts in MoE models (optional for non-MoE arches) 10 | uint32 expert_used_count # Number of experts used during each token token evaluation (optional for non-MoE arches) 11 | 12 | AttentionInfo attention # Attention info for the model 13 | RoPEInfo rope # RoPE info for the model -------------------------------------------------------------------------------- /llama_bt/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | llama_bt 5 | 1.0.0 6 | A package containing behavior trees plugins for the llama_ros stack. 7 | Alberto Tudela 8 | MIT 9 | Alberto Tudela 10 | ament_cmake 11 | behaviortree_cpp_v3 12 | behaviortree_cpp 13 | ament_index_cpp 14 | rclcpp 15 | rclcpp_action 16 | rclcpp_lifecycle 17 | llama_ros 18 | llama_msgs 19 | nlohmann-json-dev 20 | ament_cmake_clang_format 21 | ament_cmake_gtest 22 | 23 | ament_cmake 24 | 25 | -------------------------------------------------------------------------------- /llama_msgs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(llama_msgs) 3 | 4 | # find dependencies 5 | find_package(ament_cmake REQUIRED) 6 | find_package(rosidl_default_generators REQUIRED) 7 | find_package(sensor_msgs REQUIRED) 8 | 9 | rosidl_generate_interfaces(${PROJECT_NAME} 10 | "msg/TokenProb.msg" 11 | "msg/TokenProbArray.msg" 12 | "msg/Response.msg" 13 | "msg/PartialResponse.msg" 14 | "msg/LogitBias.msg" 15 | "msg/LogitBiasArray.msg" 16 | "msg/SamplingConfig.msg" 17 | "msg/LoRA.msg" 18 | "msg/GeneralInfo.msg" 19 | "msg/TokenizerInfo.msg" 20 | "msg/AttentionInfo.msg" 21 | "msg/RoPEInfo.msg" 22 | "msg/ModelInfo.msg" 23 | "msg/Metadata.msg" 24 | "msg/GrammarTrigger.msg" 25 | "msg/ChatMessage.msg" 26 | "msg/ChatTool.msg" 27 | "msg/ChatContent.msg" 28 | "msg/ChatToolCall.msg" 29 | "msg/ChatReqTool.msg" 30 | "msg/ChatChoice.msg" 31 | "msg/ChatChoiceChunk.msg" 32 | "msg/ChatDeltaChunk.msg" 33 | "msg/UsageStats.msg" 34 | "msg/ChatReasoningFormat.msg" 35 | 36 | "action/GenerateResponse.action" 37 | "action/GenerateChatCompletions.action" 38 | 39 | "srv/GenerateEmbeddings.srv" 40 | "srv/Tokenize.srv" 41 | "srv/Detokenize.srv" 42 | "srv/ListLoRAs.srv" 43 | "srv/UpdateLoRAs.srv" 44 | "srv/RerankDocuments.srv" 45 | "srv/GetMetadata.srv" 46 | DEPENDENCIES sensor_msgs 47 | ) 48 | 49 | ament_package() 50 | -------------------------------------------------------------------------------- /.github/workflows/doxygen-deployment.yml: -------------------------------------------------------------------------------- 1 | name: Doxygen Deployment 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | doxygen_generation: 9 | runs-on: ubuntu-latest 10 | 11 | permissions: 12 | contents: write 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | 
20 | - name: Generate Doxygen 21 | uses: mattnotmitt/doxygen-action@edge 22 | with: 23 | doxyfile-path: ".github/Doxyfile" 24 | 25 | - name: Create redirect for /latest/ 26 | run: | 27 | mkdir -p redirect 28 | echo '' > redirect/index.html 29 | 30 | - name: Deploy Doxygen page 31 | uses: peaceiris/actions-gh-pages@v4 32 | with: 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_branch: gh-pages 35 | publish_dir: docs/html 36 | destination_dir: ${{ github.event.release.tag_name }} 37 | keep_files: true 38 | 39 | - name: Deploy redirect to /latest/ 40 | uses: peaceiris/actions-gh-pages@v4 41 | with: 42 | github_token: ${{ secrets.GITHUB_TOKEN }} 43 | publish_branch: gh-pages 44 | publish_dir: redirect 45 | destination_dir: latest 46 | -------------------------------------------------------------------------------- /llama_cli/llama_cli/verb/launch.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | 24 | from ros2cli.verb import VerbExtension 25 | from llama_cli.api import launch_llm 26 | 27 | 28 | class LaunchVerb(VerbExtension): 29 | 30 | def add_arguments(self, parser, cli_name): 31 | arg = parser.add_argument("file_path", help="path to the YAML of the LLM") 32 | 33 | def main(self, *, args): 34 | launch_llm(args.file_path) 35 | -------------------------------------------------------------------------------- /.github/workflows/create-release.yml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | jobs: 8 | create_release: 9 | if: startsWith(github.event.head_commit.message, 'new version') 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - name: Extract version from commit message 20 | run: | 21 | if [[ "${{ github.event.head_commit.message }}" =~ new\ version\ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then 22 | echo "version=${BASH_REMATCH[1]}" >> $GITHUB_ENV 23 | else 24 | echo "Commit message does not match 'new version *.*.*' format." 
25 | exit 1 26 | fi 27 | 28 | - name: Get previous tag 29 | run: | 30 | previous_tag=$(git describe --tags --abbrev=0 HEAD^) 31 | echo "previous_tag=$previous_tag" >> $GITHUB_ENV 32 | 33 | - name: Generate release notes with commit messages 34 | run: | 35 | commits=$(git log "${{ env.previous_tag }}..HEAD" --oneline) 36 | echo "release_body<> $GITHUB_ENV 37 | echo "### Changelog from version ${{ env.previous_tag }} to ${{ env.version }}:" >> $GITHUB_ENV 38 | echo "$commits" >> $GITHUB_ENV 39 | echo "EOF" >> $GITHUB_ENV 40 | 41 | - name: Create GitHub release 42 | uses: actions/create-release@latest 43 | env: 44 | GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} 45 | with: 46 | tag_name: "${{ env.version }}" 47 | release_name: "${{ env.version }}" 48 | body: "${{ env.release_body }}" 49 | draft: false 50 | prerelease: false 51 | -------------------------------------------------------------------------------- /llama_msgs/msg/GeneralInfo.msg: -------------------------------------------------------------------------------- 1 | string architecture # Describes what architecture this model implements 2 | uint32 quantization_version # The version of the quantization format. Not required if the model is not quantized 3 | uint32 alignment # The global alignment to use 4 | 5 | string name # The name of the model. This should be a human-readable name that can be used to identify the model. It should be unique within the community that the model is defined in 6 | string author # The author of the model 7 | string version # The version of the model 8 | string organization # The organization of the model 9 | 10 | string basename # The base model name / architecture of the model 11 | string finetune # What has the base model been optimized toward 12 | string description # Free-form description of the model including anything that isn't covered by the other fields 13 | string quantized_by # The name of the individual who quantized the model 14 | string size_label # Size class of the model, such as number of weights and experts 15 | 16 | string license # License of the model 17 | string license_name # Human friendly license name 18 | string license_link # URL to the license 19 | 20 | string url # URL to the model's homepage 21 | string repo_url # URL to the model's repository such as a GitHub repo or HuggingFace repo 22 | string doi # Digital Object Identifier (DOI) 23 | string uuid # Universally unique identifier 24 | 25 | string file_type # An enumerated value describing the type of the majority of the tensors in the file -------------------------------------------------------------------------------- /llama_bringup/launch/spaetzle.launch.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | 24 | import os 25 | from launch import LaunchDescription 26 | from llama_bringup.utils import create_llama_launch_from_yaml 27 | from ament_index_python.packages import get_package_share_directory 28 | 29 | 30 | def generate_launch_description(): 31 | return LaunchDescription( 32 | [ 33 | create_llama_launch_from_yaml( 34 | os.path.join( 35 | get_package_share_directory("llama_bringup"), 36 | "models", 37 | "Spaetzle.yaml", 38 | ) 39 | ) 40 | ] 41 | ) 42 | -------------------------------------------------------------------------------- /llama_bringup/launch/minicpm-2.6.launch.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
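# NOTE (editorial sketch, not part of the original source): a launch description like
# the one below is typically started with the standard ROS 2 launch tooling, e.g.
#
#   ros2 launch llama_bringup minicpm-2.6.launch.py
#
# or, assuming the llama_cli package from this repo is installed, directly from the
# YAML it wraps:
#
#   ros2 llama launch <path/to/MiniCPM-2.6.yaml>
#
# Both invocations are illustrative; adjust package and file names to your workspace.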
22 | 23 | 24 | import os 25 | from launch import LaunchDescription 26 | from llama_bringup.utils import create_llama_launch_from_yaml 27 | from ament_index_python.packages import get_package_share_directory 28 | 29 | 30 | def generate_launch_description(): 31 | return LaunchDescription( 32 | [ 33 | create_llama_launch_from_yaml( 34 | os.path.join( 35 | get_package_share_directory("llama_bringup"), 36 | "models", 37 | "MiniCPM-2.6.yaml", 38 | ) 39 | ) 40 | ] 41 | ) 42 | -------------------------------------------------------------------------------- /llama_ros/src/llava_main.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2024 Miguel Ángel González Santamarta 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #include 24 | 25 | #include "llava_ros/llava_node.hpp" 26 | 27 | using namespace llava_ros; 28 | 29 | int main(int argc, char *argv[]) { 30 | 31 | rclcpp::init(argc, argv); 32 | 33 | auto node = std::make_shared(); 34 | node->configure(); 35 | node->activate(); 36 | 37 | rclcpp::executors::SingleThreadedExecutor executor; 38 | executor.add_node(node->get_node_base_interface()); 39 | 40 | executor.spin(); 41 | 42 | executor.remove_node(node->get_node_base_interface()); 43 | node.reset(); 44 | 45 | rclcpp::shutdown(); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /llama_ros/src/llama_main.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2023 Miguel Ángel González Santamarta 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #include 24 | 25 | #include "llama_ros/llama_node.hpp" 26 | 27 | using namespace llama_ros; 28 | 29 | int main(int argc, char *argv[]) { 30 | 31 | rclcpp::init(argc, argv); 32 | 33 | auto node = std::make_shared(); 34 | node->configure(); 35 | node->activate(); 36 | 37 | rclcpp::executors::SingleThreadedExecutor executor; 38 | executor.add_node(node->get_node_base_interface()); 39 | 40 | executor.spin(); 41 | 42 | executor.remove_node(node->get_node_base_interface()); 43 | node.reset(); 44 | 45 | rclcpp::shutdown(); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /llama_cli/llama_cli/command/llama.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
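# NOTE (editorial sketch, not part of the original source): this CommandExtension plugs
# the "llama" command into ros2cli, so the verbs declared in llama_cli/setup.py become
# available from the shell, for example:
#
#   ros2 llama launch model.yaml          # start an LLM from a YAML config
#   ros2 llama prompt "Hello!" -t 0.5     # prompt the running LLM with temperature 0.5
#
# The exact arguments are defined by the launch and prompt verbs in llama_cli/verb/;
# the lines above are only a usage sketch.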
22 | 23 | 24 | from ros2cli.command import add_subparsers_on_demand 25 | from ros2cli.command import CommandExtension 26 | 27 | 28 | class LlamaCommand(CommandExtension): 29 | 30 | def add_arguments(self, parser, cli_name): 31 | self._subparser = parser 32 | add_subparsers_on_demand( 33 | parser, cli_name, "_verb", "llama_cli.verb", required=False 34 | ) 35 | 36 | def main(self, *, parser, args): 37 | if not hasattr(args, "_verb"): 38 | self._subparser.print_help() 39 | return 0 40 | 41 | extension = getattr(args, "_verb") 42 | return extension.main(args=args) 43 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/llama_embeddings_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Miguel Ángel González Santamarta 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | 26 | import sys 27 | import rclpy 28 | from llama_ros.llama_client_node import LlamaClientNode 29 | from llama_msgs.srv import GenerateEmbeddings 30 | 31 | 32 | def main(): 33 | if len(sys.argv) < 2: 34 | prompt = "This is the test to create embeddings using llama_ros" 35 | else: 36 | prompt = " ".join(sys.argv[1:]) 37 | 38 | rclpy.init() 39 | 40 | llama_client = LlamaClientNode.get_instance() 41 | 42 | emb_req = GenerateEmbeddings.Request() 43 | emb_req.prompt = prompt 44 | 45 | emb = llama_client.generate_embeddings(emb_req).embeddings 46 | print(f"{emb}") 47 | rclpy.shutdown() 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /llama_msgs/action/GenerateChatCompletions.action: -------------------------------------------------------------------------------- 1 | ChatMessage[] messages # List of chat messages 2 | bool add_generation_prompt # Add generation prompt to the chat messages 3 | bool use_jinja # Use Jinja templating for the chat messages. 
Required for tool calls 4 | ChatReqTool[] tools # List of tools to use 5 | int32 tool_choice # TOOL_CHOICE_AUTO = 0, TOOL_CHOICE_REQUIRED = 1, TOOL_CHOICE_NONE = 2 6 | bool extract_reasoning # Extract reasoning from the chat messages 7 | SamplingConfig sampling_config # Sampling configuration 8 | ChatReasoningFormat reasoning_format # Reasoning format to use for the chat messages 9 | sensor_msgs/Image[] images # Images to be used in the chat messages 10 | std_msgs/UInt8MultiArray[] audios # Audios for mtmd 11 | bool parallel_tool_calls # Return more than one tool call per message 12 | bool stream # Stream the chat messages 13 | --- 14 | string id # Unique ID for the chat completion 15 | ChatChoice[] choices # List of chat completions 16 | int32 created # Time of creation 17 | string model # Model used for the chat completion 18 | string system_fingerprint # System fingerprint 19 | string object "chat.completion" # Object type 20 | UsageStats usage # Usage statistics 21 | --- 22 | string id # Unique ID for the chat completion 23 | ChatChoiceChunk[] choices # List of chat chunks 24 | int32 created # Time of creation 25 | string model # Model used for the chat completion 26 | string system_fingerprint # System fingerprint 27 | string object "chat.completion.chunk" # Object type 28 | UsageStats usage # Usage statistics -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ROS_DISTRO=rolling 2 | FROM ros:${ROS_DISTRO} AS deps 3 | 4 | # Create ros2_ws and copy files 5 | WORKDIR /root/ros2_ws 6 | SHELL ["/bin/bash", "-c"] 7 | COPY . /root/ros2_ws/src 8 | 9 | # Install dependencies 10 | RUN apt-get update \ 11 | && apt-get -y --quiet --no-install-recommends install \ 12 | gcc \ 13 | git \ 14 | wget \ 15 | python3 \ 16 | python3-pip 17 | 18 | # Clone behavior_tree if ROS_DISTRO is rolling 19 | RUN if [ "$ROS_DISTRO" = "rolling" ]; then \ 20 | git clone https://github.com/BehaviorTree/BehaviorTree.CPP src/BehaviorTree.CPP; \ 21 | fi 22 | 23 | # Install rosdep 24 | RUN apt update && rosdep install --from-paths src --ignore-src -r -y 25 | 26 | # Check if ubuntu version is 24.04 or later 27 | RUN if [ "$(lsb_release -rs)" = "24.04" ] || [ "$(lsb_release -rs)" = "24.10" ]; then \ 28 | pip3 install -r src/requirements.txt --break-system-packages --ignore-installed; \ 29 | else \ 30 | pip3 install -r src/requirements.txt; \ 31 | fi 32 | 33 | # Install CUDA nvcc 34 | ARG USE_CUDA 35 | ARG CUDA_VERSION=12-6 36 | 37 | RUN if [ "$USE_CUDA" = "1" ]; then \ 38 | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ 39 | dpkg -i cuda-keyring_1.1-1_all.deb && \ 40 | rm cuda-keyring_1.1-1_all.deb; \ 41 | apt-get update && apt-get install -y cuda-toolkit-$CUDA_VERSION; \ 42 | echo "export PATH=/usr/local/cuda/bin${PATH:+:${PATH}}" >> ~/.bashrc; \ 43 | echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> ~/.bashrc; \ 44 | fi 45 | 46 | # Colcon the ws 47 | FROM deps AS builder 48 | ARG CMAKE_BUILD_TYPE=Release 49 | 50 | ENV PATH=/usr/local/cuda/bin${PATH:+:${PATH}} 51 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 52 | 53 | RUN source /opt/ros/${ROS_DISTRO}/setup.bash && \ 54 | if [ "$USE_CUDA" = "1" ]; then \ 55 | source ~/.bashrc && \ 56 | colcon build --cmake-args -DGGML_CUDA=ON; \ 57 | else \ 58 | colcon build; \ 59 | fi 60 | 61 | # Source the ROS 2 setup file 62 | RUN echo "source 
/root/ros2_ws/install/setup.bash" >> ~/.bashrc 63 | 64 | # Run a default command, e.g., starting a bash shell 65 | CMD ["bash"] 66 | -------------------------------------------------------------------------------- /llama_bt/test/test_register.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2025 Alberto J. Tudela Roldán 4 | // Copyright (c) 2025 Grupo Avispa, DTE, Universidad de Málaga 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | // SOFTWARE. 23 | 24 | #if defined(BTV3) 25 | #include "behaviortree_cpp_v3/bt_factory.h" 26 | #include "behaviortree_cpp_v3/utils/shared_library.h" 27 | #else 28 | #include "behaviortree_cpp/bt_factory.h" 29 | #include "behaviortree_cpp/utils/shared_library.h" 30 | #endif 31 | 32 | #include "rclcpp/rclcpp.hpp" 33 | #include 34 | 35 | TEST(LlamaBT, register_nodes) { 36 | BT::BehaviorTreeFactory factory; 37 | BT::SharedLibrary loader; 38 | 39 | factory.registerFromPlugin( 40 | loader.getOSName("llama_generate_response_action_bt_node")); 41 | 42 | factory.registerFromPlugin( 43 | loader.getOSName("llama_generate_chat_completions_action_bt_node")); 44 | } 45 | 46 | int main(int argc, char **argv) { 47 | ::testing::InitGoogleTest(&argc, argv); 48 | 49 | // initialize ROS 50 | rclcpp::init(argc, argv); 51 | 52 | bool all_successful = RUN_ALL_TESTS(); 53 | 54 | // shutdown ROS 55 | rclcpp::shutdown(); 56 | 57 | return all_successful; 58 | } -------------------------------------------------------------------------------- /llama_cli/llama_cli/verb/prompt.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | 24 | from ros2cli.verb import VerbExtension 25 | from llama_cli.api import prompt_llm, positive_float 26 | 27 | 28 | class PromptVerb(VerbExtension): 29 | 30 | def add_arguments(self, parser, cli_name): 31 | arg = parser.add_argument("prompt", help="prompt text for the LLM") 32 | parser.add_argument( 33 | "-r", 34 | "--reset", 35 | action="store_true", 36 | help="Whether to reset the LLM and its context before prompting", 37 | ) 38 | parser.add_argument( 39 | "-t", 40 | "--temp", 41 | metavar="N", 42 | type=positive_float, 43 | default=0.8, 44 | help="Temperature value (default: 0.8)", 45 | ) 46 | parser.add_argument( 47 | "--image-url", type=str, default="", help="Image URL to sent to the VLM" 48 | ) 49 | 50 | def main(self, *, args): 51 | prompt_llm( 52 | args.prompt, reset=args.reset, temp=args.temp, image_url=args.image_url 53 | ) 54 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/llama_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2023 Miguel Ángel González Santamarta 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | 26 | import sys 27 | import time 28 | import rclpy 29 | from llama_ros.llama_client_node import LlamaClientNode 30 | from llama_msgs.action import GenerateResponse 31 | 32 | 33 | def text_cb(feedback): 34 | global eval_time, tokens 35 | if eval_time < 0: 36 | eval_time = time.time() 37 | tokens += 1 38 | print(feedback.feedback.partial_response.text, end="", flush=True) 39 | 40 | 41 | def main(): 42 | if len(sys.argv) < 2: 43 | prompt = "Do you know the city of León from Spain? Can you tell me a bit about its history?" 
44 | else: 45 | prompt = " ".join(sys.argv[1:]) 46 | 47 | global tokens, eval_time 48 | tokens = 0 49 | eval_time = -1 50 | 51 | rclpy.init() 52 | llama_client = LlamaClientNode.get_instance() 53 | 54 | goal = GenerateResponse.Goal() 55 | goal.prompt = prompt 56 | goal.sampling_config.temp = 0.2 57 | 58 | initial_time = time.time() 59 | llama_client.generate_response(goal, text_cb) 60 | end_time = time.time() 61 | 62 | print(f"Time to eval: {eval_time - initial_time:.4f} s") 63 | print(f"Prediction speed: {tokens / (end_time - eval_time):.4f} t/s") 64 | rclpy.shutdown() 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /llama_ros/llama_ros/langchain/llama_ros_embeddings.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
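# NOTE (editorial sketch, not part of the original source): minimal usage of the
# LlamaROSEmbeddings class defined below, assuming an embedding model (e.g. one of the
# llama_bringup embedding YAMLs such as bge-base-en-v1.5) is already running and that
# the class is re-exported from llama_ros.langchain (otherwise import it from
# llama_ros.langchain.llama_ros_embeddings):
#
#   import rclpy
#   from llama_ros.langchain import LlamaROSEmbeddings
#
#   rclpy.init()
#   embeddings = LlamaROSEmbeddings()  # normalization defaults to 2
#   query_vector = embeddings.embed_query("llama_ros brings llama.cpp into ROS 2")
#   doc_vectors = embeddings.embed_documents(["first document", "second document"])
#   rclpy.shutdown()
#
# Both methods call the GenerateEmbeddings service through LlamaClientNode, as shown in
# the implementation below.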
22 | 23 | 24 | from typing import Dict, List 25 | from pydantic import BaseModel, model_validator 26 | from langchain_core.embeddings import Embeddings 27 | 28 | from llama_msgs.srv import GenerateEmbeddings 29 | from llama_ros.llama_client_node import LlamaClientNode 30 | 31 | 32 | class LlamaROSEmbeddings(BaseModel, Embeddings): 33 | 34 | llama_client: LlamaClientNode = None 35 | normalization: int = 2 36 | 37 | class Config: 38 | arbitrary_types_allowed = True 39 | 40 | @model_validator(mode="before") 41 | @classmethod 42 | def validate_environment(cls, values: Dict) -> Dict: 43 | values["llama_client"] = LlamaClientNode.get_instance() 44 | return values 45 | 46 | def __call_generate_embedding_srv(self, text: str) -> List[int]: 47 | req = GenerateEmbeddings.Request() 48 | req.prompt = text 49 | req.normalization = self.normalization 50 | return self.llama_client.generate_embeddings(req).embeddings 51 | 52 | def embed_documents(self, texts: List[str]) -> List[List[float]]: 53 | embeddings = [self.__call_generate_embedding_srv(text) for text in texts] 54 | return [list(map(float, e)) for e in embeddings] 55 | 56 | def embed_query(self, text: str) -> List[float]: 57 | embedding = self.__call_generate_embedding_srv(text) 58 | return list(map(float, embedding)) 59 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_langgraph_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | 27 | import time 28 | import rclpy 29 | from random import randint 30 | from langchain.tools import tool 31 | from langchain_core.messages import HumanMessage 32 | from langgraph.prebuilt import create_react_agent 33 | from llama_ros.langchain import ChatLlamaROS 34 | 35 | 36 | @tool 37 | def get_inhabitants(city: str) -> int: 38 | """Get the current number of inhabitants of a city""" 39 | return randint(4_000_000, 8_000_000) 40 | 41 | 42 | @tool 43 | def get_curr_temperature(city: str) -> int: 44 | """Get the current temperature of a city""" 45 | return randint(20, 30) 46 | 47 | 48 | def main(): 49 | rclpy.init() 50 | chat = ChatLlamaROS(temp=0.0) 51 | agent_executor = create_react_agent(chat, [get_inhabitants, get_curr_temperature]) 52 | 53 | initial_time = time.time() 54 | response = agent_executor.invoke( 55 | { 56 | "messages": [ 57 | HumanMessage( 58 | content="What is the current temperature in Madrid? And its inhabitants?" 59 | ) 60 | ] 61 | } 62 | ) 63 | 64 | end_time = time.time() 65 | print(f"\nResponse: {response['messages'][-1].content}") 66 | print(f"Time to run the agent: {(end_time - initial_time):.2f} s") 67 | 68 | rclpy.shutdown() 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /llama_ros/llama_ros/langchain/llama_ros_reranker.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE.
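# NOTE (editorial sketch, not part of the original source): minimal usage of the
# LlamaROSReranker compressor defined below, assuming a reranker model (e.g.
# bge-reranker-v2-m3 from llama_bringup) is already running:
#
#   import rclpy
#   from langchain_core.documents import Document
#   from llama_ros.langchain.llama_ros_reranker import LlamaROSReranker
#
#   rclpy.init()
#   reranker = LlamaROSReranker(top_n=2)
#   docs = [Document(page_content=t) for t in ["ROS 2 nodes", "llama.cpp", "reranking"]]
#   best = reranker.compress_documents(docs, query="Which text is about reranking?")
#   rclpy.shutdown()
#
# compress_documents() scores every document against the query via the RerankDocuments
# service and keeps the top_n highest-scoring ones; the compressor can also serve as the
# base_compressor of a LangChain ContextualCompressionRetriever.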
22 | 23 | 24 | import operator 25 | from typing import Optional, Sequence, Dict 26 | from pydantic import model_validator 27 | 28 | from langchain_core.callbacks import Callbacks 29 | from langchain_core.documents import BaseDocumentCompressor, Document 30 | 31 | from llama_msgs.srv import RerankDocuments 32 | from llama_ros.llama_client_node import LlamaClientNode 33 | 34 | 35 | class LlamaROSReranker(BaseDocumentCompressor): 36 | 37 | llama_client: LlamaClientNode = None 38 | top_n: int = 3 39 | 40 | class Config: 41 | arbitrary_types_allowed = True 42 | 43 | @model_validator(mode="before") 44 | @classmethod 45 | def validate_environment(cls, values: Dict) -> Dict: 46 | values["llama_client"] = LlamaClientNode.get_instance() 47 | return values 48 | 49 | def compress_documents( 50 | self, 51 | documents: Sequence[Document], 52 | query: str, 53 | callbacks: Optional[Callbacks] = None, 54 | ) -> Sequence[Document]: 55 | 56 | req = RerankDocuments.Request() 57 | req.query = query 58 | 59 | for doc in documents: 60 | req.documents.append(doc.page_content) 61 | 62 | scores = self.llama_client.rerank_documents(req).scores 63 | scored_docs = list(zip(documents, scores)) 64 | result = sorted(scored_docs, key=operator.itemgetter(1), reverse=True) 65 | return [doc for doc, _ in result[: self.top_n]] 66 | -------------------------------------------------------------------------------- /llama_bt/src/action/generate_response_action.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2025 Alberto J. Tudela Roldán 4 | // Copyright (c) 2025 Grupo Avispa, DTE, Universidad de Málaga 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | // SOFTWARE. 
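// NOTE (editorial sketch, not part of the original source): this action node is
// registered under the name "GenerateResponse" (see BT_REGISTER_NODES at the bottom of
// this file) and, per the getInput()/setOutput() calls below, reads the input ports
// prompt, stop and reset and writes the output port response. A rough BehaviorTree XML
// usage sketch, assuming the llama_generate_response_action_bt_node plugin is loaded:
//
//   <GenerateResponse prompt="Say hello" reset="true" response="{llm_answer}"/>
//
// The authoritative port declarations live in the corresponding header
// (include/llama_bt/action/generate_response_action.hpp).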
23 | 24 | #include 25 | #include 26 | 27 | #include "llama_bt/action/generate_response_action.hpp" 28 | 29 | namespace llama_bt { 30 | 31 | GenerateResponseAction::GenerateResponseAction( 32 | const std::string &xml_tag_name, const std::string &action_name, 33 | const BT::NodeConfiguration &conf) 34 | : llama_bt::BtActionNode( 35 | xml_tag_name, action_name, conf) {} 36 | 37 | void GenerateResponseAction::on_tick() { 38 | std::string prompt; 39 | getInput("prompt", prompt); 40 | std::vector stop; 41 | getInput("stop", stop); 42 | bool reset; 43 | getInput("reset", reset); 44 | 45 | goal_.prompt = prompt; 46 | goal_.stop = stop; 47 | goal_.reset = reset; 48 | } 49 | 50 | BT::NodeStatus GenerateResponseAction::on_success() { 51 | setOutput("response", result_.result->response.text); 52 | return BT::NodeStatus::SUCCESS; 53 | } 54 | 55 | } // namespace llama_bt 56 | 57 | #if defined(BTV3) 58 | #include "behaviortree_cpp_v3/bt_factory.h" 59 | #else 60 | #include "behaviortree_cpp/bt_factory.h" 61 | #endif 62 | 63 | BT_REGISTER_NODES(factory) { 64 | BT::NodeBuilder builder = [](const std::string &name, 65 | const BT::NodeConfiguration &config) { 66 | return std::make_unique( 67 | name, "generate_response", config); 68 | }; 69 | 70 | factory.registerBuilder("GenerateResponse", 71 | builder); 72 | } -------------------------------------------------------------------------------- /llama_ros/include/llama_utils/spinner.hpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2023 Miguel Ángel González Santamarta 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LLAMA_UTILS__SPINNER_HPP 24 | #define LLAMA_UTILS__SPINNER_HPP 25 | 26 | #include 27 | #include 28 | 29 | namespace llama_utils { 30 | 31 | /** 32 | * @class Spinner 33 | * @brief A utility class for displaying a spinning animation in the terminal. 34 | */ 35 | class Spinner { 36 | 37 | public: 38 | /** 39 | * @brief Constructs a Spinner object. 40 | * 41 | * Initializes the spinner animation characters and sets the starting index. 42 | */ 43 | 44 | Spinner() { 45 | this->spinner = "-\\|/"; 46 | this->index = 0; 47 | } 48 | 49 | /** 50 | * @brief Displays the spinner animation with an optional text message. 51 | * 52 | * @param text The text to display alongside the spinner. Defaults to an empty 53 | * string. 
54 | */ 55 | void spin(std::string text) { 56 | fprintf(stderr, "%c %s\n", spinner[index], text.c_str()); 57 | fflush(stderr); 58 | fprintf(stderr, "\033[1A\033[2K"); 59 | index = (index + 1) % 4; 60 | } 61 | 62 | /** 63 | * @brief Displays the spinner animation without any text. 64 | */ 65 | void spin() { this->spin(""); } 66 | 67 | private: 68 | /** 69 | * @brief The spinner characters used for the animation. 70 | * 71 | * The spinner consists of four characters that are displayed in a loop to 72 | * create the spinning effect. 73 | */ 74 | const char *spinner; 75 | 76 | /** 77 | * @brief The current index of the spinner character being displayed. 78 | * 79 | * This index is used to determine which character from the spinner string to 80 | * display next. 81 | */ 82 | int index = 0; 83 | }; 84 | 85 | } // namespace llama_utils 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /llama_ros/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(llama_ros) 3 | 4 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 5 | add_compile_options(-Wall -Wextra -Wpedantic) 6 | endif() 7 | 8 | # find dependencies 9 | find_package(ament_cmake REQUIRED) 10 | find_package(rclcpp REQUIRED) 11 | find_package(rclcpp_action REQUIRED) 12 | find_package(rclcpp_lifecycle REQUIRED) 13 | find_package(llama_msgs REQUIRED) 14 | find_package(llama_cpp_vendor REQUIRED) 15 | find_package(Threads REQUIRED) 16 | find_package(cv_bridge REQUIRED) 17 | find_package(OpenCV REQUIRED) 18 | find_package(llama_hfhub_vendor REQUIRED) 19 | 20 | include_directories( 21 | include 22 | ${OpenCV_INCLUDE_DIRS} 23 | ) 24 | 25 | # ROS 2 distros 26 | if("$ENV{ROS_DISTRO}" STREQUAL "foxy" OR "$ENV{ROS_DISTRO}" STREQUAL "galactic" OR "$ENV{ROS_DISTRO}" STREQUAL "humble") 27 | message(STATUS "Using cv_bridge.h") 28 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCV_BRIDGE_H") 29 | else() 30 | message(STATUS "Using cv_bridge.hpp") 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCV_BRIDGE_HPP") 32 | endif() 33 | 34 | # llama nodes 35 | add_executable(llama_node 36 | src/llama_ros/llama.cpp 37 | src/llama_utils/logs.cpp 38 | src/llama_utils/chat_utils.cpp 39 | src/llama_utils/llama_params.cpp 40 | src/llama_ros/llama_node.cpp 41 | src/llama_main.cpp 42 | ) 43 | target_link_libraries(llama_node 44 | llama_cpp_vendor::llama 45 | llama_cpp_vendor::common 46 | ${CMAKE_THREAD_LIBS_INIT} 47 | rclcpp::rclcpp 48 | rclcpp_action::rclcpp_action 49 | rclcpp_lifecycle::rclcpp_lifecycle 50 | ${llama_msgs_TARGETS} 51 | ${llama_cpp_vendor_TARGETS} 52 | ${llama_hfhub_vendor_TARGETS} 53 | Threads::Threads 54 | ) 55 | 56 | add_executable(llava_node 57 | src/llama_ros/llama.cpp 58 | src/llava_ros/llava.cpp 59 | src/llama_utils/logs.cpp 60 | src/llama_utils/chat_utils.cpp 61 | src/llama_utils/llama_params.cpp 62 | src/llama_ros/llama_node.cpp 63 | src/llava_ros/llava_node.cpp 64 | src/llava_main.cpp 65 | ) 66 | target_link_libraries(llava_node 67 | cv_bridge::cv_bridge 68 | llama_cpp_vendor::llama 69 | llama_cpp_vendor::mtmd 70 | llama_cpp_vendor::common 71 | ${CMAKE_THREAD_LIBS_INIT} 72 | rclcpp::rclcpp 73 | rclcpp_action::rclcpp_action 74 | rclcpp_lifecycle::rclcpp_lifecycle 75 | ${llama_msgs_TARGETS} 76 | ${llama_cpp_vendor_TARGETS} 77 | ${llama_hfhub_vendor_TARGETS} 78 | Threads::Threads 79 | ) 80 | 81 | ament_export_dependencies(llama_cpp_vendor) 82 | 83 | # INSTALL 84 | install(TARGETS 85 | llama_node 
86 | DESTINATION lib/${PROJECT_NAME} 87 | ) 88 | 89 | install(TARGETS 90 | llava_node 91 | DESTINATION lib/${PROJECT_NAME} 92 | ) 93 | 94 | install(DIRECTORY 95 | DESTINATION share/${PROJECT_NAME} 96 | ) 97 | 98 | # TEST 99 | if(BUILD_TESTING) 100 | find_package(ament_cmake_clang_format REQUIRED) 101 | ament_clang_format(CONFIG_FILE .clang-format) 102 | endif() 103 | 104 | ament_python_install_package(${PROJECT_NAME}) 105 | ament_package() 106 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_reasoning_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | 27 | import sys 28 | import time 29 | import rclpy 30 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 31 | from llama_ros.langchain import ChatLlamaROS 32 | from langchain_core.messages import AIMessage 33 | 34 | 35 | def main(): 36 | if len(sys.argv) < 2: 37 | prompt = "Here we have a book, a laptop and a nail. Please tell me how to stack them onto each other in a stable manner in English." 38 | else: 39 | prompt = " ".join(sys.argv[1:]) 40 | 41 | rclpy.init() 42 | initial_time = -1 43 | chat = ChatLlamaROS(temp=0.2, penalty_last_n=8, enable_thinking=True) 44 | 45 | prompt = ChatPromptTemplate.from_messages( 46 | [ 47 | HumanMessagePromptTemplate.from_template( 48 | template=[ 49 | {"type": "text", "text": f"{prompt}"}, 50 | ] 51 | ), 52 | ] 53 | ) 54 | chain = prompt | chat 55 | 56 | initial_time = time.time() 57 | response: AIMessage = chain.invoke({}) 58 | final_time = time.time() 59 | 60 | print(f"Prompt: {prompt}") 61 | print(f"Response: {response.content.strip()}") 62 | 63 | if "reasoning_content" in response.additional_kwargs: 64 | print( 65 | f"Reasoning length: {len(response.additional_kwargs['reasoning_content'])} characters" 66 | ) 67 | else: 68 | print("No reasoning content. 
Are you sure you are using a reasoning model?") 69 | 70 | print(f"Time elapsed: {final_time - initial_time:.2f} seconds") 71 | print( 72 | f"Tokens per second: {response.usage_metadata['output_tokens'] / (final_time - initial_time):.2f} t/s" 73 | ) 74 | rclpy.shutdown() 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_audio_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | 27 | import sys 28 | import time 29 | import rclpy 30 | from langchain_core.messages import SystemMessage 31 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 32 | from langchain_core.output_parsers import StrOutputParser 33 | from llama_ros.langchain import ChatLlamaROS 34 | 35 | 36 | def main(): 37 | if len(sys.argv) < 2: 38 | prompt = "What's that sound?" 
39 | else: 40 | prompt = " ".join(sys.argv[1:]) 41 | 42 | tokens = 0 43 | initial_time = -1 44 | eval_time = -1 45 | 46 | rclpy.init() 47 | chat = ChatLlamaROS(temp=0.0) 48 | 49 | prompt = ChatPromptTemplate.from_messages( 50 | [ 51 | SystemMessage("You are an IA that answer questions."), 52 | HumanMessagePromptTemplate.from_template( 53 | template=[ 54 | {"type": "text", "text": f"<__media__>{prompt}"}, 55 | {"type": "image_url", "image_url": "{audio_url}"}, 56 | ] 57 | ), 58 | ] 59 | ) 60 | 61 | chain = prompt | chat | StrOutputParser() 62 | 63 | initial_time = time.time() 64 | for text in chain.stream( 65 | { 66 | "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/glass-breaking-151256.mp3" 67 | } 68 | ): 69 | tokens += 1 70 | print(text, end="", flush=True) 71 | if eval_time < 0: 72 | eval_time = time.time() 73 | 74 | print("", end="\n", flush=True) 75 | 76 | end_time = time.time() 77 | print(f"Time to eval: {eval_time - initial_time} s") 78 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 79 | 80 | rclpy.shutdown() 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | 27 | import sys 28 | import time 29 | import rclpy 30 | from langchain_core.messages import SystemMessage 31 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 32 | from langchain_core.output_parsers import StrOutputParser 33 | from llama_ros.langchain import ChatLlamaROS 34 | 35 | 36 | def main(): 37 | if len(sys.argv) < 2: 38 | prompt = "Who is the character in the middle?" 
39 | else: 40 | prompt = " ".join(sys.argv[1:]) 41 | 42 | tokens = 0 43 | initial_time = -1 44 | eval_time = -1 45 | 46 | rclpy.init() 47 | chat = ChatLlamaROS(temp=0.0) 48 | 49 | prompt = ChatPromptTemplate.from_messages( 50 | [ 51 | SystemMessage("You are an IA that answer questions."), 52 | HumanMessagePromptTemplate.from_template( 53 | template=[ 54 | {"type": "text", "text": f"<__media__>{prompt}"}, 55 | {"type": "image_url", "image_url": "{image_url}"}, 56 | ] 57 | ), 58 | ] 59 | ) 60 | 61 | chain = prompt | chat | StrOutputParser() 62 | 63 | initial_time = time.time() 64 | for text in chain.stream( 65 | { 66 | "image_url": "https://pics.filmaffinity.com/Dragon_Ball_Bola_de_Dragaon_Serie_de_TV-973171538-large.jpg" 67 | } 68 | ): 69 | tokens += 1 70 | print(text, end="", flush=True) 71 | if eval_time < 0: 72 | eval_time = time.time() 73 | 74 | print("", end="\n", flush=True) 75 | 76 | end_time = time.time() 77 | print(f"Time to eval: {eval_time - initial_time} s") 78 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 79 | 80 | rclpy.shutdown() 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_structured_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
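Editor's note: chatllama_demo_node.py above streams its answer token by token in order to measure throughput. When timing does not matter, the same prompt-plus-chat chain can be resolved with a single blocking .invoke() call. A minimal sketch, assuming a multimodal model is already launched with llama_bringup exactly as in that demo (the prompt text and image URL are illustrative):

import rclpy
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from llama_ros.langchain import ChatLlamaROS


def main():
    rclpy.init()
    chat = ChatLlamaROS(temp=0.0)

    # Same multimodal prompt layout as the demo, but resolved in one blocking call.
    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage("You are an AI that answers questions."),
            HumanMessagePromptTemplate.from_template(
                template=[
                    {"type": "text", "text": "<__media__>Describe this image in one sentence."},
                    {"type": "image_url", "image_url": "{image_url}"},
                ]
            ),
        ]
    )

    chain = prompt | chat | StrOutputParser()
    answer = chain.invoke(
        {
            "image_url": "https://i.pinimg.com/474x/32/89/17/328917cc4fe3bd4cfbe2d32aa9cc6e98.jpg"
        }
    )
    print(answer)

    rclpy.shutdown()


if __name__ == "__main__":
    main()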
25 | 26 | 27 | import time 28 | import rclpy 29 | from typing import Optional 30 | from pydantic import BaseModel, Field 31 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 32 | from langchain_core.messages import AIMessage 33 | from llama_ros.langchain import ChatLlamaROS 34 | 35 | 36 | # Pydantic 37 | class Joke(BaseModel): 38 | """Joke to tell user.""" 39 | 40 | setup: str = Field(description="The setup of the joke") 41 | punchline: str = Field(description="The punchline to the joke") 42 | rating: Optional[int] = Field( 43 | default=None, description="How funny the joke is, from 1 to 10" 44 | ) 45 | 46 | 47 | def main(): 48 | rclpy.init() 49 | chat = ChatLlamaROS(temp=0.2, penalty_last_n=8) 50 | 51 | prompt = ChatPromptTemplate.from_messages( 52 | [ 53 | HumanMessagePromptTemplate.from_template( 54 | template=[ 55 | {"type": "text", "text": "{prompt}"}, 56 | ] 57 | ), 58 | ] 59 | ) 60 | 61 | chain = prompt | chat.with_structured_output( 62 | Joke, method="function_calling", include_raw=True 63 | ) 64 | initial_time = time.time() 65 | response = chain.invoke({"prompt": "Tell me a joke about cats"}) 66 | message: AIMessage = response["raw"] 67 | joke: Joke = response["parsed"] 68 | final_time = time.time() 69 | 70 | print(f"Prompt: Tell me a joke about cats") 71 | print(joke.model_dump_json()) 72 | print(f"Time elapsed: {final_time - initial_time:.2f} seconds") 73 | print( 74 | f"Tokens per second: {message.usage_metadata['output_tokens'] / (final_time - initial_time):.2f} t/s" 75 | ) 76 | rclpy.shutdown() 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/llama_rerank_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Miguel Ángel González Santamarta 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
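Editor's note: chatllama_structured_demo_node.py above requests include_raw=True and therefore receives a dict with "raw" and "parsed" entries. Omitting include_raw makes LangChain return the parsed Pydantic object directly. A short sketch using the same function_calling method; the WeatherReport schema is hypothetical and stands in for any Pydantic model with field descriptions:

import rclpy
from pydantic import BaseModel, Field
from llama_ros.langchain import ChatLlamaROS


class WeatherReport(BaseModel):
    """Invented weather-report schema, used only for this sketch."""

    city: str = Field(description="City the report is about")
    temperature_c: float = Field(description="Temperature in Celsius")
    summary: str = Field(description="One-sentence summary")


def main():
    rclpy.init()
    chat = ChatLlamaROS(temp=0.2)

    # Without include_raw, the runnable returns the parsed WeatherReport directly.
    structured = chat.with_structured_output(WeatherReport, method="function_calling")
    report: WeatherReport = structured.invoke(
        "Give me a plausible weather report for Madrid in July."
    )

    print(report.model_dump_json(indent=2))
    rclpy.shutdown()


if __name__ == "__main__":
    main()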
24 | 25 | 26 | import sys 27 | import rclpy 28 | import operator 29 | from llama_ros.llama_client_node import LlamaClientNode 30 | from llama_msgs.srv import RerankDocuments 31 | 32 | 33 | def main(): 34 | rclpy.init() 35 | 36 | if len(sys.argv) < 2: 37 | query = "Machine learning is" 38 | else: 39 | query = " ".join(sys.argv[1:]) 40 | 41 | rerank_req = RerankDocuments.Request() 42 | rerank_req.query = query 43 | rerank_req.documents = [ 44 | "A machine is a physical system that uses power to apply forces and control movement to perform an action. The term is commonly applied to artificial devices, such as those employing engines or motors, but also to natural biological macromolecules, such as molecular machines.", 45 | "Learning is the process of acquiring new understanding, knowledge, behaviors, skills, values, attitudes, and preferences. The ability to learn is possessed by humans, non-human animals, and some machines; there is also evidence for some kind of learning in certain plants.", 46 | "Machine learning is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.", 47 | "Paris, capitale de la France, est une grande ville européenne et un centre mondial de l'art, de la mode, de la gastronomie et de la culture. Son paysage urbain du XIXe siècle est traversé par de larges boulevards et la Seine.", 48 | ] 49 | 50 | scores = LlamaClientNode.get_instance().rerank_documents(rerank_req).scores 51 | scored_docs = list(zip(rerank_req.documents, scores)) 52 | result = sorted(scored_docs, key=operator.itemgetter(1), reverse=True) 53 | 54 | for i in range(len(result)): 55 | print(f"\t{i} ({result[i][1]}): {result[i][0]}") 56 | 57 | rclpy.shutdown() 58 | 59 | 60 | if __name__ == "__main__": 61 | main() 62 | -------------------------------------------------------------------------------- /llama_ros/llama_ros/langchain/llama_ros.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
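Editor's note: llama_rerank_demo_node.py above scores and sorts its documents inline. The same RerankDocuments call is easy to wrap into a reusable top-k helper, which is the shape usually needed in a RAG pipeline. A sketch, assuming a reranker model (for example one of the bge-reranker or jina-reranker configs under llama_bringup/models) is already running; the documents and the rerank_top_k name are illustrative:

import rclpy
from llama_ros.llama_client_node import LlamaClientNode
from llama_msgs.srv import RerankDocuments


def rerank_top_k(query: str, documents: list, k: int = 3):
    """Return the k documents most relevant to the query as (document, score) pairs."""
    req = RerankDocuments.Request()
    req.query = query
    req.documents = documents

    scores = LlamaClientNode.get_instance().rerank_documents(req).scores
    ranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:k]


def main():
    rclpy.init()

    documents = [
        "Machine learning is a field of study in artificial intelligence.",
        "A machine is a physical system that uses power to apply forces.",
        "Learning is the process of acquiring new understanding and skills.",
        "Paris is the capital of France.",
    ]

    for doc, score in rerank_top_k("What is machine learning?", documents, k=2):
        print(f"{score:.3f}  {doc}")

    rclpy.shutdown()


if __name__ == "__main__":
    main()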
22 | 23 | 24 | from typing import Any, Dict, List, Optional, Iterator 25 | 26 | from langchain_core.outputs import GenerationChunk 27 | from langchain_core.language_models.llms import LLM 28 | from langchain.callbacks.manager import CallbackManagerForLLMRun 29 | 30 | from action_msgs.msg import GoalStatus 31 | from llama_msgs.srv import Tokenize 32 | from llama_ros.langchain import LlamaROSCommon 33 | 34 | 35 | class LlamaROS(LLM, LlamaROSCommon): 36 | 37 | @property 38 | def _default_params(self) -> Dict[str, Any]: 39 | return {} 40 | 41 | @property 42 | def _llm_type(self) -> str: 43 | return "llamaros" 44 | 45 | def _call( 46 | self, 47 | prompt: str, 48 | stop: Optional[List[str]] = None, 49 | run_manager: Optional[CallbackManagerForLLMRun] = None, 50 | **kwargs: Any, 51 | ) -> str: 52 | 53 | goal = self._create_action_goal(prompt, stop, **kwargs) 54 | 55 | result, status = self.llama_client.generate_response(goal) 56 | 57 | if status != GoalStatus.STATUS_SUCCEEDED: 58 | return "" 59 | return result.response.text 60 | 61 | def _stream( 62 | self, 63 | prompt: str, 64 | stop: Optional[List[str]] = None, 65 | run_manager: Optional[CallbackManagerForLLMRun] = None, 66 | **kwargs: Any, 67 | ) -> Iterator[GenerationChunk]: 68 | 69 | goal = self._create_action_goal(prompt, stop, **kwargs) 70 | 71 | for pt in self.llama_client.generate_response(goal, stream=True): 72 | 73 | if run_manager: 74 | run_manager.on_llm_new_token( 75 | pt.text, 76 | verbose=self.verbose, 77 | ) 78 | 79 | yield GenerationChunk(text=pt.text) 80 | 81 | def get_num_tokens(self, text: str) -> int: 82 | req = Tokenize.Request() 83 | req.text = text 84 | tokens = self.llama_client.tokenize(req).tokens 85 | return len(tokens) 86 | -------------------------------------------------------------------------------- /llama_cli/llama_cli/api/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2024 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
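Editor's note: the LlamaROS class defined above is a regular LangChain LLM, so it can be used with invoke(), stream() and get_num_tokens() outside of any chain. A minimal sketch, assuming a text model is launched via llama_bringup; passing temp to the constructor is an assumption, mirroring how the demos construct ChatLlamaROS:

import rclpy
from llama_ros.langchain import LlamaROS


def main():
    rclpy.init()

    # temp is assumed to be accepted here just as it is by ChatLlamaROS in the demos.
    llm = LlamaROS(temp=0.7)

    prompt = "List three things a mobile robot can do in a warehouse:"
    print(f"Prompt length: {llm.get_num_tokens(prompt)} tokens")

    # One-shot generation
    print(llm.invoke(prompt))

    # Token-by-token streaming
    for chunk in llm.stream(prompt):
        print(chunk, end="", flush=True)
    print()

    rclpy.shutdown()


if __name__ == "__main__":
    main()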
22 | 23 | 24 | from launch import LaunchService 25 | from launch import LaunchDescription 26 | from llama_bringup.utils import create_llama_launch_from_yaml 27 | 28 | import os 29 | import rclpy 30 | from argparse import ArgumentTypeError 31 | from llama_msgs.action import GenerateResponse 32 | from llama_ros.llama_client_node import LlamaClientNode 33 | 34 | import cv2 35 | import numpy as np 36 | import urllib.request 37 | from cv_bridge import CvBridge 38 | 39 | 40 | def positive_float(inval): 41 | try: 42 | ret = float(inval) 43 | except ValueError: 44 | raise ArgumentTypeError("Expects a floating point number") 45 | if ret < 0.0: 46 | raise ArgumentTypeError("Value must be positive") 47 | return ret 48 | 49 | 50 | def launch_llm(file_path: str) -> None: 51 | if not os.path.exists(file_path): 52 | print(f"File '{file_path}' does not exists") 53 | return 54 | 55 | ld = LaunchDescription([create_llama_launch_from_yaml(file_path)]) 56 | ls = LaunchService() 57 | ls.include_launch_description(ld) 58 | ls.run() 59 | 60 | 61 | def prompt_llm( 62 | prompt: str, reset: bool = False, temp: float = 0.8, image_url: str = "" 63 | ) -> None: 64 | 65 | rclpy.init() 66 | llama_client = LlamaClientNode() 67 | goal = GenerateResponse.Goal() 68 | goal.prompt = prompt 69 | goal.reset = reset 70 | goal.sampling_config.temp = temp 71 | 72 | if image_url: 73 | req = urllib.request.Request(image_url, headers={"User-Agent": "Mozilla/5.0"}) 74 | response = urllib.request.urlopen(req) 75 | arr = np.asarray(bytearray(response.read()), dtype=np.uint8) 76 | img = cv2.imdecode(arr, -1) 77 | 78 | cv_bridge = CvBridge() 79 | goal.images.append(cv_bridge.cv2_to_imgmsg(img)) 80 | 81 | last_t = "" 82 | for ele in llama_client.generate_response(goal, stream=True): 83 | last_t = ele.text 84 | print(ele.text, flush=True, end="") 85 | if not last_t.endswith("\n"): 86 | print() 87 | rclpy.shutdown() 88 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_reasoning_tools_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
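Editor's note: the llama_cli API above (launch_llm and prompt_llm) can also be called from a plain Python script rather than through the CLI verbs. prompt_llm initializes and shuts down rclpy itself, so it is used here as a one-shot call; a sketch, assuming a multimodal model was already launched in another terminal and reusing the demo image URL:

from llama_cli.api import prompt_llm

# One-shot multimodal prompt against the already-running llama_ros node;
# prompt_llm streams the reply to stdout and handles rclpy init/shutdown itself.
prompt_llm(
    "<__media__>What type of food is the girl holding?",
    reset=True,
    temp=0.2,
    image_url="https://i.pinimg.com/474x/32/89/17/328917cc4fe3bd4cfbe2d32aa9cc6e98.jpg",
)

# Launching a model programmatically blocks until the launch service exits, e.g.:
#   from llama_cli.api import launch_llm
#   launch_llm("/path/to/model_config.yaml")   # hypothetical YAML path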
25 | 26 | 27 | import time 28 | import rclpy 29 | from random import randint 30 | from langchain.tools import tool 31 | from langchain_core.messages import HumanMessage, AIMessage 32 | from llama_ros.langchain import ChatLlamaROS 33 | 34 | 35 | @tool 36 | def get_inhabitants(city: str) -> int: 37 | """Get the number of inhabitants of a city""" 38 | return randint(4_000_000, 8_000_000) 39 | 40 | 41 | @tool 42 | def get_curr_temperature(city: str) -> int: 43 | """Get the current temperature of a city""" 44 | return randint(20, 30) 45 | 46 | 47 | def main(): 48 | rclpy.init() 49 | chat = ChatLlamaROS(temp=0.0, enable_thinking=True) 50 | 51 | messages = [ 52 | HumanMessage("What is the current temperature in Madrid? And its inhabitants?") 53 | ] 54 | 55 | print(f"\nPrompt: {messages[0].content}\n") 56 | llm_tools = chat.bind_tools( 57 | [get_inhabitants, get_curr_temperature], tool_choice="auto" 58 | ) 59 | 60 | initial_time = time.time() 61 | all_tools_res: AIMessage = llm_tools.invoke(messages) 62 | final_time = time.time() 63 | 64 | messages.append(all_tools_res) 65 | 66 | for tool in all_tools_res.tool_calls: 67 | formatted_output = f"{tool['name']}({''.join(tool['args'].values())})" 68 | print(f"Calling tool: {formatted_output}") 69 | 70 | if "reasoning_content" in all_tools_res.additional_kwargs: 71 | print( 72 | f"Reasoning length: {len(all_tools_res.additional_kwargs['reasoning_content'])} characters" 73 | ) 74 | else: 75 | print("No reasoning content. Are you sure you are using a reasoning model?") 76 | 77 | print(f"Time elapsed: {final_time - initial_time:.2f} seconds") 78 | print( 79 | f"Tokens per second: {all_tools_res.usage_metadata['output_tokens'] / (final_time - initial_time):.2f} t/s" 80 | ) 81 | 82 | rclpy.shutdown() 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/llava_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Miguel Ángel González Santamarta 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE.
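Editor's note: chatllama_reasoning_tools_demo_node.py above stops after printing the tool calls requested by the model. The usual next step in LangChain is to execute each call and send the results back as ToolMessages so the model can produce a final answer. A sketch of that round trip, assuming the launched model's chat template accepts tool-role messages (the tools themselves are the demo's own):

import rclpy
from random import randint
from langchain.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage
from llama_ros.langchain import ChatLlamaROS


@tool
def get_inhabitants(city: str) -> int:
    """Get the number of inhabitants of a city"""
    return randint(4_000_000, 8_000_000)


@tool
def get_curr_temperature(city: str) -> int:
    """Get the current temperature of a city"""
    return randint(20, 30)


def main():
    rclpy.init()
    chat = ChatLlamaROS(temp=0.0)

    llm_tools = chat.bind_tools(
        [get_inhabitants, get_curr_temperature], tool_choice="auto"
    )
    tools_by_name = {t.name: t for t in (get_inhabitants, get_curr_temperature)}

    messages = [
        HumanMessage("What is the current temperature in Madrid? And its inhabitants?")
    ]
    ai_msg = llm_tools.invoke(messages)
    messages.append(ai_msg)

    # Execute each requested tool and report its result back to the model
    for call in ai_msg.tool_calls:
        result = tools_by_name[call["name"]].invoke(call["args"])
        messages.append(ToolMessage(content=str(result), tool_call_id=call["id"]))

    final_msg = llm_tools.invoke(messages)
    print(final_msg.content)

    rclpy.shutdown()


if __name__ == "__main__":
    main()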
24 | 25 | 26 | import sys 27 | import time 28 | import cv2 29 | import numpy as np 30 | import urllib.request 31 | 32 | import rclpy 33 | from cv_bridge import CvBridge 34 | from llama_ros.llama_client_node import LlamaClientNode 35 | from llama_msgs.action import GenerateResponse 36 | 37 | 38 | def load_image_from_url(url): 39 | req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) 40 | response = urllib.request.urlopen(req) 41 | arr = np.asarray(bytearray(response.read()), dtype=np.uint8) 42 | img = cv2.imdecode(arr, -1) 43 | return img 44 | 45 | 46 | def text_cb(feedback): 47 | global eval_time, tokens 48 | if eval_time < 0: 49 | eval_time = time.time() 50 | tokens += 1 51 | print(feedback.feedback.partial_response.text, end="", flush=True) 52 | 53 | 54 | def main(): 55 | prompt = "<__media__>What type of food is the girl holding?" 56 | use_image = True 57 | image_url = "https://i.pinimg.com/474x/32/89/17/328917cc4fe3bd4cfbe2d32aa9cc6e98.jpg" 58 | 59 | if len(sys.argv) > 1: 60 | prompt = sys.argv[1] 61 | if len(sys.argv) > 2: 62 | use_image = sys.argv[2].lower() in ["true", "1", "yes"] 63 | if len(sys.argv) > 3: 64 | image_url = sys.argv[3] 65 | 66 | global tokens, eval_time 67 | tokens = 0 68 | eval_time = -1 69 | 70 | rclpy.init() 71 | cv_bridge = CvBridge() 72 | image = load_image_from_url(image_url) if use_image else None 73 | llama_client = LlamaClientNode.get_instance() 74 | 75 | goal = GenerateResponse.Goal() 76 | goal.prompt = prompt 77 | goal.sampling_config.temp = 0.2 78 | 79 | if use_image and image is not None: 80 | goal.images.append(cv_bridge.cv2_to_imgmsg(image)) 81 | 82 | initial_time = time.time() 83 | llama_client.generate_response(goal, text_cb) 84 | end_time = time.time() 85 | 86 | print(f"Time to eval: {eval_time - initial_time} s") 87 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 88 | rclpy.shutdown() 89 | 90 | 91 | if __name__ == "__main__": 92 | main() 93 | -------------------------------------------------------------------------------- /llama_demos/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(llama_demos) 3 | 4 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 5 | add_compile_options(-Wall -Wextra -Wpedantic) 6 | endif() 7 | 8 | # find dependencies 9 | find_package(ament_cmake REQUIRED) 10 | 11 | install(PROGRAMS 12 | llama_demos/llama_demo_node.py 13 | DESTINATION lib/${PROJECT_NAME} 14 | RENAME llama_demo_node 15 | ) 16 | 17 | install(PROGRAMS 18 | llama_demos/llama_embeddings_demo_node.py 19 | DESTINATION lib/${PROJECT_NAME} 20 | RENAME llama_embeddings_demo_node 21 | ) 22 | 23 | install(PROGRAMS 24 | llama_demos/llama_rerank_demo_node.py 25 | DESTINATION lib/${PROJECT_NAME} 26 | RENAME llama_rerank_demo_node 27 | ) 28 | 29 | install(PROGRAMS 30 | llama_demos/llama_rag_demo_node.py 31 | DESTINATION lib/${PROJECT_NAME} 32 | RENAME llama_rag_demo_node 33 | ) 34 | 35 | install(PROGRAMS 36 | llama_demos/llava_demo_node.py 37 | DESTINATION lib/${PROJECT_NAME} 38 | RENAME llava_demo_node 39 | ) 40 | 41 | install(PROGRAMS 42 | llama_demos/mtmd_audio_demo_node.py 43 | DESTINATION lib/${PROJECT_NAME} 44 | RENAME mtmd_audio_demo_node 45 | ) 46 | 47 | install(PROGRAMS 48 | llama_demos/chatllama_demo_node.py 49 | DESTINATION lib/${PROJECT_NAME} 50 | RENAME chatllama_demo_node 51 | ) 52 | 53 | install(PROGRAMS 54 | llama_demos/chatllama_audio_demo_node.py 55 | DESTINATION lib/${PROJECT_NAME} 56 | RENAME 
chatllama_audio_demo_node 57 | ) 58 | 59 | install(PROGRAMS 60 | llama_demos/chatllama_multi_audio_demo_node.py 61 | DESTINATION lib/${PROJECT_NAME} 62 | RENAME chatllama_multi_audio_demo_node 63 | ) 64 | 65 | install(PROGRAMS 66 | llama_demos/chatllama_multi_image_demo_node.py 67 | DESTINATION lib/${PROJECT_NAME} 68 | RENAME chatllama_multi_image_demo_node 69 | ) 70 | 71 | install(PROGRAMS 72 | llama_demos/chatllama_multi_image_user_demo_node.py 73 | DESTINATION lib/${PROJECT_NAME} 74 | RENAME chatllama_multi_image_user_demo_node 75 | ) 76 | 77 | install(PROGRAMS 78 | llama_demos/chatllama_structured_demo_node.py 79 | DESTINATION lib/${PROJECT_NAME} 80 | RENAME chatllama_structured_demo_node 81 | ) 82 | 83 | install(PROGRAMS 84 | llama_demos/chatllama_tools_demo_node.py 85 | DESTINATION lib/${PROJECT_NAME} 86 | RENAME chatllama_tools_demo_node 87 | ) 88 | 89 | install(PROGRAMS 90 | llama_demos/chatllama_streaming_tools_demo_node.py 91 | DESTINATION lib/${PROJECT_NAME} 92 | RENAME chatllama_streaming_tools_demo_node 93 | ) 94 | 95 | install(PROGRAMS 96 | llama_demos/chatllama_langgraph_demo_node.py 97 | DESTINATION lib/${PROJECT_NAME} 98 | RENAME chatllama_langgraph_demo_node 99 | ) 100 | 101 | install(PROGRAMS 102 | llama_demos/chatllama_reasoning_demo_node.py 103 | DESTINATION lib/${PROJECT_NAME} 104 | RENAME chatllama_reasoning_demo_node 105 | ) 106 | 107 | install(PROGRAMS 108 | llama_demos/chatllama_reasoning_tools_demo_node.py 109 | DESTINATION lib/${PROJECT_NAME} 110 | RENAME chatllama_reasoning_tools_demo_node 111 | ) 112 | 113 | install(PROGRAMS 114 | llama_demos/chatllama_pddl_demo_node.py 115 | DESTINATION lib/${PROJECT_NAME} 116 | RENAME chatllama_pddl_demo_node 117 | ) 118 | 119 | ament_python_install_package(${PROJECT_NAME}) 120 | ament_package() 121 | -------------------------------------------------------------------------------- /llama_bt/include/llama_bt/action/generate_response_action.hpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2025 Alberto J. Tudela Roldán 4 | // Copyright (c) 2025 Grupo Avispa, DTE, Universidad de Málaga 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | // SOFTWARE. 
23 | 24 | #ifndef LLAMA_BT__ACTION__GENERATE_RESPONSE_ACTION_HPP_ 25 | #define LLAMA_BT__ACTION__GENERATE_RESPONSE_ACTION_HPP_ 26 | 27 | #include 28 | #include 29 | 30 | #if defined(BTV3) 31 | #include "llama_bt/action/bt_action_node_v3.hpp" 32 | #else 33 | #include "llama_bt/action/bt_action_node.hpp" 34 | #endif 35 | 36 | #include "llama_msgs/action/generate_response.hpp" 37 | 38 | namespace llama_bt { 39 | 40 | /** 41 | * @brief A llama_bt::BtActionNode class that wraps 42 | * llama_msgs::action::GenerateResponse 43 | */ 44 | class GenerateResponseAction 45 | : public llama_bt::BtActionNode { 46 | public: 47 | /** 48 | * @brief A constructor for llama_bt::GenerateResponse Service 49 | * @param xml_tag_name Name for the XML tag for this node 50 | * @param action_name Action name this node creates a client for 51 | * @param conf BT node configuration 52 | */ 53 | GenerateResponseAction(const std::string &xml_tag_name, 54 | const std::string &action_name, 55 | const BT::NodeConfiguration &conf); 56 | 57 | /** 58 | * @brief Function to perform some user-defined operation on tick 59 | * @return BT::NodeStatus Status of tick execution 60 | */ 61 | void on_tick() override; 62 | 63 | /** 64 | * @brief Function to perform some user-defined operation upon successful 65 | * completion of the action 66 | */ 67 | BT::NodeStatus on_success() override; 68 | 69 | /** 70 | * @brief Creates list of BT ports 71 | * @return BT::PortsList Containing node-specific ports 72 | */ 73 | static BT::PortsList providedPorts() { 74 | return providedBasicPorts({ 75 | BT::InputPort("prompt", "Prompt"), 76 | BT::InputPort>("stop", "Stop list"), 77 | BT::InputPort("reset", false, "Whether to reset the context"), 78 | BT::OutputPort("response", "Final Response"), 79 | }); 80 | } 81 | }; 82 | 83 | } // namespace llama_bt 84 | 85 | #endif // LLAMA_BT__ACTION__GENERATE_RESPONSE_ACTION_HPP_ -------------------------------------------------------------------------------- /llama_bt/test/utils/test_action_server.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Sarthak Mittal 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef TEST_ACTION_SERVER_HPP_ 16 | #define TEST_ACTION_SERVER_HPP_ 17 | 18 | #include 19 | #include 20 | 21 | #include "rclcpp/rclcpp.hpp" 22 | #include "rclcpp_action/rclcpp_action.hpp" 23 | 24 | template class TestActionServer : public rclcpp::Node { 25 | public: 26 | explicit TestActionServer( 27 | std::string action_name, 28 | const rclcpp::NodeOptions &options = rclcpp::NodeOptions()) 29 | : Node("test_action_server", options) { 30 | using namespace std::placeholders; // NOLINT 31 | 32 | this->action_server_ = rclcpp_action::create_server( 33 | this->get_node_base_interface(), this->get_node_clock_interface(), 34 | this->get_node_logging_interface(), 35 | this->get_node_waitables_interface(), action_name, 36 | std::bind(&TestActionServer::handle_goal, this, _1, _2), 37 | std::bind(&TestActionServer::handle_cancel, this, _1), 38 | std::bind(&TestActionServer::handle_accepted, this, _1)); 39 | } 40 | 41 | std::shared_ptr getCurrentGoal() const { 42 | return current_goal_; 43 | } 44 | 45 | void setReturnSuccess(bool return_success) { 46 | return_success_ = return_success; 47 | } 48 | 49 | bool getReturnSuccess(void) { return return_success_; } 50 | 51 | bool isGoalCancelled() { return goal_cancelled_; } 52 | 53 | protected: 54 | virtual rclcpp_action::GoalResponse 55 | handle_goal(const rclcpp_action::GoalUUID &, 56 | std::shared_ptr goal) { 57 | current_goal_ = goal; 58 | return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE; 59 | } 60 | 61 | virtual rclcpp_action::CancelResponse 62 | handle_cancel(const typename std::shared_ptr< 63 | rclcpp_action::ServerGoalHandle>) { 64 | goal_cancelled_ = true; 65 | return rclcpp_action::CancelResponse::ACCEPT; 66 | } 67 | 68 | virtual void execute( 69 | const typename std::shared_ptr> 70 | goal_handle) = 0; 71 | 72 | void handle_accepted( 73 | const std::shared_ptr> 74 | goal_handle) { 75 | using namespace std::placeholders; // NOLINT 76 | // this needs to return quickly to avoid blocking the executor, so spin up a 77 | // new thread 78 | std::thread{std::bind(&TestActionServer::execute, this, _1), goal_handle} 79 | .detach(); 80 | } 81 | 82 | private: 83 | typename rclcpp_action::Server::SharedPtr action_server_; 84 | std::shared_ptr current_goal_; 85 | bool return_success_ = true; 86 | bool goal_cancelled_ = false; 87 | }; 88 | 89 | #endif // TEST_ACTION_SERVER_HPP_ -------------------------------------------------------------------------------- /llama_cpp_vendor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(llama_cpp_vendor) 3 | 4 | include(FetchContent) 5 | find_package(ament_cmake REQUIRED) 6 | 7 | FetchContent_Declare( 8 | llama 9 | GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git 10 | GIT_TAG b6692 11 | GIT_SHALLOW TRUE 12 | ) 13 | 14 | option(LLAMA_BUILD_COMMON "llama: build common utils library" ON) 15 | 16 | FetchContent_MakeAvailable(llama) 17 | 18 | # ggml 19 | set_target_properties( 20 | ggml PROPERTIES 21 | OUTPUT_NAME "llama_ggml" 22 | INTERFACE_INCLUDE_DIRECTORIES "$" 23 | CXX_STANDARD 17 24 | ) 25 | 26 | set_target_properties( 27 | ggml-base PROPERTIES 28 | OUTPUT_NAME "llama_ggml_base" 29 | INTERFACE_INCLUDE_DIRECTORIES "$" 30 | CXX_STANDARD 17 31 | ) 32 | 33 | set_target_properties( 34 | ggml-cpu PROPERTIES 35 | OUTPUT_NAME "llama_ggml_cpu" 36 | INTERFACE_INCLUDE_DIRECTORIES "$" 37 | CXX_STANDARD 17 38 | ) 39 | 40 | if(GGML_CUDA) 41 | set_target_properties( 42 | ggml-cuda PROPERTIES 43 | OUTPUT_NAME 
"llama_ggml_cuda" 44 | INTERFACE_INCLUDE_DIRECTORIES "$" 45 | CXX_STANDARD 17 46 | ) 47 | endif() 48 | 49 | # llama 50 | set_target_properties( 51 | build_info llama common PROPERTIES 52 | INTERFACE_INCLUDE_DIRECTORIES "$" 53 | CXX_STANDARD 17 54 | ) 55 | 56 | # mtmd 57 | add_library(mtmd 58 | ${llama_SOURCE_DIR}/tools/mtmd/clip.cpp 59 | ${llama_SOURCE_DIR}/tools/mtmd/mtmd.cpp 60 | ${llama_SOURCE_DIR}/tools/mtmd/mtmd-helper.cpp 61 | ${llama_SOURCE_DIR}/tools/mtmd/mtmd-audio.cpp 62 | ) 63 | 64 | target_include_directories(mtmd 65 | PUBLIC 66 | $ 67 | $ 68 | $ 69 | $ 70 | $ 71 | $ 72 | ) 73 | 74 | # CUDA 75 | if(GGML_CUDA) 76 | add_compile_definitions(GGML_USE_CUDA) 77 | endif() 78 | 79 | # export 80 | install( 81 | DIRECTORY 82 | ${llama_SOURCE_DIR}/common/ 83 | ${llama_SOURCE_DIR}/ggml/include/ 84 | ${llama_SOURCE_DIR}/tools/mtmd/ 85 | ${llama_SOURCE_DIR}/vendor/nlohmann/ 86 | ${llama_SOURCE_DIR}/vendor/minja/ 87 | DESTINATION include 88 | FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" 89 | ) 90 | 91 | install( 92 | DIRECTORY 93 | ${llama_SOURCE_DIR}/models/templates/ 94 | DESTINATION share/${PROJECT_NAME}/models/templates 95 | FILES_MATCHING PATTERN "*.jinja" 96 | ) 97 | 98 | set(INSTALL_TARGETS 99 | ggml 100 | ggml-base 101 | ggml-cpu 102 | build_info 103 | common 104 | llama 105 | mtmd 106 | ) 107 | 108 | if(GGML_CUDA) 109 | list(APPEND INSTALL_TARGETS ggml-cuda) 110 | endif() 111 | 112 | install( 113 | TARGETS ${INSTALL_TARGETS} 114 | EXPORT export_llama 115 | LIBRARY DESTINATION lib 116 | INCLUDES DESTINATION include 117 | ) 118 | 119 | ament_export_include_directories(include) 120 | ament_export_targets(export_llama HAS_LIBRARY_TARGET) 121 | ament_package() 122 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_multi_audio_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | 27 | import time 28 | import rclpy 29 | from langchain_core.messages import SystemMessage, AIMessage 30 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 31 | from langchain_core.output_parsers import StrOutputParser 32 | from llama_ros.langchain import ChatLlamaROS 33 | 34 | 35 | def main(): 36 | 37 | tokens = 0 38 | initial_time = -1 39 | eval_time = -1 40 | 41 | rclpy.init() 42 | chat = ChatLlamaROS(temp=0.0) 43 | 44 | prompt = ChatPromptTemplate.from_messages( 45 | [ 46 | SystemMessage("You are an IA that answer questions."), 47 | HumanMessagePromptTemplate.from_template( 48 | template=[ 49 | {"type": "text", "text": "<__media__>"}, 50 | { 51 | "type": "image_url", 52 | "image_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav", 53 | }, 54 | ] 55 | ), 56 | AIMessage(content="Yes, the speaker is female and in her twenties."), 57 | HumanMessagePromptTemplate.from_template( 58 | template=[ 59 | {"type": "text", "text": "<__media__>"}, 60 | { 61 | "type": "image_url", 62 | "image_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/translate_to_chinese.wav", 63 | }, 64 | ] 65 | ), 66 | ] 67 | ) 68 | 69 | chain = prompt | chat | StrOutputParser() 70 | 71 | initial_time = time.time() 72 | for text in chain.stream({}): 73 | tokens += 1 74 | print(text, end="", flush=True) 75 | if eval_time < 0: 76 | eval_time = time.time() 77 | 78 | print("", end="\n", flush=True) 79 | 80 | end_time = time.time() 81 | print(f"Time to eval: {eval_time - initial_time} s") 82 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 83 | 84 | rclpy.shutdown() 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /llama_bt/src/action/generate_chat_completions_action.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2025 Alejandro González Cantón 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
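Editor's note: chatllama_multi_audio_demo_node.py above hard-codes a two-turn conversation to show that earlier turns are passed back to the model. The same history-passing idea works as a plain text chat loop: keep appending HumanMessage/AIMessage pairs before each call. A sketch, assuming a chat model is already launched; the loop and its prompts are illustrative, not part of the package:

import rclpy
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from llama_ros.langchain import ChatLlamaROS


def main():
    rclpy.init()
    chat = ChatLlamaROS(temp=0.2)

    history = [SystemMessage("You are an AI that answers questions.")]

    try:
        while True:
            user_text = input("you> ").strip()
            if not user_text:
                break
            history.append(HumanMessage(user_text))
            answer: AIMessage = chat.invoke(history)
            history.append(answer)
            print(f"llama> {answer.content}")
    except (EOFError, KeyboardInterrupt):
        pass

    rclpy.shutdown()


if __name__ == "__main__":
    main()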
22 | 23 | #include 24 | #include 25 | 26 | #include "llama_bt/action/generate_chat_completions_action.hpp" 27 | #include "llama_msgs/msg/chat_tool.hpp" 28 | 29 | namespace llama_bt { 30 | 31 | GenerateChatCompletionsAction::GenerateChatCompletionsAction( 32 | const std::string &xml_tag_name, const std::string &action_name, 33 | const BT::NodeConfiguration &conf) 34 | : llama_bt::BtActionNode( 35 | xml_tag_name, action_name, conf) {} 36 | 37 | void GenerateChatCompletionsAction::on_tick() { 38 | std::vector chat_messages; 39 | getInput("messages", chat_messages); 40 | std::vector chat_req_tools; 41 | getInput("tools", chat_req_tools); 42 | std::string tool_choice; 43 | getInput("tool_choice", tool_choice); 44 | 45 | goal_.messages = chat_messages; 46 | goal_.tools = chat_req_tools; 47 | 48 | if (tool_choice == "required") { 49 | goal_.tool_choice = llama_msgs::msg::ChatTool::TOOL_CHOICE_REQUIRED; 50 | } else if (tool_choice == "none") { 51 | goal_.tool_choice = llama_msgs::msg::ChatTool::TOOL_CHOICE_NONE; 52 | } else { 53 | goal_.tool_choice = llama_msgs::msg::ChatTool::TOOL_CHOICE_AUTO; 54 | } 55 | 56 | goal_.add_generation_prompt = true; 57 | goal_.use_jinja = true; 58 | goal_.parallel_tool_calls = chat_req_tools.size() > 1; 59 | goal_.stream = false; 60 | } 61 | 62 | BT::NodeStatus GenerateChatCompletionsAction::on_success() { 63 | setOutput("choice_message", result_.result->choices[0].message); 64 | return BT::NodeStatus::SUCCESS; 65 | } 66 | 67 | } // namespace llama_bt 68 | 69 | #if defined(BTV3) 70 | #include "behaviortree_cpp_v3/bt_factory.h" 71 | #else 72 | #include "behaviortree_cpp/bt_factory.h" 73 | #endif 74 | 75 | BT_REGISTER_NODES(factory) { 76 | BT::NodeBuilder builder = [](const std::string &name, 77 | const BT::NodeConfiguration &config) { 78 | return std::make_unique( 79 | name, "generate_chat_completions", config); 80 | }; 81 | 82 | factory.registerBuilder( 83 | "GenerateChatCompletions", builder); 84 | } -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_multi_image_user_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | 26 | 27 | import time 28 | import rclpy 29 | from langchain_core.messages import SystemMessage 30 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 31 | from langchain_core.output_parsers import StrOutputParser 32 | from llama_ros.langchain import ChatLlamaROS 33 | 34 | 35 | def main(): 36 | tokens = 0 37 | initial_time = -1 38 | eval_time = -1 39 | 40 | rclpy.init() 41 | chat = ChatLlamaROS(temp=0.0) 42 | 43 | prompt = ChatPromptTemplate.from_messages( 44 | [ 45 | SystemMessage("You are an IA that answer questions."), 46 | HumanMessagePromptTemplate.from_template( 47 | template=[ 48 | { 49 | "type": "text", 50 | "text": ( 51 | "<__media__><__media__>\n" 52 | "Who is the character in the middle of this first image and what type of food is the girl holding in this second image?" 53 | ), 54 | }, 55 | { 56 | "type": "image_url", 57 | "image_url": "https://pics.filmaffinity.com/Dragon_Ball_Bola_de_Dragaon_Serie_de_TV-973171538-large.jpg", 58 | }, 59 | { 60 | "type": "image_url", 61 | "image_url": "https://i.pinimg.com/474x/32/89/17/328917cc4fe3bd4cfbe2d32aa9cc6e98.jpg", 62 | }, 63 | ] 64 | ), 65 | ] 66 | ) 67 | 68 | chain = prompt | chat | StrOutputParser() 69 | 70 | initial_time = time.time() 71 | for text in chain.stream({}): 72 | tokens += 1 73 | print(text, end="", flush=True) 74 | if eval_time < 0: 75 | eval_time = time.time() 76 | 77 | print("", end="\n", flush=True) 78 | 79 | end_time = time.time() 80 | print(f"Time to eval: {eval_time - initial_time} s") 81 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 82 | 83 | rclpy.shutdown() 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_streaming_tools_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
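Editor's note: the streaming demos above (chatllama_multi_image_user_demo_node.py and the others) repeat the same timing boilerplate: count chunks, record the time of the first one, then print time-to-eval and tokens per second. A small helper that wraps any chain's stream() reproduces those numbers without the repetition; stream_with_stats is a hypothetical name, not part of llama_ros:

import time


def stream_with_stats(chain, inputs: dict) -> str:
    """Stream a LangChain chain, echo the chunks, and print the demos' usual stats."""
    tokens = 0
    first_token_time = -1.0
    pieces = []
    start = time.time()

    for text in chain.stream(inputs):
        if first_token_time < 0:
            first_token_time = time.time()
        tokens += 1
        pieces.append(text)
        print(text, end="", flush=True)

    end = time.time()
    print()

    if tokens > 0:
        gen_time = max(end - first_token_time, 1e-9)
        print(f"Time to eval: {first_token_time - start:.2f} s")
        print(f"Prediction speed: {tokens / gen_time:.2f} t/s")

    return "".join(pieces)


# Usage with any of the demo chains, e.g.:
#   chain = prompt | chat | StrOutputParser()
#   stream_with_stats(chain, {"image_url": "..."})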
25 | 26 | 27 | import time 28 | import rclpy 29 | from random import randint 30 | from langchain.tools import tool 31 | from langchain_core.messages import HumanMessage 32 | from llama_ros.langchain import ChatLlamaROS 33 | import asyncio 34 | 35 | 36 | @tool 37 | def get_inhabitants(city: str) -> int: 38 | """Get the current temperature of a city""" 39 | return randint(4_000_000, 8_000_000) 40 | 41 | 42 | @tool 43 | def get_curr_temperature(city: str) -> int: 44 | """Get the current temperature of a city""" 45 | return randint(20, 30) 46 | 47 | 48 | async def main(): 49 | rclpy.init() 50 | chat = ChatLlamaROS(temp=0.0, penalty_repeat=1.3, penalty_freq=1.3) 51 | 52 | messages = [ 53 | HumanMessage("What is the current temperature in Madrid? And its inhabitants?") 54 | ] 55 | 56 | print(f"\nPrompt: {messages[0].content}") 57 | llm_tools = chat.bind_tools( 58 | [get_inhabitants, get_curr_temperature], tool_choice="any" 59 | ) 60 | 61 | initial_time = time.time() 62 | eval_time = -1 63 | 64 | first = True 65 | async for chunk in llm_tools.astream(messages): 66 | if first: 67 | gathered = chunk 68 | first = False 69 | eval_time = time.time() 70 | else: 71 | gathered = gathered + chunk 72 | 73 | if ( 74 | chunk.tool_call_chunks 75 | and chunk.tool_call_chunks[-1]["args"] 76 | and "}" in chunk.tool_call_chunks[-1]["args"] 77 | ): 78 | print( 79 | f"Tool received: {gathered.tool_calls[-1]['name']}({gathered.tool_calls[-1]['args']})" 80 | ) 81 | 82 | output_tokens = chunk.usage_metadata.get("output_tokens", 0) 83 | 84 | end_time = time.time() 85 | total_eval_time = end_time - eval_time 86 | total_time = end_time - initial_time 87 | predition_time = total_time - total_eval_time 88 | 89 | print(f"\nTime to eval: {total_eval_time:.2f} s") 90 | print(f"Time to predict: {predition_time:.2f} s") 91 | print(f"Prediction speed: {output_tokens / predition_time:.2f} t/s") 92 | 93 | rclpy.shutdown() 94 | 95 | 96 | if __name__ == "__main__": 97 | asyncio.run(main()) 98 | -------------------------------------------------------------------------------- /llama_bt/include/llama_bt/action/generate_chat_completions_action.hpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2025 Alejandro González Cantón 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LLAMA_BT__ACTION__GENERATE_CHAT_COMPLETIONS_ACTION_HPP_ 24 | #define LLAMA_BT__ACTION__GENERATE_CHAT_COMPLETIONS_ACTION_HPP_ 25 | 26 | #include 27 | #include 28 | 29 | #if defined(BTV3) 30 | #include "llama_bt/action/bt_action_node_v3.hpp" 31 | #else 32 | #include "llama_bt/action/bt_action_node.hpp" 33 | #endif 34 | 35 | #include "llama_bt/action/bt_types.hpp" 36 | #include "llama_msgs/action/generate_chat_completions.hpp" 37 | #include "llama_msgs/msg/chat_message.hpp" 38 | #include "llama_msgs/msg/chat_req_tool.hpp" 39 | 40 | namespace llama_bt { 41 | 42 | /** 43 | * @brief A llama_bt::BtActionNode class that wraps 44 | * llama_msgs::action::GenerateChatCompletions 45 | */ 46 | class GenerateChatCompletionsAction 47 | : public llama_bt::BtActionNode< 48 | llama_msgs::action::GenerateChatCompletions> { 49 | public: 50 | /** 51 | * @brief A constructor for llama_bt::GenerateChatCompletions Service 52 | * @param xml_tag_name Name for the XML tag for this node 53 | * @param action_name Action name this node creates a client for 54 | * @param conf BT node configuration 55 | */ 56 | GenerateChatCompletionsAction(const std::string &xml_tag_name, 57 | const std::string &action_name, 58 | const BT::NodeConfiguration &conf); 59 | 60 | /** 61 | * @brief Function to perform some user-defined operation on tick 62 | * @return BT::NodeStatus Status of tick execution 63 | */ 64 | void on_tick() override; 65 | 66 | /** 67 | * @brief Function to perform some user-defined operation upon successful 68 | * completion of the action 69 | */ 70 | BT::NodeStatus on_success() override; 71 | 72 | /** 73 | * @brief Creates list of BT ports 74 | * @return BT::PortsList Containing node-specific ports 75 | */ 76 | static BT::PortsList providedPorts() { 77 | return providedBasicPorts({ 78 | BT::InputPort>( 79 | "messages", "Chat messages"), 80 | BT::InputPort>( 81 | "tools", "Chat request tools"), 82 | BT::InputPort("tool_choice", "auto", "Tool choice"), 83 | 84 | BT::OutputPort("choice_message", 85 | "Chat choice message"), 86 | }); 87 | } 88 | }; 89 | 90 | } // namespace llama_bt 91 | 92 | #endif // LLAMA_BT__ACTION__GENERATE_CHAT_COMPLETIONS_ACTION_HPP_ -------------------------------------------------------------------------------- /llama_ros/include/llama_utils/llama_params.hpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2024 Miguel Ángel González Santamarta 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LLAMA_UTILS__LLAMA_PARAMS_HPP 24 | #define LLAMA_UTILS__LLAMA_PARAMS_HPP 25 | 26 | #include 27 | #include 28 | 29 | #include "common.h" 30 | 31 | #include "llama_msgs/msg/sampling_config.hpp" 32 | #include "llava_ros/llava.hpp" 33 | 34 | namespace llama_utils { 35 | 36 | /** 37 | * @brief Represents the parameters for configuring the Llama system. 38 | */ 39 | struct LlamaParams { 40 | /** 41 | * @brief The system prompt used for initializing the Llama system. 42 | */ 43 | std::string system_prompt; 44 | 45 | /** 46 | * @brief Common parameters for configuring the Llama system. 47 | */ 48 | struct common_params params; 49 | }; 50 | 51 | /** 52 | * @brief Declares the parameters for the Llama system. 53 | * 54 | * @param node The lifecycle node to which the parameters will be declared. 55 | */ 56 | void declare_llama_params( 57 | const rclcpp_lifecycle::LifecycleNode::SharedPtr &node); 58 | 59 | /** 60 | * @brief Retrieves the Llama parameters from the given lifecycle node. 61 | * 62 | * @param node The shared pointer to the lifecycle node from which parameters 63 | * will be retrieved. 64 | * @return A struct containing the Llama parameters. 65 | */ 66 | struct LlamaParams 67 | get_llama_params(const rclcpp_lifecycle::LifecycleNode::SharedPtr &node); 68 | 69 | /** 70 | * @brief Parses a scheduling priority from a string. 71 | * 72 | * @param priority The string representing the scheduling priority. 73 | * @return The parsed scheduling priority as an enum value. 74 | */ 75 | enum ggml_sched_priority parse_priority(std::string priority); 76 | 77 | /** 78 | * @brief Parses a grammar trigger type from an integer. 79 | * 80 | * @param type The integer representing the grammar trigger type. 81 | * @return The parsed grammar trigger type as an enum value. 82 | */ 83 | common_grammar_trigger_type parse_grammar_trigger_type(int type); 84 | 85 | /** 86 | * @brief Parses sampling parameters from a SamplingConfig message. 87 | * 88 | * @param sampling_config The SamplingConfig message containing sampling 89 | * configuration. 90 | * @param n_vocab The size of the vocabulary. 91 | * @return A struct containing the parsed sampling parameters. 
92 | */ 93 | struct common_params_sampling 94 | parse_sampling_params(const llama_msgs::msg::SamplingConfig &sampling_config, 95 | int n_vocab); 96 | 97 | } // namespace llama_utils 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_tools_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | 27 | import time 28 | import rclpy 29 | from random import randint 30 | from langchain.tools import tool 31 | from langchain_core.messages import HumanMessage, AIMessage 32 | from llama_ros.langchain import ChatLlamaROS 33 | 34 | 35 | @tool 36 | def get_inhabitants(city: str) -> int: 37 | """Get the current temperature of a city""" 38 | return randint(4_000_000, 8_000_000) 39 | 40 | 41 | @tool 42 | def get_curr_temperature(city: str) -> int: 43 | """Get the current temperature of a city""" 44 | return randint(20, 30) 45 | 46 | 47 | def main(): 48 | rclpy.init() 49 | chat = ChatLlamaROS(temp=0.0) 50 | 51 | messages = [ 52 | HumanMessage("What is the current temperature in Madrid? 
And its inhabitants?") 53 | ] 54 | 55 | print(f"\nPrompt: {messages[0].content}") 56 | llm_tools = chat.bind_tools( 57 | [get_inhabitants, get_curr_temperature], tool_choice="any" 58 | ) 59 | 60 | initial_time = time.time() 61 | all_tools_res: AIMessage = llm_tools.invoke(messages) 62 | tools_time = time.time() 63 | 64 | messages.append(all_tools_res) 65 | 66 | for tool in all_tools_res.tool_calls: 67 | selected_tool = { 68 | "get_inhabitants": get_inhabitants, 69 | "get_curr_temperature": get_curr_temperature, 70 | }[tool["name"]] 71 | 72 | tool_msg = selected_tool.invoke(tool) 73 | 74 | formatted_output = ( 75 | f"{tool['name']}({''.join(tool['args'].values())}) = {tool_msg.content}" 76 | ) 77 | print(f"Calling tool: {formatted_output}") 78 | 79 | tool_msg.additional_kwargs = {"args": tool["args"]} 80 | messages.append(tool_msg) 81 | 82 | res: AIMessage = llm_tools.invoke(messages) 83 | eval_time = time.time() 84 | print(f"\nResponse: {res.content}") 85 | 86 | time_generate_tools = tools_time - initial_time 87 | time_last_response = eval_time - tools_time 88 | print(f"Time to generate tools: {time_generate_tools:.2f} s") 89 | print( 90 | f"Tokens per second (tools): {all_tools_res.usage_metadata['output_tokens'] / time_generate_tools:.2f} t/s" 91 | ) 92 | 93 | print(f"Time to generate last response: {time_last_response:.2f} s") 94 | print( 95 | f"Tokens per second (last response): {res.usage_metadata['output_tokens'] / time_last_response:.2f} t/s" 96 | ) 97 | 98 | rclpy.shutdown() 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_multi_image_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | 27 | import sys 28 | import time 29 | import rclpy 30 | from langchain_core.messages import SystemMessage, AIMessage 31 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 32 | from langchain_core.output_parsers import StrOutputParser 33 | from llama_ros.langchain import ChatLlamaROS 34 | 35 | 36 | def main(): 37 | if len(sys.argv) < 2: 38 | prompt = "What type of food is the girl holding?" 
39 | else: 40 | prompt = " ".join(sys.argv[1:]) 41 | 42 | tokens = 0 43 | initial_time = -1 44 | eval_time = -1 45 | 46 | rclpy.init() 47 | chat = ChatLlamaROS(temp=0.0) 48 | 49 | prompt = ChatPromptTemplate.from_messages( 50 | [ 51 | SystemMessage("You are an IA that answer questions."), 52 | HumanMessagePromptTemplate.from_template( 53 | template=[ 54 | { 55 | "type": "text", 56 | "text": f"<__media__>Who is the character in the middle?", 57 | }, 58 | { 59 | "type": "image_url", 60 | "image_url": "https://pics.filmaffinity.com/Dragon_Ball_Bola_de_Dragaon_Serie_de_TV-973171538-large.jpg", 61 | }, 62 | ] 63 | ), 64 | AIMessage(content="The character in the middle is Goku."), 65 | HumanMessagePromptTemplate.from_template( 66 | template=[ 67 | { 68 | "type": "text", 69 | "text": f"<__media__>{prompt}", 70 | }, 71 | {"type": "image_url", "image_url": "{image_url}"}, 72 | ] 73 | ), 74 | ] 75 | ) 76 | 77 | chain = prompt | chat | StrOutputParser() 78 | 79 | initial_time = time.time() 80 | for text in chain.stream( 81 | { 82 | "image_url": "https://i.pinimg.com/474x/32/89/17/328917cc4fe3bd4cfbe2d32aa9cc6e98.jpg" 83 | } 84 | ): 85 | tokens += 1 86 | print(text, end="", flush=True) 87 | if eval_time < 0: 88 | eval_time = time.time() 89 | 90 | print("", end="\n", flush=True) 91 | 92 | end_time = time.time() 93 | print(f"Time to eval: {eval_time - initial_time} s") 94 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 95 | 96 | rclpy.shutdown() 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/mtmd_audio_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Miguel Ángel González Santamarta 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
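# Overview of this demo: the audio file is downloaded and its raw bytes are
# sent untouched in `goal.audios` as a UInt8MultiArray; decoding is handled by
# the multimodal (mtmd) pipeline inside llama_ros. The "<__media__>" marker in
# the prompt marks where the audio is placed in the model context.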
24 | 25 | 26 | import sys 27 | import time 28 | import numpy as np 29 | import requests 30 | import tempfile 31 | 32 | import rclpy 33 | from llama_ros.llama_client_node import LlamaClientNode 34 | from llama_msgs.action import GenerateResponse 35 | from std_msgs.msg import UInt8MultiArray 36 | 37 | 38 | def download_audio_to_tempfile(url: str) -> str: 39 | """Download WAV file to a temporary file and return its path.""" 40 | print(f"Downloading audio file '{url}'") 41 | response = requests.get(url) 42 | response.raise_for_status() 43 | 44 | temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") 45 | temp_file.write(response.content) 46 | temp_file.close() 47 | return temp_file.name 48 | 49 | 50 | def read_mp3_as_uint8_array(filename: str) -> np.ndarray: 51 | """Read the binary MP3 file and return a NumPy array of uint8.""" 52 | with open(filename, "rb") as f: 53 | data = f.read() 54 | return np.frombuffer(data, dtype=np.uint8) 55 | 56 | 57 | def text_cb(feedback): 58 | global eval_time, tokens 59 | if eval_time < 0: 60 | eval_time = time.time() 61 | tokens += 1 62 | print(feedback.feedback.partial_response.text, end="", flush=True) 63 | 64 | 65 | def main(): 66 | prompt = "<__media__>What's that sound?" 67 | use_audio = True 68 | audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/glass-breaking-151256.mp3" 69 | 70 | if len(sys.argv) > 1: 71 | prompt = sys.argv[1] 72 | if len(sys.argv) > 2: 73 | use_audio = sys.argv[2].lower() in ["true", "1", "yes"] 74 | if len(sys.argv) > 3: 75 | use_audio = sys.argv[3] 76 | 77 | global tokens, eval_time 78 | tokens = 0 79 | eval_time = -1 80 | 81 | rclpy.init() 82 | file_path = download_audio_to_tempfile(audio_url) 83 | mp3_array = read_mp3_as_uint8_array(file_path) 84 | llama_client = LlamaClientNode.get_instance() 85 | 86 | goal = GenerateResponse.Goal() 87 | goal.prompt = prompt 88 | goal.sampling_config.temp = 0.8 89 | 90 | if use_audio and mp3_array is not None: 91 | msg = UInt8MultiArray() 92 | msg.data = mp3_array.tolist() 93 | goal.audios.append(msg) 94 | 95 | initial_time = time.time() 96 | llama_client.generate_response(goal, text_cb) 97 | end_time = time.time() 98 | 99 | print(f"\nTime to eval: {eval_time - initial_time} s") 100 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 101 | rclpy.shutdown() 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /llama_msgs/msg/SamplingConfig.msg: -------------------------------------------------------------------------------- 1 | int32 n_prev 64 # number of previous tokens to remember 2 | int32 n_probs 1 # if greater than 0, output the probabilities of top n_probs tokens 3 | int32 min_keep 0 # 0 = disabled, otherwise samplers should return at least min_keep tokens 4 | 5 | bool ignore_eos false # ignore end of stream token and continue generating (implies --logit-bias 2-inf) 6 | LogitBiasArray logit_bias # logit bias for specific tokens 7 | LogitBiasArray logit_bias_eog # pre-calculated logit biases for EOG tokens 8 | 9 | float32 temp 0.80 # temperature 10 | float32 dynatemp_range 0.0 # 0.0 = disabled 11 | float32 dynatemp_exponent 1.0 # controls how entropy maps to temperature in dynamic temperature sampler 12 | 13 | int32 top_k 40 # top-k sampling (0.0 = disabled) 14 | float32 top_p 0.95 # top-p sampling (1.0 = disabled) 15 | float32 min_p 0.05 # min-p sampling (0.0 = disabled) 16 | float32 top_n_sigma -1.0 # top-n sampling (-1.0 = disabled) 17 | 
float32 xtc_probability 0.00 # xtc sampling (0.0 = disable) 18 | float32 xtc_threshold 0.10 # xtc sampling threshold (> 0.5 disables XTC) 19 | float32 typical_p 1.00 # locally typical sampling, parameter p (1.0 = disabled) 20 | 21 | int32 penalty_last_n 64 # last n tokens consider for penalize (0 = disable penalty, -1 = context size) 22 | float32 penalty_repeat 1.00 # penalize repeat sequence of tokens (1.0 = disabled) 23 | float32 penalty_freq 0.00 # repeat alpha frequency penalty (0.0 = disable) 24 | float32 penalty_present 0.00 # repeat alpha presence penalty (0.0 = disabled) 25 | 26 | float32 dry_multiplier 0.0 # DRY repetition penalty for tokens extending repetition (0.0 = disabled) 27 | float32 dry_base 1.75 # multiplier * base ^ (length of sequence before token - allowed length) (0.0 = disabled) 28 | int32 dry_allowed_length 2 # tokens extending repetitions beyond this receive penalty 29 | int32 dry_penalty_last_n -1 # how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) 30 | string[] dry_sequence_breakers ["\\n", ":", "\\\"", "*"] # default sequence breakers for DRY 31 | 32 | int32 mirostat 0 # Mirostart sampling (0 = disabled, 1 = mirostat, 2 = mirostat 2.0) 33 | float32 mirostat_eta 0.10 # Mirostat learning rate, parameter eta 34 | float32 mirostat_tau 5.0 # Mirostat target entropy, parameter tau 35 | 36 | string samplers_sequence "edskypmxt" # PENALTIES, DRY, TOP_N_SIGMA, TOP_K, TYPICAL_P, TOP_P, MIN_P, XTC, TEMP 37 | 38 | string grammar "" # optional BNF-like grammar to constrain sampling 39 | string grammar_schema "" # grammar schema that defines a JSON BNF grammar 40 | bool grammar_lazy # whether to use lazy grammar 41 | GrammarTrigger[] grammar_triggers # triggers for lazy grammar 42 | int32[] preserved_tokens # tokens to preserve 43 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/llama_rag_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Miguel Ángel González Santamarta 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
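# Overview of this demo: a RAG pipeline built on llama_ros components. The blog
# post is split into 1000-character chunks with 200 overlap, indexed in Chroma
# with LlamaROSEmbeddings, retrieved with k=20, compressed to the top 3 chunks
# by LlamaROSReranker, and answered by ChatLlamaROS in streaming mode.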
24 | 25 | 26 | import bs4 27 | import rclpy 28 | 29 | from langchain_chroma import Chroma 30 | from langchain_community.document_loaders import WebBaseLoader 31 | from langchain_core.output_parsers import StrOutputParser 32 | from langchain_core.runnables import RunnablePassthrough 33 | from langchain_core.messages import SystemMessage 34 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 35 | from langchain_text_splitters import RecursiveCharacterTextSplitter 36 | from langchain.retrievers import ContextualCompressionRetriever 37 | 38 | from llama_ros.langchain import ChatLlamaROS, LlamaROSEmbeddings, LlamaROSReranker 39 | 40 | 41 | def main(): 42 | rclpy.init() 43 | 44 | # load, chunk and index the contents of the blog 45 | loader = WebBaseLoader( 46 | web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), 47 | bs_kwargs=dict( 48 | parse_only=bs4.SoupStrainer( 49 | class_=("post-content", "post-title", "post-header") 50 | ) 51 | ), 52 | ) 53 | docs = loader.load() 54 | 55 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 56 | splits = text_splitter.split_documents(docs) 57 | vectorstore = Chroma.from_documents(documents=splits, embedding=LlamaROSEmbeddings()) 58 | 59 | # retrieve and generate using the relevant snippets of the blog 60 | retriever = vectorstore.as_retriever(search_kwargs={"k": 20}) 61 | 62 | # create prompt 63 | prompt = ChatPromptTemplate.from_messages( 64 | [ 65 | SystemMessage("You are an AI assistant that answer questions briefly."), 66 | HumanMessagePromptTemplate.from_template( 67 | "Taking into account the following information:{context}\n\n{question}" 68 | ), 69 | ] 70 | ) 71 | 72 | # create rerank compression retriever 73 | compressor = LlamaROSReranker(top_n=3) 74 | compression_retriever = ContextualCompressionRetriever( 75 | base_compressor=compressor, base_retriever=retriever 76 | ) 77 | 78 | def format_docs(docs): 79 | formated_docs = "" 80 | 81 | for d in docs: 82 | formated_docs += f"\n\n\t- {d.page_content}" 83 | 84 | return formated_docs 85 | 86 | # create and use the chain 87 | rag_chain = ( 88 | { 89 | "context": compression_retriever | format_docs, 90 | "question": RunnablePassthrough(), 91 | } 92 | | prompt 93 | | ChatLlamaROS(temp=0.0) 94 | | StrOutputParser() 95 | ) 96 | 97 | for c in rag_chain.stream("What is Task Decomposition?"): 98 | print(c, flush=True, end="") 99 | 100 | rclpy.shutdown() 101 | 102 | 103 | if __name__ == "__main__": 104 | main() 105 | -------------------------------------------------------------------------------- /llama_demos/llama_demos/chatllama_pddl_demo_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # MIT License 4 | # 5 | # Copyright (c) 2024 Alejandro González Cantón 6 | # Copyright (c) 2024 Miguel Ángel González Santamarta 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | 27 | import time 28 | import rclpy 29 | from langchain_core.messages import SystemMessage 30 | from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate 31 | from langchain_core.output_parsers import StrOutputParser 32 | from llama_ros.langchain import ChatLlamaROS 33 | 34 | 35 | def main(): 36 | 37 | tokens = 0 38 | initial_time = -1 39 | eval_time = -1 40 | 41 | rclpy.init() 42 | chat = ChatLlamaROS( 43 | temp=0.2, 44 | penalty_repeat=1.15, 45 | enable_thinking=True, 46 | stream_reasoning=True, 47 | ) 48 | 49 | domain = """(define (domain exercise0) 50 | (:requirements :strips :typing :negative-preconditions :disjunctive-preconditions :equality) 51 | 52 | (:types robot location object) 53 | 54 | (:predicates 55 | (at-robot ?r - robot ?loc - location) 56 | (at-object ?o - object ?loc - location) 57 | (holding ?r - robot ?o - object) 58 | ) 59 | 60 | (:action move 61 | :parameters (?r - robot ?from - location ?to - location) 62 | :precondition (and (at-robot ?r ?from) (not (= ?from ?to))) 63 | :effect (and (not (at-robot ?r ?from)) (at-robot ?r ?to)) 64 | ) 65 | 66 | (:action pick_up 67 | :parameters (?r - robot ?o - object ?loc - location) 68 | :precondition (and (at-robot ?r ?loc) (at-object ?o ?loc)) 69 | :effect (and (not (at-object ?o ?loc)) (holding ?r ?o)) 70 | ) 71 | 72 | (:action put_down 73 | :parameters (?r - robot ?o - object ?loc - location) 74 | :precondition (and (holding ?r ?o) (at-robot ?r ?loc)) 75 | :effect (and (not (holding ?r ?o)) (at-object ?o ?loc)) 76 | ) 77 | )""" 78 | 79 | problem = """(define (problem exercise0-problem-robot) 80 | (:domain exercise0) 81 | 82 | (:objects 83 | robot1 robot2 - robot 84 | loc1 loc2 loc3 - location 85 | box1 box2 - object 86 | ) 87 | 88 | (:init 89 | (at-robot robot1 loc1) 90 | (at-robot robot2 loc2) 91 | (at-object box1 loc1) 92 | (at-object box2 loc3) 93 | ) 94 | 95 | (:goal 96 | (and 97 | (at-robot robot1 loc2) 98 | (at-object box1 loc2) 99 | (at-object box2 loc1) 100 | ) 101 | ) 102 | )""" 103 | 104 | prompt = ChatPromptTemplate.from_messages( 105 | [ 106 | SystemMessage( 107 | "You are an IA PDDL planner that process PDDL domain and problem texts and generates plans for the goals of the problem." 108 | ), 109 | HumanMessagePromptTemplate.from_template( 110 | template=[ 111 | { 112 | "type": "text", 113 | "text": ( 114 | f"{domain}\n" 115 | f"{problem}\n" 116 | "Generate a plan to for the goals of the problem." 
117 | ), 118 | }, 119 | ] 120 | ), 121 | ] 122 | ) 123 | 124 | chain = prompt | chat | StrOutputParser() 125 | 126 | initial_time = time.time() 127 | for text in chain.stream({}): 128 | tokens += 1 129 | print(text, end="", flush=True) 130 | if eval_time < 0: 131 | eval_time = time.time() 132 | 133 | print("", end="\n", flush=True) 134 | 135 | end_time = time.time() 136 | print(f"Time to eval: {eval_time - initial_time} s") 137 | print(f"Prediction speed: {tokens / (end_time - eval_time)} t/s") 138 | 139 | rclpy.shutdown() 140 | 141 | 142 | if __name__ == "__main__": 143 | main() 144 | -------------------------------------------------------------------------------- /llama_ros/src/llama_utils/logs.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2024 Miguel Ángel González Santamarta 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU General Public License 14 | // along with this program. If not, see . 15 | 16 | #include "llama_utils/logs.hpp" 17 | 18 | namespace llama_utils { 19 | 20 | /** 21 | * @brief Default error logging function. 22 | * 23 | * This function logs an error message to stderr with the format: 24 | * [ERROR] [file:function:line] message. 25 | * 26 | * @param file The name of the source file where the log function is called. 27 | * @param function The name of the function where the log function is called. 28 | * @param line The line number in the source file where the log function is 29 | * called. 30 | * @param text The format string for the log message. 31 | * @param ... Additional arguments for the format string. 32 | */ 33 | void default_log_error(const char *file, const char *function, int line, 34 | const char *text, ...) { 35 | va_list args; 36 | va_start(args, text); 37 | fprintf(stderr, "[ERROR] [%s:%s:%d] ", file, function, line); 38 | vfprintf(stderr, text, args); 39 | fprintf(stderr, "\n"); 40 | va_end(args); 41 | } 42 | 43 | /** 44 | * @brief Default warning logging function. 45 | * 46 | * This function logs a warning message to stderr with the format: 47 | * [WARN] [file:function:line] message. 48 | * 49 | * @param file The name of the source file where the log function is called. 50 | * @param function The name of the function where the log function is called. 51 | * @param line The line number in the source file where the log function is 52 | * called. 53 | * @param text The format string for the log message. 54 | * @param ... Additional arguments for the format string. 55 | */ 56 | void default_log_warn(const char *file, const char *function, int line, 57 | const char *text, ...) { 58 | va_list args; 59 | va_start(args, text); 60 | fprintf(stderr, "[WARN] [%s:%s:%d] ", file, function, line); 61 | vfprintf(stderr, text, args); 62 | fprintf(stderr, "\n"); 63 | va_end(args); 64 | } 65 | 66 | /** 67 | * @brief Default info logging function. 
68 | * 69 | * This function logs an informational message to stderr with the format: 70 | * [INFO] [file:function:line] message. 71 | * 72 | * @param file The name of the source file where the log function is called. 73 | * @param function The name of the function where the log function is called. 74 | * @param line The line number in the source file where the log function is 75 | * called. 76 | * @param text The format string for the log message. 77 | * @param ... Additional arguments for the format string. 78 | */ 79 | void default_log_info(const char *file, const char *function, int line, 80 | const char *text, ...) { 81 | va_list args; 82 | va_start(args, text); 83 | fprintf(stderr, "[INFO] [%s:%s:%d] ", file, function, line); 84 | vfprintf(stderr, text, args); 85 | fprintf(stderr, "\n"); 86 | va_end(args); 87 | } 88 | 89 | /** 90 | * @brief Default debug logging function. 91 | * 92 | * This function logs a debug message to stderr with the format: 93 | * [DEBUG] [file:function:line] message. 94 | * 95 | * @param file The name of the source file where the log function is called. 96 | * @param function The name of the function where the log function is called. 97 | * @param line The line number in the source file where the log function is 98 | * called. 99 | * @param text The format string for the log message. 100 | * @param ... Additional arguments for the format string. 101 | */ 102 | void default_log_debug(const char *file, const char *function, int line, 103 | const char *text, ...) { 104 | va_list args; 105 | va_start(args, text); 106 | fprintf(stderr, "[DEBUG] [%s:%s:%d] ", file, function, line); 107 | vfprintf(stderr, text, args); 108 | fprintf(stderr, "\n"); 109 | va_end(args); 110 | } 111 | 112 | // Initialize the function pointers with default log functions 113 | LogFunction log_error = default_log_error; 114 | LogFunction log_warn = default_log_warn; 115 | LogFunction log_info = default_log_info; 116 | LogFunction log_debug = default_log_debug; 117 | 118 | // Initialize the log level to INFO 119 | LogLevel log_level = INFO; 120 | 121 | void set_log_level(LogLevel log_level) { log_level = log_level; } 122 | 123 | } // namespace llama_utils -------------------------------------------------------------------------------- /llama_ros/include/llava_ros/llava.hpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2024 Miguel Ángel González Santamarta 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LLAVA_ROS__LLAVA_HPP 24 | #define LLAVA_ROS__LLAVA_HPP 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "common.h" 33 | #include "mtmd-helper.h" 34 | #include "mtmd.h" 35 | 36 | #include "llama_ros/llama.hpp" 37 | 38 | namespace llava_ros { 39 | 40 | /** 41 | * @brief Represents the Llava model, extending the Llama model with image 42 | * processing capabilities. 43 | * 44 | * This class provides additional functionality for handling images and 45 | * generating embeddings. 46 | */ 47 | class Llava : public llama_ros::Llama { 48 | 49 | public: 50 | /** 51 | * @brief Constructs a new Llava instance. 52 | * 53 | * @param params Common parameters for the llama.cpp. 54 | * @param system_prompt The system prompt to initialize the model's context. 55 | */ 56 | Llava(const struct common_params ¶ms, std::string system_prompt = ""); 57 | 58 | /** 59 | * @brief Destroys the Llava instance. 60 | * 61 | * Cleans up resources associated with the Llava model. 62 | */ 63 | ~Llava(); 64 | 65 | /** 66 | * @brief Resets the internal state of the Llava model. 67 | * 68 | * This method overrides the reset functionality of the base Llama class. 69 | */ 70 | void reset() override; 71 | 72 | /** 73 | * @brief Loads an mtmd into the Llava model. 74 | * 75 | * @param std::vector buf The mtmd data as a byte buffer. 76 | * @return True if the mtmd is successfully loaded, false otherwise. 77 | */ 78 | bool load_mtmd(std::vector buf); 79 | 80 | /** 81 | * @brief Loads an mtmd into the Llava model. 82 | * 83 | * @param std::vector> mtmd The mtmds data as a vector 84 | * of 85 | * @return True if the image is successfully loaded, false otherwise. 86 | */ 87 | bool load_mtmds(std::vector> mtmds); 88 | 89 | /** 90 | * @brief Clears all loaded mtmds from the Llava model. 91 | */ 92 | void clear_mtmds(); 93 | 94 | protected: 95 | /** 96 | * @brief Loads a prompt into the Llava model. 97 | * 98 | * This method overrides the base Llama class to load a prompt into the Llava 99 | * model, with optional prefix and suffix handling. 100 | * 101 | * @param input_prompt The input text prompt to load. 102 | * @param add_pfx Whether to add a prefix to the prompt. 103 | * @param add_sfx Whether to add a suffix to the prompt. 104 | */ 105 | void load_prompt(const std::string &input_prompt, bool add_pfx, 106 | bool add_sfx) override; 107 | 108 | /** 109 | * @brief Evaluates a specific mtmd chunk in the Llava model. 110 | * 111 | * This method processes the provided mtmd chunk and integrates it into the 112 | * model's context. 113 | * 114 | * @param image_chunk The mtmd chunk to evaluate. 115 | * @return True if the mtmd chunk evaluation is successful, false 116 | * otherwise. 117 | */ 118 | bool eval_mtmd_chunk(const mtmd_input_chunk *image_chunk); 119 | 120 | /** 121 | * @brief Evaluates the input prompt in the Llava model. 122 | * 123 | * This method overrides the base Llama class to evaluate the input prompt, 124 | * including image-related context. 125 | * 126 | * @return True if the prompt evaluation is successful, false otherwise. 127 | */ 128 | bool eval_prompt() override; 129 | 130 | /** 131 | * @brief Pointer to the multimodal context used for image processing. 
132 | * 133 | * This context is used for managing the state and operations of the 134 | * multimodal. 135 | */ 136 | struct mtmd_context *mtmd_ctx; 137 | 138 | private: 139 | /** 140 | * @brief Bitmaps for image processing. 141 | * 142 | * This structure holds the bitmap data for images used in the model. 143 | */ 144 | mtmd::bitmaps bitmaps; 145 | 146 | mtmd::input_chunks chunks; 147 | }; 148 | 149 | } // namespace llava_ros 150 | 151 | #endif 152 | -------------------------------------------------------------------------------- /llama_ros/src/llava_ros/llava_node.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2024 Miguel Ángel González Santamarta 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in 13 | // all copies or substantial portions of the Software. 14 | 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
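// Overview of this node: before delegating to LlamaNode, both goal handlers
// clear previously loaded media, re-encode incoming sensor_msgs/Image data to
// JPEG with cv_bridge and cv::imencode, forward raw audio bytes unchanged, and
// pass everything to Llava::load_mtmds().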
22 | 23 | #if defined(CV_BRIDGE_H) 24 | #include 25 | #else 26 | #include 27 | #endif 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "llama_utils/llama_params.hpp" 35 | #include "llava_ros/llava_node.hpp" 36 | 37 | using namespace llava_ros; 38 | 39 | LlavaNode::LlavaNode() : llama_ros::LlamaNode() {} 40 | 41 | void LlavaNode::create_llama() { 42 | this->llama = 43 | std::make_unique(this->params.params, this->params.system_prompt); 44 | } 45 | 46 | bool LlavaNode::goal_empty(std::shared_ptr goal) { 47 | return goal->prompt.size() == 0 && goal->images.size() == 0; 48 | } 49 | 50 | void LlavaNode::execute( 51 | const std::shared_ptr goal_handle) { 52 | 53 | auto result = std::make_shared(); 54 | auto images_msg = goal_handle->get_goal()->images; 55 | auto audios_msgs = goal_handle->get_goal()->audios; 56 | 57 | // Clear mtmds 58 | static_cast(this->llama.get())->clear_mtmds(); 59 | 60 | // load images 61 | if (!this->load_images(images_msg)) { 62 | this->goal_handle_->abort(result); 63 | } 64 | 65 | // load audios 66 | if (!this->load_audios(audios_msgs)) { 67 | this->goal_handle_->abort(result); 68 | } 69 | 70 | // llama_node execute 71 | llama_ros::LlamaNode::execute(goal_handle); 72 | } 73 | 74 | /* 75 | ************************ 76 | * CHAT COMPLETIONS * 77 | ************************ 78 | */ 79 | bool LlavaNode::goal_empty_chat_completions( 80 | std::shared_ptr goal) { 81 | return goal->messages.size() == 0 && goal->images.size() == 0; 82 | } 83 | 84 | void LlavaNode::execute_chat_completions( 85 | const std::shared_ptr goal_handle) { 86 | 87 | auto result = std::make_shared(); 88 | auto images_msg = goal_handle->get_goal()->images; 89 | auto audios_msgs = goal_handle->get_goal()->audios; 90 | 91 | RCLCPP_INFO(this->get_logger(), "Executing chat completions"); 92 | 93 | // Clear mtmds 94 | static_cast(this->llama.get())->clear_mtmds(); 95 | 96 | // load images 97 | if (!this->load_images(images_msg)) { 98 | this->goal_handle_chat_->abort(result); 99 | } 100 | 101 | // load audios 102 | if (!this->load_audios(audios_msgs)) { 103 | this->goal_handle_chat_->abort(result); 104 | } 105 | 106 | // llama_node execute_chat_completions 107 | llama_ros::LlamaNode::execute_chat_completions(goal_handle); 108 | } 109 | 110 | bool LlavaNode::load_images(std::vector images_msg) { 111 | 112 | std::vector> images; 113 | 114 | for (const auto &image_msg : images_msg) { 115 | if (image_msg.data.size() > 0) { 116 | 117 | RCLCPP_INFO(this->get_logger(), "Loading image..."); 118 | 119 | cv_bridge::CvImagePtr cv_ptr = 120 | cv_bridge::toCvCopy(image_msg, image_msg.encoding); 121 | 122 | std::vector buf; 123 | cv::imencode(".jpg", cv_ptr->image, buf); 124 | images.push_back(buf); 125 | } 126 | } 127 | 128 | if (!static_cast(this->llama.get())->load_mtmds(images)) { 129 | RCLCPP_ERROR(this->get_logger(), "Failed to load images"); 130 | return false; 131 | } 132 | 133 | RCLCPP_INFO(this->get_logger(), "Images loaded"); 134 | return true; 135 | } 136 | 137 | bool LlavaNode::load_audios( 138 | std::vector audios_msgs) { 139 | 140 | std::vector> audios; 141 | 142 | for (const auto &audio_msg : audios_msgs) { 143 | if (audio_msg.data.size() > 0) { 144 | RCLCPP_INFO(this->get_logger(), "Loading audio..."); 145 | std::vector buf; 146 | audios.push_back(audio_msg.data); 147 | } 148 | } 149 | 150 | if (!static_cast(this->llama.get())->load_mtmds(audios)) { 151 | RCLCPP_ERROR(this->get_logger(), "Failed to load audios"); 152 | return false; 153 | } 154 | 155 | 
RCLCPP_INFO(this->get_logger(), "Audios loaded"); 156 | return true; 157 | } -------------------------------------------------------------------------------- /llama_bringup/llama_bringup/utils.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2023 Miguel Ángel González Santamarta 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | 24 | import os 25 | import yaml 26 | from typing import Tuple 27 | from ament_index_python.packages import get_package_share_directory 28 | from launch.actions import IncludeLaunchDescription 29 | from launch.launch_description_sources import PythonLaunchDescriptionSource 30 | 31 | 32 | def load_prompt_type(prompt_file_name: str) -> Tuple: 33 | file_path = os.path.join( 34 | get_package_share_directory("llama_bringup"), 35 | "prompts", 36 | f"{prompt_file_name}.yaml", 37 | ) 38 | with open(file_path, "r") as file: 39 | yaml_data = yaml.safe_load(file) 40 | return ( 41 | yaml_data["prefix"], 42 | yaml_data["suffix"], 43 | yaml_data["stopping_words"], 44 | yaml_data["system_prompt"], 45 | ) 46 | 47 | 48 | def create_llama_launch_from_yaml(file_path: str) -> IncludeLaunchDescription: 49 | with open(file_path, "r") as file: 50 | config = yaml.safe_load(file) 51 | return create_llama_launch(**config) 52 | 53 | 54 | def create_llama_launch(**kwargs) -> IncludeLaunchDescription: 55 | prompt_data = ( 56 | load_prompt_type(kwargs["system_prompt_type"]) 57 | if kwargs.get("system_prompt_type") 58 | else ("", "", [], "") 59 | ) 60 | kwargs["prefix"] = kwargs.get("prefix", prompt_data[0]) 61 | kwargs["suffix"] = kwargs.get("suffix", prompt_data[1]) 62 | kwargs["system_prompt"] = kwargs.get("system_prompt", prompt_data[3]) 63 | 64 | # stopping_words 65 | kwargs["stopping_words"] = kwargs.get("stopping_words", prompt_data[2]) 66 | if not kwargs["stopping_words"]: 67 | kwargs["stopping_words"] = [""] 68 | kwargs["chat_template_file"] = kwargs.get("chat_template_file", "") 69 | if kwargs["chat_template_file"]: 70 | chat_template_path = "" 71 | 72 | if "/" in kwargs["chat_template_file"]: 73 | chat_template_path = kwargs["chat_template_file"] 74 | else: 75 | chat_template_path = os.path.join( 76 | get_package_share_directory("llama_cpp_vendor"), 77 | "models", 78 | "templates", 79 | kwargs["chat_template_file"], 80 | ) 81 | 82 | if not os.path.exists(chat_template_path): 83 | kwargs["chat_template_file"] = "" 84 | else: 85 | 
kwargs["chat_template_file"] = chat_template_path 86 | 87 | # load lora adapters 88 | lora_adapters = [""] 89 | lora_adapters_repos = [""] 90 | lora_adapters_filenames = [""] 91 | lora_adapters_scales = [0.0] 92 | 93 | if "lora_adapters" in kwargs: 94 | for i in range(len(kwargs["lora_adapters"])): 95 | if not lora_adapters[0]: 96 | lora_adapters.clear() 97 | lora_adapters_scales.clear() 98 | 99 | lora = kwargs["lora_adapters"][i] 100 | 101 | if "repo" in lora and "filename" in lora: 102 | if not lora_adapters_repos[0]: 103 | lora_adapters_repos.clear() 104 | lora_adapters_filenames.clear() 105 | 106 | lora_adapters_repos.append(lora["repo"]) 107 | lora_adapters_filenames.append(lora["filename"]) 108 | lora_adapters.append("HF") 109 | 110 | elif "path" in lora: 111 | lora_adapters.append(lora["path"]) 112 | 113 | else: 114 | continue 115 | 116 | if "scale" not in lora: 117 | continue 118 | 119 | lora_adapters_scales.append(lora["scale"]) 120 | 121 | kwargs["lora_adapters"] = lora_adapters 122 | kwargs["lora_adapters_repos"] = lora_adapters_repos 123 | kwargs["lora_adapters_filenames"] = lora_adapters_filenames 124 | kwargs["lora_adapters_scales"] = lora_adapters_scales 125 | 126 | # use llava 127 | if not kwargs.get("use_llava"): 128 | kwargs["use_llava"] = False 129 | 130 | return IncludeLaunchDescription( 131 | PythonLaunchDescriptionSource( 132 | os.path.join( 133 | get_package_share_directory("llama_bringup"), "launch", "base.launch.py" 134 | ) 135 | ), 136 | launch_arguments={key: str(value) for key, value in kwargs.items()}.items(), 137 | ) 138 | --------------------------------------------------------------------------------