├── .gitignore ├── LICENSE ├── README.md ├── app ├── api.py ├── chatbot.py └── gradio_chat.py ├── assets ├── 4070ti.gif ├── README.md └── umbrella.jpeg ├── configs ├── chat_config_12gb.json ├── chat_config_16gb.json ├── chat_config_24gb.json ├── chat_config_48gb.json ├── chat_config_48gb_qwen.json ├── chat_config_ar.json ├── chat_config_mistral_8gb.json ├── chat_config_qwq.json ├── chat_config_qwq_5080.json ├── chat_config_qwq_8gb.json ├── chat_config_qwq_awq.json ├── code_config_24gb.json ├── code_config_48gb.json ├── greedy_config_12gb.json ├── greedy_config_16gb.json ├── greedy_config_24gb.json └── greedy_config_48gb.json ├── draft ├── QwQ_draft.py ├── config.json ├── train_draft.py ├── train_draft_lc.py └── zero3.yaml ├── examples ├── README.md ├── ar_generate.py ├── bench.py ├── construct_sequoia.py ├── data │ └── question.jsonl ├── generate.py ├── h2o_generate.py ├── hf_generate.py ├── spec_bench.py ├── spec_bench_math.py ├── spec_bench_python.py └── spec_generate.py ├── install.sh ├── requirements.txt ├── setup.py └── umbrella ├── __init__.py ├── api ├── __init__.py ├── api_utils.py ├── client.py └── server.py ├── attn ├── __init__.py └── cache.py ├── data └── question.jsonl ├── engine ├── __init__.py ├── ar_engine.py ├── auto_engine.py ├── base.py ├── dynamic_speculation_engine.py ├── speculation_utils.py └── static_speculation_engine.py ├── logging_config.py ├── models ├── __init__.py ├── auto_model.py ├── base.py ├── gemma.py ├── gemma_layer.py ├── llama.py ├── llama_layer.py ├── mistral.py ├── mistral_layer.py ├── model_utils.py ├── qwen.py └── qwen_layer.py ├── quantization ├── __init__.py ├── awq_utils.py └── fbgemm_utils.py ├── sequoia_utils.py ├── templates.py ├── trees ├── 3seq.json ├── 8b_sequoia_tree-5x6.json ├── 8b_sequoia_tree-5x8.json ├── 8b_sequoia_tree-6x6.json ├── 8b_sequoia_tree-6x7.json ├── sequoia_tree-3x4.json └── sequoia_tree-5x6.json └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/README.md -------------------------------------------------------------------------------- /app/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/app/api.py -------------------------------------------------------------------------------- /app/chatbot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/app/chatbot.py -------------------------------------------------------------------------------- /app/gradio_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/app/gradio_chat.py -------------------------------------------------------------------------------- /assets/4070ti.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/assets/4070ti.gif -------------------------------------------------------------------------------- /assets/README.md: -------------------------------------------------------------------------------- 1 | # Assets of UMbreLLa 2 | -------------------------------------------------------------------------------- /assets/umbrella.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/assets/umbrella.jpeg -------------------------------------------------------------------------------- /configs/chat_config_12gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_12gb.json -------------------------------------------------------------------------------- /configs/chat_config_16gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_16gb.json -------------------------------------------------------------------------------- /configs/chat_config_24gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_24gb.json -------------------------------------------------------------------------------- /configs/chat_config_48gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_48gb.json -------------------------------------------------------------------------------- /configs/chat_config_48gb_qwen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_48gb_qwen.json -------------------------------------------------------------------------------- /configs/chat_config_ar.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_ar.json -------------------------------------------------------------------------------- /configs/chat_config_mistral_8gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_mistral_8gb.json -------------------------------------------------------------------------------- /configs/chat_config_qwq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_qwq.json -------------------------------------------------------------------------------- /configs/chat_config_qwq_5080.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_qwq_5080.json -------------------------------------------------------------------------------- /configs/chat_config_qwq_8gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_qwq_8gb.json -------------------------------------------------------------------------------- /configs/chat_config_qwq_awq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/chat_config_qwq_awq.json -------------------------------------------------------------------------------- /configs/code_config_24gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/code_config_24gb.json -------------------------------------------------------------------------------- /configs/code_config_48gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/code_config_48gb.json -------------------------------------------------------------------------------- /configs/greedy_config_12gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/greedy_config_12gb.json -------------------------------------------------------------------------------- /configs/greedy_config_16gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/greedy_config_16gb.json -------------------------------------------------------------------------------- /configs/greedy_config_24gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/greedy_config_24gb.json -------------------------------------------------------------------------------- /configs/greedy_config_48gb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/configs/greedy_config_48gb.json -------------------------------------------------------------------------------- /draft/QwQ_draft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/draft/QwQ_draft.py -------------------------------------------------------------------------------- /draft/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/draft/config.json -------------------------------------------------------------------------------- /draft/train_draft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/draft/train_draft.py -------------------------------------------------------------------------------- /draft/train_draft_lc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/draft/train_draft_lc.py -------------------------------------------------------------------------------- /draft/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/draft/zero3.yaml -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/ar_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/ar_generate.py -------------------------------------------------------------------------------- /examples/bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/bench.py -------------------------------------------------------------------------------- /examples/construct_sequoia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/construct_sequoia.py -------------------------------------------------------------------------------- /examples/data/question.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/data/question.jsonl -------------------------------------------------------------------------------- /examples/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/generate.py -------------------------------------------------------------------------------- /examples/h2o_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/h2o_generate.py -------------------------------------------------------------------------------- /examples/hf_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/hf_generate.py -------------------------------------------------------------------------------- /examples/spec_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/spec_bench.py -------------------------------------------------------------------------------- /examples/spec_bench_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/spec_bench_math.py -------------------------------------------------------------------------------- /examples/spec_bench_python.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/spec_bench_python.py -------------------------------------------------------------------------------- /examples/spec_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/examples/spec_generate.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/install.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/setup.py -------------------------------------------------------------------------------- /umbrella/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /umbrella/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /umbrella/api/api_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/api/api_utils.py -------------------------------------------------------------------------------- /umbrella/api/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/api/client.py -------------------------------------------------------------------------------- /umbrella/api/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/api/server.py -------------------------------------------------------------------------------- /umbrella/attn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /umbrella/attn/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/attn/cache.py -------------------------------------------------------------------------------- /umbrella/data/question.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/data/question.jsonl -------------------------------------------------------------------------------- /umbrella/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /umbrella/engine/ar_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/engine/ar_engine.py -------------------------------------------------------------------------------- /umbrella/engine/auto_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/engine/auto_engine.py -------------------------------------------------------------------------------- /umbrella/engine/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/engine/base.py -------------------------------------------------------------------------------- /umbrella/engine/dynamic_speculation_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/engine/dynamic_speculation_engine.py -------------------------------------------------------------------------------- /umbrella/engine/speculation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/engine/speculation_utils.py -------------------------------------------------------------------------------- /umbrella/engine/static_speculation_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/engine/static_speculation_engine.py -------------------------------------------------------------------------------- /umbrella/logging_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/logging_config.py -------------------------------------------------------------------------------- /umbrella/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/__init__.py -------------------------------------------------------------------------------- /umbrella/models/auto_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/auto_model.py -------------------------------------------------------------------------------- /umbrella/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/base.py -------------------------------------------------------------------------------- /umbrella/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/gemma.py -------------------------------------------------------------------------------- /umbrella/models/gemma_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/gemma_layer.py -------------------------------------------------------------------------------- /umbrella/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/llama.py -------------------------------------------------------------------------------- /umbrella/models/llama_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/llama_layer.py -------------------------------------------------------------------------------- /umbrella/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/mistral.py -------------------------------------------------------------------------------- /umbrella/models/mistral_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/mistral_layer.py -------------------------------------------------------------------------------- /umbrella/models/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/model_utils.py -------------------------------------------------------------------------------- /umbrella/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/qwen.py -------------------------------------------------------------------------------- /umbrella/models/qwen_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/models/qwen_layer.py -------------------------------------------------------------------------------- /umbrella/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /umbrella/quantization/awq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/quantization/awq_utils.py -------------------------------------------------------------------------------- /umbrella/quantization/fbgemm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/quantization/fbgemm_utils.py -------------------------------------------------------------------------------- /umbrella/sequoia_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/sequoia_utils.py -------------------------------------------------------------------------------- /umbrella/templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/templates.py -------------------------------------------------------------------------------- /umbrella/trees/3seq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/3seq.json -------------------------------------------------------------------------------- /umbrella/trees/8b_sequoia_tree-5x6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/8b_sequoia_tree-5x6.json -------------------------------------------------------------------------------- /umbrella/trees/8b_sequoia_tree-5x8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/8b_sequoia_tree-5x8.json -------------------------------------------------------------------------------- /umbrella/trees/8b_sequoia_tree-6x6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/8b_sequoia_tree-6x6.json -------------------------------------------------------------------------------- /umbrella/trees/8b_sequoia_tree-6x7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/8b_sequoia_tree-6x7.json -------------------------------------------------------------------------------- /umbrella/trees/sequoia_tree-3x4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/sequoia_tree-3x4.json -------------------------------------------------------------------------------- /umbrella/trees/sequoia_tree-5x6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/trees/sequoia_tree-5x6.json -------------------------------------------------------------------------------- /umbrella/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/UMbreLLa/HEAD/umbrella/utils.py --------------------------------------------------------------------------------