├── .github └── workflows │ └── build_650.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── overlook.cmake ├── scripts ├── gradio_demo.png ├── gradio_demo.py ├── qwen2.5_tokenizer │ ├── merges.txt │ ├── tokenizer.json │ ├── tokenizer_config.json │ └── vocab.json └── qwen2.5_tokenizer_uid.py ├── src ├── cmdline.hpp ├── main.cpp ├── main_api.cpp ├── main_prof.cpp └── runner │ ├── LLM.hpp │ ├── LLMEmbedSelector.hpp │ ├── LLMPostprocess.hpp │ ├── Tokenizer │ ├── QwenTokenizer.cpp │ ├── QwenTokenizer.hpp │ ├── Tokenizer.cpp │ ├── Tokenizer.hpp │ ├── base64.h │ ├── tiktoken.h │ └── unordered_dense.h │ ├── ax_model_runner │ ├── ax_model_runner.hpp │ ├── ax_model_runner_ax650.cpp │ └── ax_model_runner_ax650.hpp │ └── utils │ ├── ax_cmm_utils.hpp │ ├── bfloat16.hpp │ ├── cqdm.cpp │ ├── cqdm.h │ ├── http_utils.hpp │ ├── httplib.h │ ├── json.hpp │ ├── memory_utils.cpp │ ├── memory_utils.hpp │ ├── sample_log.h │ ├── string_utility.hpp │ └── timer.hpp ├── toolchains ├── aarch64-none-linux-gnu.toolchain.cmake └── arm-linux-gnueabihf.toolchain.cmake └── tools └── fp32_to_bf16.cpp /.github/workflows/build_650.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/.github/workflows/build_650.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/README.md -------------------------------------------------------------------------------- /overlook.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/overlook.cmake -------------------------------------------------------------------------------- /scripts/gradio_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/gradio_demo.png -------------------------------------------------------------------------------- /scripts/gradio_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/gradio_demo.py -------------------------------------------------------------------------------- /scripts/qwen2.5_tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/qwen2.5_tokenizer/merges.txt -------------------------------------------------------------------------------- /scripts/qwen2.5_tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/qwen2.5_tokenizer/tokenizer.json -------------------------------------------------------------------------------- /scripts/qwen2.5_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/qwen2.5_tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /scripts/qwen2.5_tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/qwen2.5_tokenizer/vocab.json -------------------------------------------------------------------------------- /scripts/qwen2.5_tokenizer_uid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/scripts/qwen2.5_tokenizer_uid.py -------------------------------------------------------------------------------- /src/cmdline.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/cmdline.hpp -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/main.cpp -------------------------------------------------------------------------------- /src/main_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/main_api.cpp -------------------------------------------------------------------------------- /src/main_prof.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/main_prof.cpp -------------------------------------------------------------------------------- /src/runner/LLM.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/LLM.hpp -------------------------------------------------------------------------------- /src/runner/LLMEmbedSelector.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/LLMEmbedSelector.hpp -------------------------------------------------------------------------------- /src/runner/LLMPostprocess.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/LLMPostprocess.hpp -------------------------------------------------------------------------------- /src/runner/Tokenizer/QwenTokenizer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/QwenTokenizer.cpp -------------------------------------------------------------------------------- /src/runner/Tokenizer/QwenTokenizer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/QwenTokenizer.hpp -------------------------------------------------------------------------------- /src/runner/Tokenizer/Tokenizer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/Tokenizer.cpp -------------------------------------------------------------------------------- /src/runner/Tokenizer/Tokenizer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/Tokenizer.hpp -------------------------------------------------------------------------------- /src/runner/Tokenizer/base64.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/base64.h -------------------------------------------------------------------------------- /src/runner/Tokenizer/tiktoken.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/tiktoken.h -------------------------------------------------------------------------------- /src/runner/Tokenizer/unordered_dense.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/Tokenizer/unordered_dense.h -------------------------------------------------------------------------------- /src/runner/ax_model_runner/ax_model_runner.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/ax_model_runner/ax_model_runner.hpp -------------------------------------------------------------------------------- /src/runner/ax_model_runner/ax_model_runner_ax650.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/ax_model_runner/ax_model_runner_ax650.cpp -------------------------------------------------------------------------------- /src/runner/ax_model_runner/ax_model_runner_ax650.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/ax_model_runner/ax_model_runner_ax650.hpp -------------------------------------------------------------------------------- /src/runner/utils/ax_cmm_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/ax_cmm_utils.hpp -------------------------------------------------------------------------------- /src/runner/utils/bfloat16.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/bfloat16.hpp -------------------------------------------------------------------------------- /src/runner/utils/cqdm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/cqdm.cpp -------------------------------------------------------------------------------- /src/runner/utils/cqdm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/cqdm.h -------------------------------------------------------------------------------- /src/runner/utils/http_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/http_utils.hpp -------------------------------------------------------------------------------- /src/runner/utils/httplib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/httplib.h -------------------------------------------------------------------------------- /src/runner/utils/json.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/json.hpp -------------------------------------------------------------------------------- /src/runner/utils/memory_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/memory_utils.cpp -------------------------------------------------------------------------------- /src/runner/utils/memory_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/memory_utils.hpp -------------------------------------------------------------------------------- /src/runner/utils/sample_log.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/sample_log.h -------------------------------------------------------------------------------- /src/runner/utils/string_utility.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/string_utility.hpp -------------------------------------------------------------------------------- /src/runner/utils/timer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/src/runner/utils/timer.hpp -------------------------------------------------------------------------------- /toolchains/aarch64-none-linux-gnu.toolchain.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/toolchains/aarch64-none-linux-gnu.toolchain.cmake -------------------------------------------------------------------------------- /toolchains/arm-linux-gnueabihf.toolchain.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/toolchains/arm-linux-gnueabihf.toolchain.cmake -------------------------------------------------------------------------------- /tools/fp32_to_bf16.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AXERA-TECH/ax-llm/HEAD/tools/fp32_to_bf16.cpp --------------------------------------------------------------------------------