├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── core └── process_manager.py ├── docs ├── AUTO_LOAD.md ├── AUTO_UNLOAD.md ├── KV_CACHE.md ├── WEBUI.md └── imgs │ └── webui_sample.png ├── embedding ├── embedding_schemas.py ├── models │ └── .gitkeep └── run_process.py ├── main.py ├── models ├── .gitkeep └── store_model_files_here.txt ├── requirements.txt ├── schemas.py ├── tts └── kokoro_tts │ ├── generate.py │ ├── models │ └── .gitkeep │ └── run_process.py ├── utils ├── __init__.py ├── kv_cache_utils.py └── utils.py ├── webui ├── static │ ├── css │ │ └── style.css │ └── js │ │ └── main.js └── templates │ └── index.html ├── whisper_stt ├── uploads │ └── .gitkeep └── whisper.py └── worker ├── __init__.py ├── kv_cache ├── data │ └── .gitkeep ├── kv_cache_manager.py └── kv_cache_metadata.py ├── llm_model.py ├── llm_process.py ├── logger_config.py ├── task ├── completions_stream │ └── generation_service.py ├── debug_info │ └── debug_info.py ├── load │ └── model_loader.py └── token_count │ └── tokenizer_service.py ├── task_response.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/process_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/core/process_manager.py -------------------------------------------------------------------------------- /docs/AUTO_LOAD.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/docs/AUTO_LOAD.md -------------------------------------------------------------------------------- /docs/AUTO_UNLOAD.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/docs/AUTO_UNLOAD.md -------------------------------------------------------------------------------- /docs/KV_CACHE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/docs/KV_CACHE.md -------------------------------------------------------------------------------- /docs/WEBUI.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/docs/WEBUI.md -------------------------------------------------------------------------------- /docs/imgs/webui_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/docs/imgs/webui_sample.png -------------------------------------------------------------------------------- /embedding/embedding_schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/embedding/embedding_schemas.py -------------------------------------------------------------------------------- /embedding/models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /embedding/run_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/embedding/run_process.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/main.py -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/store_model_files_here.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/requirements.txt -------------------------------------------------------------------------------- /schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/schemas.py -------------------------------------------------------------------------------- /tts/kokoro_tts/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/tts/kokoro_tts/generate.py -------------------------------------------------------------------------------- /tts/kokoro_tts/models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tts/kokoro_tts/run_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/tts/kokoro_tts/run_process.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/kv_cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/utils/kv_cache_utils.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/utils/utils.py -------------------------------------------------------------------------------- /webui/static/css/style.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/webui/static/css/style.css -------------------------------------------------------------------------------- /webui/static/js/main.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/webui/static/js/main.js -------------------------------------------------------------------------------- /webui/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/webui/templates/index.html -------------------------------------------------------------------------------- /whisper_stt/uploads/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whisper_stt/whisper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/whisper_stt/whisper.py -------------------------------------------------------------------------------- /worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /worker/kv_cache/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /worker/kv_cache/kv_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/kv_cache/kv_cache_manager.py -------------------------------------------------------------------------------- /worker/kv_cache/kv_cache_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/kv_cache/kv_cache_metadata.py -------------------------------------------------------------------------------- /worker/llm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/llm_model.py -------------------------------------------------------------------------------- /worker/llm_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/llm_process.py -------------------------------------------------------------------------------- /worker/logger_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/logger_config.py -------------------------------------------------------------------------------- /worker/task/completions_stream/generation_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/task/completions_stream/generation_service.py -------------------------------------------------------------------------------- /worker/task/debug_info/debug_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/task/debug_info/debug_info.py -------------------------------------------------------------------------------- /worker/task/load/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/task/load/model_loader.py -------------------------------------------------------------------------------- /worker/task/token_count/tokenizer_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/task/token_count/tokenizer_service.py -------------------------------------------------------------------------------- /worker/task_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/task_response.py -------------------------------------------------------------------------------- /worker/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitkaz/mlx_gguf_server/HEAD/worker/utils.py --------------------------------------------------------------------------------