├── .clang-format ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── -bug-.yaml │ ├── -bug2-.yaml │ ├── -feature-.yaml │ └── -feature2-.yaml └── workflows │ ├── book-ci.yml │ ├── deploy.yml │ ├── docker-image.yml │ ├── install.yml │ ├── package_wheel_release.yml │ ├── package_wheel_test.yml │ └── score.yml ├── .gitignore ├── .gitmodules ├── .pylintrc ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── README_ZH.md ├── README_optimize.md ├── SECURITY.md ├── WeChatGroup.png ├── book.toml ├── csrc ├── balance_serve │ ├── CMakeLists.txt │ ├── kvc2 │ │ ├── .clang-format │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── config │ │ │ ├── model_configs.json │ │ │ └── quant_configs.json │ │ ├── export_envs_before_run.sh │ │ ├── install_deps.sh │ │ ├── mkfs.sh │ │ ├── src │ │ │ ├── CMakeLists.txt │ │ │ ├── async_store.cpp │ │ │ ├── async_store.hh │ │ │ ├── bind.cpp │ │ │ ├── cache_entry.cpp │ │ │ ├── cache_entry.hh │ │ │ ├── common.h │ │ │ ├── cuda_stream_manager.cpp │ │ │ ├── cuda_stream_manager.hh │ │ │ ├── defs.h │ │ │ ├── gpu_cache.cpp │ │ │ ├── gpu_cache.hh │ │ │ ├── hasher.hpp │ │ │ ├── io_helper.hpp │ │ │ ├── kvc2.h │ │ │ ├── kvc2_utils.py │ │ │ ├── metrics.cpp │ │ │ ├── metrics.h │ │ │ ├── model_config.h │ │ │ ├── page_aligned_memory_pool.cpp │ │ │ ├── page_aligned_memory_pool.h │ │ │ ├── prefix.cpp │ │ │ └── utils │ │ │ │ ├── all.hpp │ │ │ │ ├── arithmetic.hpp │ │ │ │ ├── easy_format.hpp │ │ │ │ ├── lock_free_queue.hpp │ │ │ │ ├── mpsc.hpp │ │ │ │ ├── mutex_extend.hpp │ │ │ │ ├── periodic_task.hpp │ │ │ │ ├── spin_lock.hpp │ │ │ │ └── timer.hpp │ │ ├── test │ │ │ ├── CMakeLists.txt │ │ │ ├── hashmap_test.cpp │ │ │ ├── kvc2_export_header_test.cpp │ │ │ ├── kvc2_export_load_test.cpp │ │ │ ├── kvc2_test_utils.cpp │ │ │ ├── kvc2test │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── append-tokens.cpp │ │ │ │ ├── check-flush-back.cpp │ │ │ │ ├── common.hpp │ │ │ │ ├── flush-back.cpp │ │ │ │ ├── lookup-alt-gpu.cpp │ │ │ │ ├── lookup-alt.cpp │ │ │ │ ├── lookup-gpu-async.cpp │ │ │ │ ├── lookup-gpu-mt-without-vcache.cpp │ │ │ │ ├── lookup-gpu-mt.cpp │ │ │ │ ├── lookup-gpu.cpp │ │ │ │ ├── lookup-mt.cpp │ │ │ │ ├── lookup-without-vcache.cpp │ │ │ │ ├── lookup.cpp │ │ │ │ └── raw_insert_read.cpp │ │ │ ├── kvcache_disk_insert_read_test.cpp │ │ │ ├── kvcache_mem_eviction_test.cpp │ │ │ ├── kvcache_mem_insert_read_test.cpp │ │ │ ├── kvcache_save_load_test.cpp │ │ │ ├── kvcache_test_utils.cpp │ │ │ ├── page_pool_test.cpp │ │ │ ├── prefix_test.cpp │ │ │ ├── pytest_load.py │ │ │ ├── pytest_mem_prefix_test.py │ │ │ ├── pytest_mem_read.py │ │ │ ├── pytest_raw_insert_and_read.py │ │ │ ├── test_align.py │ │ │ ├── test_cuda_stream.cpp │ │ │ ├── test_cuda_stream_manager.cpp │ │ │ ├── test_lock_free_queue.cpp │ │ │ ├── test_periodic_task.cpp │ │ │ ├── test_queue_perf.cpp │ │ │ ├── test_std_list.cpp │ │ │ └── xxHash_test.cpp │ │ └── unit_test.sh │ └── sched │ │ ├── CMakeLists.txt │ │ ├── bind.cpp │ │ ├── metrics.cpp │ │ ├── metrics.h │ │ ├── model_config.h │ │ ├── scheduler.cpp │ │ ├── scheduler.h │ │ └── utils │ │ ├── all.hpp │ │ ├── arithmetic.hpp │ │ ├── atomic_ptr_with_flags.hpp │ │ ├── csv.hpp │ │ ├── easy_format.hpp │ │ ├── mpsc.hpp │ │ ├── readable_number.hpp │ │ ├── statistics.hpp │ │ └── timer.hpp ├── custom_marlin │ ├── __init__.py │ ├── binding.cpp │ ├── gptq_marlin │ │ ├── gptq_marlin.cu │ │ ├── gptq_marlin.cuh │ │ ├── gptq_marlin_dtypes.cuh │ │ ├── gptq_marlin_repack.cu │ │ └── ops.h │ ├── setup.py │ ├── test_cuda_graph.py │ └── utils │ │ ├── __init__.py │ │ ├── format24.py │ │ ├── marlin_24_perms.py │ │ ├── marlin_perms.py │ │ ├── marlin_utils.py │ │ └── quant_utils.py └── ktransformers_ext │ ├── CMakeLists.txt │ ├── bench │ ├── bench_attention.py │ ├── bench_attention_torch.py │ ├── bench_linear.py │ ├── bench_linear_torch.py │ ├── bench_mlp.py │ ├── bench_mlp_torch.py │ ├── bench_moe.py │ ├── bench_moe_amx.py │ └── bench_moe_torch.py │ ├── cmake │ └── FindSIMD.cmake │ ├── cpu_backend │ ├── backend.cpp │ ├── backend.h │ ├── core_info.h │ ├── cpuinfer.h │ ├── shared_mem_buffer.cpp │ ├── shared_mem_buffer.h │ ├── task_queue.cpp │ ├── task_queue.h │ └── vendors │ │ ├── README.md │ │ ├── cuda.h │ │ ├── hip.h │ │ ├── musa.h │ │ └── vendor.h │ ├── cuda │ ├── binding.cpp │ ├── custom_gguf │ │ ├── dequant.cu │ │ └── ops.h │ ├── gptq_marlin │ │ ├── gptq_marlin.cu │ │ ├── gptq_marlin.cuh │ │ ├── gptq_marlin_dtypes.cuh │ │ └── ops.h │ ├── setup.py │ └── test_dequant.py │ ├── examples │ ├── test_attention.py │ ├── test_linear.py │ ├── test_mlp.py │ └── test_moe.py │ ├── ext_bindings.cpp │ ├── operators │ ├── amx │ │ ├── la │ │ │ ├── amx.hpp │ │ │ └── utils.hpp │ │ └── moe.hpp │ ├── kvcache │ │ ├── kvcache.h │ │ ├── kvcache_attn.cpp │ │ ├── kvcache_load_dump.cpp │ │ ├── kvcache_read_write.cpp │ │ └── kvcache_utils.cpp │ └── llamafile │ │ ├── conversion.h │ │ ├── linear.cpp │ │ ├── linear.h │ │ ├── mlp.cpp │ │ ├── mlp.h │ │ ├── moe.cpp │ │ └── moe.h │ └── vendors │ ├── cuda.h │ ├── hip.h │ ├── musa.h │ └── vendor.h ├── custom ├── .gitignore ├── Makefile └── core_info.c ├── doc ├── README.md ├── SUMMARY.md ├── assets │ ├── BigCodeBench.png │ ├── DeepSeek-on-KTransformers.png │ ├── Framework_effect.png │ ├── InfLLM_equation.jpg │ ├── InfLLM_framework.png │ ├── InjectStruction.png │ ├── KTransformers.png │ ├── KTransformers_long_context_v1.png │ ├── KTransformers_long_context_v2.png │ ├── Quest_framework.png │ ├── SnapKV_framework.png │ ├── SparQ_attention.png │ ├── amx.png │ ├── amx_avx.png │ ├── amx_intro.png │ ├── cpuinfer.png │ ├── deepseekv2_structure.png │ ├── internlm_memory.png │ ├── long_context_generate.png │ ├── long_context_prefill.png │ ├── model_structure_guild.png │ ├── multi_gpu.png │ ├── needle_128K.png │ ├── needle_1M.png │ ├── onednn_1.png │ └── website.png ├── basic │ ├── note1.md │ └── note2.md ├── en │ ├── AMX.md │ ├── DeepseekR1_V3_tutorial.md │ ├── Docker.md │ ├── FAQ.md │ ├── ROCm.md │ ├── V3-success.md │ ├── api │ │ └── server │ │ │ ├── api.md │ │ │ ├── run-tabby.png │ │ │ ├── server-arch.png │ │ │ ├── server.md │ │ │ ├── tabby.md │ │ │ ├── visit-api-tags.png │ │ │ └── website.md │ ├── balance-serve.md │ ├── benchmark.md │ ├── deepseek-v2-injection.md │ ├── fp8_kernel.md │ ├── injection_tutorial.md │ ├── install.md │ ├── llama4.md │ ├── long_context_introduction.md │ ├── long_context_tutorial.md │ ├── makefile_usage.md │ ├── multi-gpu-tutorial.md │ └── operators │ │ ├── Combined_MoE_time_per_layer.png │ │ ├── Linear_projection_time.png │ │ └── llamafile.md └── zh │ ├── DeepseekR1_V3_tutorial_zh.md │ └── api │ └── server │ ├── api.md │ ├── run-tabby.png │ ├── server-arch.png │ ├── server.md │ ├── tabby.md │ ├── visit-api-tags.png │ └── website.md ├── install.bat ├── install.sh ├── ktransformers ├── __init__.py ├── configs │ ├── config.yaml │ └── log_config.ini ├── ktransformers_ext │ ├── operators │ │ └── custom_marlin │ │ │ └── quantize │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── format_24.py │ │ │ ├── marlin_24_perms.py │ │ │ ├── marlin_perms.py │ │ │ ├── marlin_utils.py │ │ │ └── quant_utils.py │ └── triton │ │ └── fp8gemm.py ├── local_chat.py ├── local_chat_test.py ├── models │ ├── __init__.py │ ├── configuration_deepseek.py │ ├── configuration_deepseek_v3.py │ ├── configuration_llama.py │ ├── configuration_qwen2_moe.py │ ├── configuration_qwen3_moe.py │ ├── custom_cache.py │ ├── custom_modeling_deepseek_v2.py │ ├── custom_modeling_deepseek_v3.py │ ├── custom_modeling_qwen2_moe.py │ ├── custom_modeling_qwen3_moe.py │ ├── modeling_deepseek.py │ ├── modeling_deepseek_v3.py │ ├── modeling_llama.py │ ├── modeling_mixtral.py │ ├── modeling_qwen2_moe.py │ └── modeling_qwen3_moe.py ├── operators │ ├── RoPE.py │ ├── __init__.py │ ├── attention.py │ ├── balance_serve_attention.py │ ├── base_operator.py │ ├── cpuinfer.py │ ├── dynamic_attention.py │ ├── experts.py │ ├── flashinfer_batch_prefill_wrapper.py │ ├── flashinfer_wrapper.py │ ├── gate.py │ ├── layernorm.py │ ├── linear.py │ ├── mlp.py │ ├── models.py │ ├── triton_attention.py │ └── triton_attention_prefill.py ├── optimize │ ├── optimize.py │ └── optimize_rules │ │ ├── DeepSeek-V2-Chat-multi-gpu-4.yaml │ │ ├── DeepSeek-V2-Chat-multi-gpu.yaml │ │ ├── DeepSeek-V2-Chat.yaml │ │ ├── DeepSeek-V2-Lite-Chat-multi-gpu.yaml │ │ ├── DeepSeek-V2-Lite-Chat.yaml │ │ ├── DeepSeek-V3-Chat-amx.yaml │ │ ├── DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml │ │ ├── DeepSeek-V3-Chat-fp8-linear-ggml-experts.yaml │ │ ├── DeepSeek-V3-Chat-multi-gpu-4.yaml │ │ ├── DeepSeek-V3-Chat-multi-gpu-8.yaml │ │ ├── DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml │ │ ├── DeepSeek-V3-Chat-multi-gpu-marlin.yaml │ │ ├── DeepSeek-V3-Chat-multi-gpu.yaml │ │ ├── DeepSeek-V3-Chat-serve.yaml │ │ ├── DeepSeek-V3-Chat.yaml │ │ ├── Internlm2_5-7b-Chat-1m.yaml │ │ ├── Mixtral.yaml │ │ ├── Moonlight-16B-A3B-serve.yaml │ │ ├── Moonlight-16B-A3B.yaml │ │ ├── Qwen2-57B-A14B-Instruct-multi-gpu.yaml │ │ ├── Qwen2-57B-A14B-Instruct.yaml │ │ ├── Qwen2-serve-amx.yaml │ │ ├── Qwen2-serve.yaml │ │ ├── Qwen3Moe-serve-amx.yaml │ │ ├── Qwen3Moe-serve.yaml │ │ └── rocm │ │ └── DeepSeek-V3-Chat.yaml ├── server │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── ollama │ │ │ ├── __init__.py │ │ │ └── completions.py │ │ ├── openai │ │ │ ├── __init__.py │ │ │ ├── assistants │ │ │ │ ├── __init__.py │ │ │ │ ├── assistants.py │ │ │ │ ├── messages.py │ │ │ │ ├── runs.py │ │ │ │ └── threads.py │ │ │ ├── endpoints │ │ │ │ ├── __init__.py │ │ │ │ └── chat.py │ │ │ └── legacy │ │ │ │ ├── __init__.py │ │ │ │ └── completions.py │ │ └── web │ │ │ ├── __init__.py │ │ │ └── system.py │ ├── args.py │ ├── backend │ │ ├── __init__.py │ │ ├── args.py │ │ ├── base.py │ │ ├── context_manager.py │ │ └── interfaces │ │ │ ├── __init__.py │ │ │ ├── balance_serve.py │ │ │ ├── exllamav2.py │ │ │ ├── ktransformers.py │ │ │ └── transformers.py │ ├── balance_serve │ │ ├── inference │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── distributed │ │ │ │ ├── __init__.py │ │ │ │ ├── communication_op.py │ │ │ │ ├── cuda_wrapper.py │ │ │ │ ├── custom_all_reduce.py │ │ │ │ ├── custom_all_reduce_utils.py │ │ │ │ ├── parallel_state.py │ │ │ │ ├── pynccl.py │ │ │ │ ├── pynccl_wrapper.py │ │ │ │ └── utils.py │ │ │ ├── forward_batch.py │ │ │ ├── model_runner.py │ │ │ ├── query_manager.py │ │ │ └── sampling │ │ │ │ ├── penaltylib │ │ │ │ ├── __init__.py │ │ │ │ ├── orchestrator.py │ │ │ │ └── penalizers │ │ │ │ │ ├── frequency_penalty.py │ │ │ │ │ ├── min_new_tokens.py │ │ │ │ │ ├── presence_penalty.py │ │ │ │ │ └── repetition_penalty.py │ │ │ │ └── sampler.py │ │ ├── sched_rpc.py │ │ └── settings.py │ ├── config │ │ ├── config.py │ │ ├── log.py │ │ └── singleton.py │ ├── crud │ │ ├── __init__.py │ │ └── assistants │ │ │ ├── __init__.py │ │ │ ├── assistants.py │ │ │ ├── messages.py │ │ │ ├── runs.py │ │ │ └── threads.py │ ├── exceptions.py │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ └── assistants │ │ │ ├── __init__.py │ │ │ ├── assistants.py │ │ │ ├── messages.py │ │ │ ├── run_steps.py │ │ │ ├── runs.py │ │ │ └── threads.py │ ├── requirements.txt │ ├── schemas │ │ ├── __init__.py │ │ ├── assistants │ │ │ ├── __init__.py │ │ │ ├── assistants.py │ │ │ ├── messages.py │ │ │ ├── runs.py │ │ │ ├── streaming.py │ │ │ ├── threads.py │ │ │ └── tool.py │ │ ├── base.py │ │ ├── conversation.py │ │ ├── endpoints │ │ │ └── chat.py │ │ └── legacy │ │ │ ├── __init__.py │ │ │ └── completions.py │ └── utils │ │ ├── __init__.py │ │ ├── create_interface.py │ │ ├── multi_timer.py │ │ └── sql_utils.py ├── tests │ ├── .gitignore │ ├── AIME_2024 │ │ ├── eval_api.py │ │ ├── evaluation.py │ │ └── prompts.py │ ├── dequant_gpu.py │ ├── dequant_gpu_t.py │ ├── function_call_test.py │ ├── humaneval │ │ ├── eval_api.py │ │ ├── evaluation.py │ │ └── prompts.py │ ├── mmlu_pro_test.py │ ├── mmlu_test.py │ ├── mmlu_test_multi.py │ ├── score.py │ ├── test_client.py │ ├── test_pytorch_q8.py │ ├── test_speed.py │ └── triton_fp8gemm_test.py ├── util │ ├── cuda_graph_runner.py │ ├── custom_gguf.py │ ├── custom_loader.py │ ├── modeling_rope_utils.py │ ├── textstream.py │ ├── utils.py │ └── vendors.py └── website │ ├── .browserslistrc │ ├── .eslintrc.js │ ├── .gitignore │ ├── README.md │ ├── config.d.ts │ ├── jest.config.js │ ├── package-lock.json │ ├── package.json │ ├── public │ ├── balck.ico │ ├── config.js │ ├── css │ │ └── reset.css │ ├── images │ │ ├── assistant-avatar.png │ │ ├── avatar.png │ │ ├── bgbg.png │ │ ├── logo.ico │ │ ├── logo.png │ │ ├── three.png │ │ └── user-filling.png │ └── index.html │ ├── src │ ├── App.vue │ ├── api │ │ ├── api-client.ts │ │ ├── assistant.ts │ │ ├── message.ts │ │ ├── run.ts │ │ └── thread.ts │ ├── assets │ │ ├── css │ │ │ └── mixins.styl │ │ └── iconfont │ │ │ ├── demo.css │ │ │ ├── demo_index.html │ │ │ ├── iconfont.css │ │ │ ├── iconfont.js │ │ │ ├── iconfont.json │ │ │ ├── iconfont.svg │ │ │ ├── iconfont.ttf │ │ │ ├── iconfont.woff │ │ │ └── iconfont.woff2 │ ├── components │ │ └── chat │ │ │ └── index.vue │ ├── conf │ │ └── config.ts │ ├── locals │ │ ├── en.js │ │ ├── index.js │ │ └── zh.js │ ├── main.ts │ ├── router │ │ └── index.ts │ ├── shims-vue.d.ts │ ├── store │ │ └── index.ts │ ├── utils │ │ ├── copy.ts │ │ └── types.ts │ └── views │ │ └── home.vue │ ├── tests │ └── unit │ │ └── example.spec.ts │ ├── tsconfig.json │ └── vue.config.js ├── merge_tensors └── merge_safetensor_gguf.py ├── pyproject.toml ├── requirements-local_chat.txt ├── scripts ├── generate-core-info.py ├── numa-stats.sh ├── show-cpu.sh └── show-mem.sh ├── setup.py └── third_party ├── llamafile ├── README.md ├── bench.h ├── flags.cpp ├── flags.h ├── iqk_mul_mat.inc ├── iqk_mul_mat_amd_avx2.cpp ├── iqk_mul_mat_amd_zen4.cpp ├── iqk_mul_mat_arm82.cpp ├── macros.h ├── micros.h ├── numba.h ├── sgemm.cpp ├── sgemm.h ├── tinyblas_cpu.h ├── tinyblas_cpu_mixmul.inc ├── tinyblas_cpu_mixmul_amd_avx.cpp ├── tinyblas_cpu_mixmul_amd_avx2.cpp ├── tinyblas_cpu_mixmul_amd_avx512f.cpp ├── tinyblas_cpu_mixmul_amd_avxvnni.cpp ├── tinyblas_cpu_mixmul_amd_fma.cpp ├── tinyblas_cpu_mixmul_amd_zen4.cpp ├── tinyblas_cpu_mixmul_arm80.cpp ├── tinyblas_cpu_mixmul_arm82.cpp ├── tinyblas_cpu_sgemm.inc ├── tinyblas_cpu_sgemm_amd_avx.cpp ├── tinyblas_cpu_sgemm_amd_avx2.cpp ├── tinyblas_cpu_sgemm_amd_avx512f.cpp ├── tinyblas_cpu_sgemm_amd_avxvnni.cpp ├── tinyblas_cpu_sgemm_amd_fma.cpp ├── tinyblas_cpu_sgemm_amd_zen4.cpp ├── tinyblas_cpu_sgemm_arm80.cpp ├── tinyblas_cpu_sgemm_arm82.cpp └── tinyblas_cpu_unsupported.cpp └── nlohmann ├── json.hpp └── json_fwd.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | ColumnLimit: 120 # 设置最大行宽为 100 4 | IndentWidth: 2 5 | --- 6 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.devcontainer/devcontainer.json -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.flake8 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-bug-.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/ISSUE_TEMPLATE/-bug-.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-bug2-.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/ISSUE_TEMPLATE/-bug2-.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-feature-.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/ISSUE_TEMPLATE/-feature-.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-feature2-.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/ISSUE_TEMPLATE/-feature2-.yaml -------------------------------------------------------------------------------- /.github/workflows/book-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/book-ci.yml -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/deploy.yml -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/docker-image.yml -------------------------------------------------------------------------------- /.github/workflows/install.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/install.yml -------------------------------------------------------------------------------- /.github/workflows/package_wheel_release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/package_wheel_release.yml -------------------------------------------------------------------------------- /.github/workflows/package_wheel_test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/package_wheel_test.yml -------------------------------------------------------------------------------- /.github/workflows/score.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.github/workflows/score.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.gitmodules -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/.pylintrc -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/README.md -------------------------------------------------------------------------------- /README_ZH.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/README_ZH.md -------------------------------------------------------------------------------- /README_optimize.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/README_optimize.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/SECURITY.md -------------------------------------------------------------------------------- /WeChatGroup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/WeChatGroup.png -------------------------------------------------------------------------------- /book.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/book.toml -------------------------------------------------------------------------------- /csrc/balance_serve/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/.clang-format -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/README.md -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/config/model_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/config/model_configs.json -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/config/quant_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/config/quant_configs.json -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/export_envs_before_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/export_envs_before_run.sh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/install_deps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/install_deps.sh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/mkfs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/mkfs.sh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/async_store.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/async_store.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/async_store.hh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/async_store.hh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/bind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/bind.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/cache_entry.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/cache_entry.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/cache_entry.hh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/cache_entry.hh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/common.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/cuda_stream_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/cuda_stream_manager.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/cuda_stream_manager.hh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/cuda_stream_manager.hh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/defs.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/defs.h -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/gpu_cache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/gpu_cache.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/gpu_cache.hh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/gpu_cache.hh -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/hasher.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/hasher.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/io_helper.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/io_helper.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/kvc2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/kvc2.h -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/kvc2_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/kvc2_utils.py -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/metrics.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/metrics.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/metrics.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/metrics.h -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/model_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/model_config.h -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/page_aligned_memory_pool.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/page_aligned_memory_pool.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/page_aligned_memory_pool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/page_aligned_memory_pool.h -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/prefix.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/prefix.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/all.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/all.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/arithmetic.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/arithmetic.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/easy_format.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/easy_format.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/lock_free_queue.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/lock_free_queue.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/mpsc.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/mpsc.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/mutex_extend.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/mutex_extend.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/periodic_task.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/periodic_task.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/spin_lock.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/spin_lock.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/src/utils/timer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/src/utils/timer.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/hashmap_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/hashmap_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2_export_header_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2_export_header_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2_export_load_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2_export_load_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2_test_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2_test_utils.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/append-tokens.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/append-tokens.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/check-flush-back.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/check-flush-back.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/common.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/flush-back.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/flush-back.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-alt-gpu.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-alt-gpu.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-alt.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-alt.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-async.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-async.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-mt-without-vcache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-mt-without-vcache.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-mt.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-mt.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-mt.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-mt.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup-without-vcache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup-without-vcache.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/lookup.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/lookup.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvc2test/raw_insert_read.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvc2test/raw_insert_read.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvcache_disk_insert_read_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvcache_disk_insert_read_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvcache_mem_eviction_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvcache_mem_eviction_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvcache_mem_insert_read_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvcache_mem_insert_read_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvcache_save_load_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/kvcache_save_load_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/kvcache_test_utils.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/page_pool_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/page_pool_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/prefix_test.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/pytest_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/pytest_load.py -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/pytest_mem_prefix_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/pytest_mem_prefix_test.py -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/pytest_mem_read.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/pytest_mem_read.py -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/pytest_raw_insert_and_read.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/pytest_raw_insert_and_read.py -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_align.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_align.py -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_cuda_stream.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_cuda_stream.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_cuda_stream_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_cuda_stream_manager.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_lock_free_queue.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_lock_free_queue.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_periodic_task.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_periodic_task.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_queue_perf.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_queue_perf.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/test_std_list.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/test_std_list.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/test/xxHash_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/test/xxHash_test.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/kvc2/unit_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/kvc2/unit_test.sh -------------------------------------------------------------------------------- /csrc/balance_serve/sched/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/balance_serve/sched/bind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/bind.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/metrics.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/metrics.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/metrics.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/metrics.h -------------------------------------------------------------------------------- /csrc/balance_serve/sched/model_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/model_config.h -------------------------------------------------------------------------------- /csrc/balance_serve/sched/scheduler.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/scheduler.cpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/scheduler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/scheduler.h -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/all.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/all.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/arithmetic.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/arithmetic.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/atomic_ptr_with_flags.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/atomic_ptr_with_flags.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/csv.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/csv.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/easy_format.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/easy_format.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/mpsc.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/mpsc.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/readable_number.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/readable_number.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/statistics.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/statistics.hpp -------------------------------------------------------------------------------- /csrc/balance_serve/sched/utils/timer.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/balance_serve/sched/utils/timer.hpp -------------------------------------------------------------------------------- /csrc/custom_marlin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /csrc/custom_marlin/binding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/binding.cpp -------------------------------------------------------------------------------- /csrc/custom_marlin/gptq_marlin/gptq_marlin.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/gptq_marlin/gptq_marlin.cu -------------------------------------------------------------------------------- /csrc/custom_marlin/gptq_marlin/gptq_marlin.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/gptq_marlin/gptq_marlin.cuh -------------------------------------------------------------------------------- /csrc/custom_marlin/gptq_marlin/gptq_marlin_dtypes.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/gptq_marlin/gptq_marlin_dtypes.cuh -------------------------------------------------------------------------------- /csrc/custom_marlin/gptq_marlin/gptq_marlin_repack.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/gptq_marlin/gptq_marlin_repack.cu -------------------------------------------------------------------------------- /csrc/custom_marlin/gptq_marlin/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/gptq_marlin/ops.h -------------------------------------------------------------------------------- /csrc/custom_marlin/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/setup.py -------------------------------------------------------------------------------- /csrc/custom_marlin/test_cuda_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/test_cuda_graph.py -------------------------------------------------------------------------------- /csrc/custom_marlin/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /csrc/custom_marlin/utils/format24.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/utils/format24.py -------------------------------------------------------------------------------- /csrc/custom_marlin/utils/marlin_24_perms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/utils/marlin_24_perms.py -------------------------------------------------------------------------------- /csrc/custom_marlin/utils/marlin_perms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/utils/marlin_perms.py -------------------------------------------------------------------------------- /csrc/custom_marlin/utils/marlin_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/utils/marlin_utils.py -------------------------------------------------------------------------------- /csrc/custom_marlin/utils/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/custom_marlin/utils/quant_utils.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_attention.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_attention_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_attention_torch.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_linear.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_linear_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_linear_torch.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_mlp.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_mlp_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_mlp_torch.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_moe.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_moe_amx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_moe_amx.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/bench/bench_moe_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/bench/bench_moe_torch.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cmake/FindSIMD.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cmake/FindSIMD.cmake -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/backend.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/backend.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/backend.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/core_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/core_info.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/cpuinfer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/cpuinfer.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/shared_mem_buffer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/shared_mem_buffer.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/shared_mem_buffer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/shared_mem_buffer.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/task_queue.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/task_queue.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/task_queue.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/task_queue.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/vendors/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/vendors/README.md -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/vendors/cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/vendors/cuda.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/vendors/hip.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/vendors/hip.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/vendors/musa.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/vendors/musa.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cpu_backend/vendors/vendor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cpu_backend/vendors/vendor.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/binding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/binding.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/custom_gguf/dequant.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/custom_gguf/dequant.cu -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/custom_gguf/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/custom_gguf/ops.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cuh -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/gptq_marlin/gptq_marlin_dtypes.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/gptq_marlin/gptq_marlin_dtypes.cuh -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/gptq_marlin/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/gptq_marlin/ops.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/setup.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/cuda/test_dequant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/cuda/test_dequant.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/examples/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/examples/test_attention.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/examples/test_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/examples/test_linear.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/examples/test_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/examples/test_mlp.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/examples/test_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/examples/test_moe.py -------------------------------------------------------------------------------- /csrc/ktransformers_ext/ext_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/ext_bindings.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/amx/la/amx.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/amx/la/amx.hpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/amx/la/utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/amx/la/utils.hpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/amx/moe.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/amx/moe.hpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/kvcache/kvcache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/kvcache/kvcache.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/kvcache/kvcache_attn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/kvcache/kvcache_attn.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/kvcache/kvcache_load_dump.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/kvcache/kvcache_load_dump.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/kvcache/kvcache_read_write.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/kvcache/kvcache_read_write.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/kvcache/kvcache_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/kvcache/kvcache_utils.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/conversion.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/conversion.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/linear.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/linear.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/linear.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/linear.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/mlp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/mlp.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/mlp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/mlp.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/moe.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/moe.cpp -------------------------------------------------------------------------------- /csrc/ktransformers_ext/operators/llamafile/moe.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/operators/llamafile/moe.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/vendors/cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/vendors/cuda.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/vendors/hip.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/vendors/hip.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/vendors/musa.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/vendors/musa.h -------------------------------------------------------------------------------- /csrc/ktransformers_ext/vendors/vendor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/csrc/ktransformers_ext/vendors/vendor.h -------------------------------------------------------------------------------- /custom/.gitignore: -------------------------------------------------------------------------------- 1 | libcore_info.so 2 | -------------------------------------------------------------------------------- /custom/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/custom/Makefile -------------------------------------------------------------------------------- /custom/core_info.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/custom/core_info.c -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/README.md -------------------------------------------------------------------------------- /doc/SUMMARY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/SUMMARY.md -------------------------------------------------------------------------------- /doc/assets/BigCodeBench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/BigCodeBench.png -------------------------------------------------------------------------------- /doc/assets/DeepSeek-on-KTransformers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/DeepSeek-on-KTransformers.png -------------------------------------------------------------------------------- /doc/assets/Framework_effect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/Framework_effect.png -------------------------------------------------------------------------------- /doc/assets/InfLLM_equation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/InfLLM_equation.jpg -------------------------------------------------------------------------------- /doc/assets/InfLLM_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/InfLLM_framework.png -------------------------------------------------------------------------------- /doc/assets/InjectStruction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/InjectStruction.png -------------------------------------------------------------------------------- /doc/assets/KTransformers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/KTransformers.png -------------------------------------------------------------------------------- /doc/assets/KTransformers_long_context_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/KTransformers_long_context_v1.png -------------------------------------------------------------------------------- /doc/assets/KTransformers_long_context_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/KTransformers_long_context_v2.png -------------------------------------------------------------------------------- /doc/assets/Quest_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/Quest_framework.png -------------------------------------------------------------------------------- /doc/assets/SnapKV_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/SnapKV_framework.png -------------------------------------------------------------------------------- /doc/assets/SparQ_attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/SparQ_attention.png -------------------------------------------------------------------------------- /doc/assets/amx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/amx.png -------------------------------------------------------------------------------- /doc/assets/amx_avx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/amx_avx.png -------------------------------------------------------------------------------- /doc/assets/amx_intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/amx_intro.png -------------------------------------------------------------------------------- /doc/assets/cpuinfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/cpuinfer.png -------------------------------------------------------------------------------- /doc/assets/deepseekv2_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/deepseekv2_structure.png -------------------------------------------------------------------------------- /doc/assets/internlm_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/internlm_memory.png -------------------------------------------------------------------------------- /doc/assets/long_context_generate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/long_context_generate.png -------------------------------------------------------------------------------- /doc/assets/long_context_prefill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/long_context_prefill.png -------------------------------------------------------------------------------- /doc/assets/model_structure_guild.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/model_structure_guild.png -------------------------------------------------------------------------------- /doc/assets/multi_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/multi_gpu.png -------------------------------------------------------------------------------- /doc/assets/needle_128K.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/needle_128K.png -------------------------------------------------------------------------------- /doc/assets/needle_1M.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/needle_1M.png -------------------------------------------------------------------------------- /doc/assets/onednn_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/onednn_1.png -------------------------------------------------------------------------------- /doc/assets/website.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/assets/website.png -------------------------------------------------------------------------------- /doc/basic/note1.md: -------------------------------------------------------------------------------- 1 | # basic-first20 2 | -------------------------------------------------------------------------------- /doc/basic/note2.md: -------------------------------------------------------------------------------- 1 | # basic-data_structure 2 | -------------------------------------------------------------------------------- /doc/en/AMX.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/AMX.md -------------------------------------------------------------------------------- /doc/en/DeepseekR1_V3_tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/DeepseekR1_V3_tutorial.md -------------------------------------------------------------------------------- /doc/en/Docker.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/Docker.md -------------------------------------------------------------------------------- /doc/en/FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/FAQ.md -------------------------------------------------------------------------------- /doc/en/ROCm.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/ROCm.md -------------------------------------------------------------------------------- /doc/en/V3-success.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/V3-success.md -------------------------------------------------------------------------------- /doc/en/api/server/api.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/api.md -------------------------------------------------------------------------------- /doc/en/api/server/run-tabby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/run-tabby.png -------------------------------------------------------------------------------- /doc/en/api/server/server-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/server-arch.png -------------------------------------------------------------------------------- /doc/en/api/server/server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/server.md -------------------------------------------------------------------------------- /doc/en/api/server/tabby.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/tabby.md -------------------------------------------------------------------------------- /doc/en/api/server/visit-api-tags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/visit-api-tags.png -------------------------------------------------------------------------------- /doc/en/api/server/website.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/api/server/website.md -------------------------------------------------------------------------------- /doc/en/balance-serve.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/balance-serve.md -------------------------------------------------------------------------------- /doc/en/benchmark.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/benchmark.md -------------------------------------------------------------------------------- /doc/en/deepseek-v2-injection.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/deepseek-v2-injection.md -------------------------------------------------------------------------------- /doc/en/fp8_kernel.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/fp8_kernel.md -------------------------------------------------------------------------------- /doc/en/injection_tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/injection_tutorial.md -------------------------------------------------------------------------------- /doc/en/install.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/install.md -------------------------------------------------------------------------------- /doc/en/llama4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/llama4.md -------------------------------------------------------------------------------- /doc/en/long_context_introduction.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/long_context_introduction.md -------------------------------------------------------------------------------- /doc/en/long_context_tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/long_context_tutorial.md -------------------------------------------------------------------------------- /doc/en/makefile_usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/makefile_usage.md -------------------------------------------------------------------------------- /doc/en/multi-gpu-tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/multi-gpu-tutorial.md -------------------------------------------------------------------------------- /doc/en/operators/Combined_MoE_time_per_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/operators/Combined_MoE_time_per_layer.png -------------------------------------------------------------------------------- /doc/en/operators/Linear_projection_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/operators/Linear_projection_time.png -------------------------------------------------------------------------------- /doc/en/operators/llamafile.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/en/operators/llamafile.md -------------------------------------------------------------------------------- /doc/zh/DeepseekR1_V3_tutorial_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/DeepseekR1_V3_tutorial_zh.md -------------------------------------------------------------------------------- /doc/zh/api/server/api.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/api.md -------------------------------------------------------------------------------- /doc/zh/api/server/run-tabby.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/run-tabby.png -------------------------------------------------------------------------------- /doc/zh/api/server/server-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/server-arch.png -------------------------------------------------------------------------------- /doc/zh/api/server/server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/server.md -------------------------------------------------------------------------------- /doc/zh/api/server/tabby.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/tabby.md -------------------------------------------------------------------------------- /doc/zh/api/server/visit-api-tags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/visit-api-tags.png -------------------------------------------------------------------------------- /doc/zh/api/server/website.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/doc/zh/api/server/website.md -------------------------------------------------------------------------------- /install.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/install.bat -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/install.sh -------------------------------------------------------------------------------- /ktransformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/__init__.py -------------------------------------------------------------------------------- /ktransformers/configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/configs/config.yaml -------------------------------------------------------------------------------- /ktransformers/configs/log_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/configs/log_config.ini -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/format_24.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/format_24.py -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_24_perms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_24_perms.py -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_perms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_perms.py -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_utils.py -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/quant_utils.py -------------------------------------------------------------------------------- /ktransformers/ktransformers_ext/triton/fp8gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/ktransformers_ext/triton/fp8gemm.py -------------------------------------------------------------------------------- /ktransformers/local_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/local_chat.py -------------------------------------------------------------------------------- /ktransformers/local_chat_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/local_chat_test.py -------------------------------------------------------------------------------- /ktransformers/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/models/configuration_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/configuration_deepseek.py -------------------------------------------------------------------------------- /ktransformers/models/configuration_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/configuration_deepseek_v3.py -------------------------------------------------------------------------------- /ktransformers/models/configuration_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/configuration_llama.py -------------------------------------------------------------------------------- /ktransformers/models/configuration_qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/configuration_qwen2_moe.py -------------------------------------------------------------------------------- /ktransformers/models/configuration_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/configuration_qwen3_moe.py -------------------------------------------------------------------------------- /ktransformers/models/custom_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/custom_cache.py -------------------------------------------------------------------------------- /ktransformers/models/custom_modeling_deepseek_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/custom_modeling_deepseek_v2.py -------------------------------------------------------------------------------- /ktransformers/models/custom_modeling_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/custom_modeling_deepseek_v3.py -------------------------------------------------------------------------------- /ktransformers/models/custom_modeling_qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/custom_modeling_qwen2_moe.py -------------------------------------------------------------------------------- /ktransformers/models/custom_modeling_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/custom_modeling_qwen3_moe.py -------------------------------------------------------------------------------- /ktransformers/models/modeling_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/modeling_deepseek.py -------------------------------------------------------------------------------- /ktransformers/models/modeling_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/modeling_deepseek_v3.py -------------------------------------------------------------------------------- /ktransformers/models/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/modeling_llama.py -------------------------------------------------------------------------------- /ktransformers/models/modeling_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/modeling_mixtral.py -------------------------------------------------------------------------------- /ktransformers/models/modeling_qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/modeling_qwen2_moe.py -------------------------------------------------------------------------------- /ktransformers/models/modeling_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/models/modeling_qwen3_moe.py -------------------------------------------------------------------------------- /ktransformers/operators/RoPE.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/RoPE.py -------------------------------------------------------------------------------- /ktransformers/operators/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ktransformers/operators/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/attention.py -------------------------------------------------------------------------------- /ktransformers/operators/balance_serve_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/balance_serve_attention.py -------------------------------------------------------------------------------- /ktransformers/operators/base_operator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/base_operator.py -------------------------------------------------------------------------------- /ktransformers/operators/cpuinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/cpuinfer.py -------------------------------------------------------------------------------- /ktransformers/operators/dynamic_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/dynamic_attention.py -------------------------------------------------------------------------------- /ktransformers/operators/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/experts.py -------------------------------------------------------------------------------- /ktransformers/operators/flashinfer_batch_prefill_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/flashinfer_batch_prefill_wrapper.py -------------------------------------------------------------------------------- /ktransformers/operators/flashinfer_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/flashinfer_wrapper.py -------------------------------------------------------------------------------- /ktransformers/operators/gate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/gate.py -------------------------------------------------------------------------------- /ktransformers/operators/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/layernorm.py -------------------------------------------------------------------------------- /ktransformers/operators/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/linear.py -------------------------------------------------------------------------------- /ktransformers/operators/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/mlp.py -------------------------------------------------------------------------------- /ktransformers/operators/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/models.py -------------------------------------------------------------------------------- /ktransformers/operators/triton_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/triton_attention.py -------------------------------------------------------------------------------- /ktransformers/operators/triton_attention_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/operators/triton_attention_prefill.py -------------------------------------------------------------------------------- /ktransformers/optimize/optimize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize.py -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V2-Chat-multi-gpu-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V2-Chat-multi-gpu-4.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V2-Chat-multi-gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V2-Chat-multi-gpu.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V2-Chat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V2-Chat.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat-multi-gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat-multi-gpu.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-amx.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-amx.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-4.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-8.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-marlin.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-marlin.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-serve.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-serve.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Internlm2_5-7b-Chat-1m.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Internlm2_5-7b-Chat-1m.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Mixtral.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Mixtral.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Moonlight-16B-A3B-serve.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Moonlight-16B-A3B-serve.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Moonlight-16B-A3B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Moonlight-16B-A3B.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct-multi-gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct-multi-gpu.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Qwen2-serve-amx.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Qwen2-serve-amx.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Qwen2-serve.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Qwen2-serve.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Qwen3Moe-serve-amx.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Qwen3Moe-serve-amx.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/Qwen3Moe-serve.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/Qwen3Moe-serve.yaml -------------------------------------------------------------------------------- /ktransformers/optimize/optimize_rules/rocm/DeepSeek-V3-Chat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/optimize/optimize_rules/rocm/DeepSeek-V3-Chat.yaml -------------------------------------------------------------------------------- /ktransformers/server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/api/ollama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/ollama/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/api/ollama/completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/ollama/completions.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/assistants/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/assistants/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/assistants/assistants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/assistants/assistants.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/assistants/messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/assistants/messages.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/assistants/runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/assistants/runs.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/assistants/threads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/assistants/threads.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/api/openai/endpoints/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/endpoints/chat.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/legacy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/legacy/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/api/openai/legacy/completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/openai/legacy/completions.py -------------------------------------------------------------------------------- /ktransformers/server/api/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/web/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/api/web/system.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/api/web/system.py -------------------------------------------------------------------------------- /ktransformers/server/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/args.py -------------------------------------------------------------------------------- /ktransformers/server/backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/backend/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/args.py -------------------------------------------------------------------------------- /ktransformers/server/backend/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/base.py -------------------------------------------------------------------------------- /ktransformers/server/backend/context_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/context_manager.py -------------------------------------------------------------------------------- /ktransformers/server/backend/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/backend/interfaces/balance_serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/interfaces/balance_serve.py -------------------------------------------------------------------------------- /ktransformers/server/backend/interfaces/exllamav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/interfaces/exllamav2.py -------------------------------------------------------------------------------- /ktransformers/server/backend/interfaces/ktransformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/interfaces/ktransformers.py -------------------------------------------------------------------------------- /ktransformers/server/backend/interfaces/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/backend/interfaces/transformers.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/config.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/communication_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/communication_op.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/cuda_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/cuda_wrapper.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/custom_all_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/custom_all_reduce_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce_utils.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/parallel_state.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/pynccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/pynccl.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/pynccl_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/pynccl_wrapper.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/distributed/utils.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/forward_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/forward_batch.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/model_runner.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/query_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/query_manager.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/penaltylib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/penaltylib/__init__.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/penaltylib/orchestrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/penaltylib/orchestrator.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/frequency_penalty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/frequency_penalty.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/min_new_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/min_new_tokens.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/presence_penalty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/presence_penalty.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/repetition_penalty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/repetition_penalty.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/inference/sampling/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/inference/sampling/sampler.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/sched_rpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/sched_rpc.py -------------------------------------------------------------------------------- /ktransformers/server/balance_serve/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/balance_serve/settings.py -------------------------------------------------------------------------------- /ktransformers/server/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/config/config.py -------------------------------------------------------------------------------- /ktransformers/server/config/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/config/log.py -------------------------------------------------------------------------------- /ktransformers/server/config/singleton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/config/singleton.py -------------------------------------------------------------------------------- /ktransformers/server/crud/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/crud/assistants/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/crud/assistants/assistants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/crud/assistants/assistants.py -------------------------------------------------------------------------------- /ktransformers/server/crud/assistants/messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/crud/assistants/messages.py -------------------------------------------------------------------------------- /ktransformers/server/crud/assistants/runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/crud/assistants/runs.py -------------------------------------------------------------------------------- /ktransformers/server/crud/assistants/threads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/crud/assistants/threads.py -------------------------------------------------------------------------------- /ktransformers/server/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/exceptions.py -------------------------------------------------------------------------------- /ktransformers/server/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/main.py -------------------------------------------------------------------------------- /ktransformers/server/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/models/assistants/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/models/assistants/assistants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/models/assistants/assistants.py -------------------------------------------------------------------------------- /ktransformers/server/models/assistants/messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/models/assistants/messages.py -------------------------------------------------------------------------------- /ktransformers/server/models/assistants/run_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/models/assistants/run_steps.py -------------------------------------------------------------------------------- /ktransformers/server/models/assistants/runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/models/assistants/runs.py -------------------------------------------------------------------------------- /ktransformers/server/models/assistants/threads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/models/assistants/threads.py -------------------------------------------------------------------------------- /ktransformers/server/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/requirements.txt -------------------------------------------------------------------------------- /ktransformers/server/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/assistants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/assistants/assistants.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/assistants/messages.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/assistants/runs.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/assistants/streaming.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/threads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/assistants/threads.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/assistants/tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/assistants/tool.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/base.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/conversation.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/endpoints/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/endpoints/chat.py -------------------------------------------------------------------------------- /ktransformers/server/schemas/legacy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/schemas/legacy/completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/schemas/legacy/completions.py -------------------------------------------------------------------------------- /ktransformers/server/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ktransformers/server/utils/create_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/utils/create_interface.py -------------------------------------------------------------------------------- /ktransformers/server/utils/multi_timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/utils/multi_timer.py -------------------------------------------------------------------------------- /ktransformers/server/utils/sql_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/server/utils/sql_utils.py -------------------------------------------------------------------------------- /ktransformers/tests/.gitignore: -------------------------------------------------------------------------------- 1 | results/ -------------------------------------------------------------------------------- /ktransformers/tests/AIME_2024/eval_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/AIME_2024/eval_api.py -------------------------------------------------------------------------------- /ktransformers/tests/AIME_2024/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/AIME_2024/evaluation.py -------------------------------------------------------------------------------- /ktransformers/tests/AIME_2024/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/AIME_2024/prompts.py -------------------------------------------------------------------------------- /ktransformers/tests/dequant_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/dequant_gpu.py -------------------------------------------------------------------------------- /ktransformers/tests/dequant_gpu_t.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/dequant_gpu_t.py -------------------------------------------------------------------------------- /ktransformers/tests/function_call_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/function_call_test.py -------------------------------------------------------------------------------- /ktransformers/tests/humaneval/eval_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/humaneval/eval_api.py -------------------------------------------------------------------------------- /ktransformers/tests/humaneval/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/humaneval/evaluation.py -------------------------------------------------------------------------------- /ktransformers/tests/humaneval/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/humaneval/prompts.py -------------------------------------------------------------------------------- /ktransformers/tests/mmlu_pro_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/mmlu_pro_test.py -------------------------------------------------------------------------------- /ktransformers/tests/mmlu_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/mmlu_test.py -------------------------------------------------------------------------------- /ktransformers/tests/mmlu_test_multi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/mmlu_test_multi.py -------------------------------------------------------------------------------- /ktransformers/tests/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/score.py -------------------------------------------------------------------------------- /ktransformers/tests/test_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/test_client.py -------------------------------------------------------------------------------- /ktransformers/tests/test_pytorch_q8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/test_pytorch_q8.py -------------------------------------------------------------------------------- /ktransformers/tests/test_speed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/test_speed.py -------------------------------------------------------------------------------- /ktransformers/tests/triton_fp8gemm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/tests/triton_fp8gemm_test.py -------------------------------------------------------------------------------- /ktransformers/util/cuda_graph_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/cuda_graph_runner.py -------------------------------------------------------------------------------- /ktransformers/util/custom_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/custom_gguf.py -------------------------------------------------------------------------------- /ktransformers/util/custom_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/custom_loader.py -------------------------------------------------------------------------------- /ktransformers/util/modeling_rope_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/modeling_rope_utils.py -------------------------------------------------------------------------------- /ktransformers/util/textstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/textstream.py -------------------------------------------------------------------------------- /ktransformers/util/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/utils.py -------------------------------------------------------------------------------- /ktransformers/util/vendors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/util/vendors.py -------------------------------------------------------------------------------- /ktransformers/website/.browserslistrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/.browserslistrc -------------------------------------------------------------------------------- /ktransformers/website/.eslintrc.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/.eslintrc.js -------------------------------------------------------------------------------- /ktransformers/website/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/.gitignore -------------------------------------------------------------------------------- /ktransformers/website/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/README.md -------------------------------------------------------------------------------- /ktransformers/website/config.d.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/config.d.ts -------------------------------------------------------------------------------- /ktransformers/website/jest.config.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/jest.config.js -------------------------------------------------------------------------------- /ktransformers/website/package-lock.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/package-lock.json -------------------------------------------------------------------------------- /ktransformers/website/package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/package.json -------------------------------------------------------------------------------- /ktransformers/website/public/balck.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/balck.ico -------------------------------------------------------------------------------- /ktransformers/website/public/config.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/config.js -------------------------------------------------------------------------------- /ktransformers/website/public/css/reset.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/css/reset.css -------------------------------------------------------------------------------- /ktransformers/website/public/images/assistant-avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/assistant-avatar.png -------------------------------------------------------------------------------- /ktransformers/website/public/images/avatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/avatar.png -------------------------------------------------------------------------------- /ktransformers/website/public/images/bgbg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/bgbg.png -------------------------------------------------------------------------------- /ktransformers/website/public/images/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/logo.ico -------------------------------------------------------------------------------- /ktransformers/website/public/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/logo.png -------------------------------------------------------------------------------- /ktransformers/website/public/images/three.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/three.png -------------------------------------------------------------------------------- /ktransformers/website/public/images/user-filling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/images/user-filling.png -------------------------------------------------------------------------------- /ktransformers/website/public/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/public/index.html -------------------------------------------------------------------------------- /ktransformers/website/src/App.vue: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/App.vue -------------------------------------------------------------------------------- /ktransformers/website/src/api/api-client.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/api/api-client.ts -------------------------------------------------------------------------------- /ktransformers/website/src/api/assistant.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/api/assistant.ts -------------------------------------------------------------------------------- /ktransformers/website/src/api/message.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/api/message.ts -------------------------------------------------------------------------------- /ktransformers/website/src/api/run.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/api/run.ts -------------------------------------------------------------------------------- /ktransformers/website/src/api/thread.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/api/thread.ts -------------------------------------------------------------------------------- /ktransformers/website/src/assets/css/mixins.styl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/css/mixins.styl -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/demo.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/demo.css -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/demo_index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/demo_index.html -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.css -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.js -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.json -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.svg -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.ttf -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.woff -------------------------------------------------------------------------------- /ktransformers/website/src/assets/iconfont/iconfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/assets/iconfont/iconfont.woff2 -------------------------------------------------------------------------------- /ktransformers/website/src/components/chat/index.vue: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/components/chat/index.vue -------------------------------------------------------------------------------- /ktransformers/website/src/conf/config.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/conf/config.ts -------------------------------------------------------------------------------- /ktransformers/website/src/locals/en.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/locals/en.js -------------------------------------------------------------------------------- /ktransformers/website/src/locals/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/locals/index.js -------------------------------------------------------------------------------- /ktransformers/website/src/locals/zh.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/locals/zh.js -------------------------------------------------------------------------------- /ktransformers/website/src/main.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/main.ts -------------------------------------------------------------------------------- /ktransformers/website/src/router/index.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/router/index.ts -------------------------------------------------------------------------------- /ktransformers/website/src/shims-vue.d.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/shims-vue.d.ts -------------------------------------------------------------------------------- /ktransformers/website/src/store/index.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/store/index.ts -------------------------------------------------------------------------------- /ktransformers/website/src/utils/copy.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/utils/copy.ts -------------------------------------------------------------------------------- /ktransformers/website/src/utils/types.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/utils/types.ts -------------------------------------------------------------------------------- /ktransformers/website/src/views/home.vue: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/src/views/home.vue -------------------------------------------------------------------------------- /ktransformers/website/tests/unit/example.spec.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/tests/unit/example.spec.ts -------------------------------------------------------------------------------- /ktransformers/website/tsconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/tsconfig.json -------------------------------------------------------------------------------- /ktransformers/website/vue.config.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/ktransformers/website/vue.config.js -------------------------------------------------------------------------------- /merge_tensors/merge_safetensor_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/merge_tensors/merge_safetensor_gguf.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-local_chat.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/requirements-local_chat.txt -------------------------------------------------------------------------------- /scripts/generate-core-info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/scripts/generate-core-info.py -------------------------------------------------------------------------------- /scripts/numa-stats.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/scripts/numa-stats.sh -------------------------------------------------------------------------------- /scripts/show-cpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/scripts/show-cpu.sh -------------------------------------------------------------------------------- /scripts/show-mem.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/scripts/show-mem.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/setup.py -------------------------------------------------------------------------------- /third_party/llamafile/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/README.md -------------------------------------------------------------------------------- /third_party/llamafile/bench.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/bench.h -------------------------------------------------------------------------------- /third_party/llamafile/flags.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/flags.cpp -------------------------------------------------------------------------------- /third_party/llamafile/flags.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/flags.h -------------------------------------------------------------------------------- /third_party/llamafile/iqk_mul_mat.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/iqk_mul_mat.inc -------------------------------------------------------------------------------- /third_party/llamafile/iqk_mul_mat_amd_avx2.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/iqk_mul_mat_amd_avx2.cpp -------------------------------------------------------------------------------- /third_party/llamafile/iqk_mul_mat_amd_zen4.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/iqk_mul_mat_amd_zen4.cpp -------------------------------------------------------------------------------- /third_party/llamafile/iqk_mul_mat_arm82.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/iqk_mul_mat_arm82.cpp -------------------------------------------------------------------------------- /third_party/llamafile/macros.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/macros.h -------------------------------------------------------------------------------- /third_party/llamafile/micros.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/micros.h -------------------------------------------------------------------------------- /third_party/llamafile/numba.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/numba.h -------------------------------------------------------------------------------- /third_party/llamafile/sgemm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/sgemm.cpp -------------------------------------------------------------------------------- /third_party/llamafile/sgemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/sgemm.h -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu.h -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul.inc -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_amd_avx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_amd_avx.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_amd_avx2.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_amd_avx2.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_amd_avx512f.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_amd_avx512f.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_amd_avxvnni.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_amd_avxvnni.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_amd_fma.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_amd_fma.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_amd_zen4.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_amd_zen4.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_arm80.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_arm80.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_mixmul_arm82.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_mixmul_arm82.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm.inc -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_amd_avx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_amd_avx.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_amd_avx2.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_amd_avx2.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_amd_avx512f.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_amd_avx512f.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_amd_avxvnni.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_amd_avxvnni.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_amd_fma.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_amd_fma.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_amd_zen4.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_amd_zen4.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_arm80.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_arm80.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_sgemm_arm82.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_sgemm_arm82.cpp -------------------------------------------------------------------------------- /third_party/llamafile/tinyblas_cpu_unsupported.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/llamafile/tinyblas_cpu_unsupported.cpp -------------------------------------------------------------------------------- /third_party/nlohmann/json.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/nlohmann/json.hpp -------------------------------------------------------------------------------- /third_party/nlohmann/json_fwd.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vproxy-tools/ktransformers/HEAD/third_party/nlohmann/json_fwd.hpp --------------------------------------------------------------------------------