├── .github ├── FUNDING.yml └── workflows │ ├── build.yml │ └── build_windows_only.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── convert.py ├── doc ├── cat.png ├── convert.md ├── cq_humaneval.png ├── exl3.md ├── gumbel_eval.png ├── humaneval.png ├── llama31_70b_instruct_bpw.png ├── llama31_70b_instruct_vram.png ├── llama31_8b_instruct_bpw.png ├── llama31_8b_instruct_kld_bpw.png ├── llama31_8b_instruct_kld_vram.png ├── llama31_8b_instruct_vram.png ├── llama32_1b_instruct_bpw.png ├── llama32_1b_instruct_vram.png ├── mistral_7b_instruct_v0.3_bpw.png ├── mistral_7b_instruct_v0.3_vram.png └── procedural_codebook.png ├── eval ├── compare_q.py ├── compare_q_anyprecision.py ├── compare_q_exllamav2.py ├── compare_q_exllamav3.py ├── compare_q_llamacpp.py ├── compare_q_logits.py ├── compare_q_qtip.py ├── compare_q_transformers.py ├── eval_texts │ ├── illustrious_client.txt │ ├── illustrious_client_c1.txt │ ├── illustrious_client_c2.txt │ ├── illustrious_client_sum.txt │ ├── variable_man_char.txt │ ├── variable_man_mod.txt │ ├── variable_man_mod_c1.txt │ └── variable_man_sum.txt ├── humaneval.py ├── longctx.py ├── mmlu.py ├── model_diff.py ├── ppl.py ├── ppl_transformers.py ├── prequant_test.py └── spec │ ├── llama3.1-70b-instruct_aqlm.json │ ├── llama3.1-70b-instruct_awq.json │ ├── llama3.1-70b-instruct_exl2.json │ ├── llama3.1-70b-instruct_exl3.json │ ├── llama3.1-70b-instruct_gguf.json │ ├── llama3.1-70b-instruct_qtip.json │ ├── llama3.1-70b-instruct_vptq.json │ ├── llama3.1-8b-instruct_anyp.json │ ├── llama3.1-8b-instruct_aqlm.json │ ├── llama3.1-8b-instruct_autoround.json │ ├── llama3.1-8b-instruct_exl2.json │ ├── llama3.1-8b-instruct_exl3.json │ ├── llama3.1-8b-instruct_gguf.json │ ├── llama3.1-8b-instruct_hf.json │ ├── llama3.1-8b-instruct_qtip.json │ ├── llama3.1-8b-instruct_vptq.json │ ├── llama3.2-1b-instruct_aqlm.json │ ├── llama3.2-1b-instruct_awq.json │ ├── llama3.2-1b-instruct_bnb.json │ ├── llama3.2-1b-instruct_exl2.json │ ├── llama3.2-1b-instruct_exl3.json │ ├── llama3.2-1b-instruct_gguf.json │ ├── llama3.2-1b-instruct_hf.json │ ├── mistral-7b-instruct-v0.3_awq.json │ ├── mistral-7b-instruct-v0.3_exl2.json │ ├── mistral-7b-instruct-v0.3_exl3.json │ ├── mistral-7b-instruct-v0.3_gguf.json │ ├── wiki2_llama3.json │ ├── wiki2_llama3_large.json │ └── wiki2_mistral_large.json ├── examples ├── async_generator.py ├── banned_strings.py ├── batched_translation.py ├── chat.py ├── chat_console.py ├── chat_io.py ├── chat_templates.py ├── chat_util.py ├── common.py ├── constrained_generation.py ├── dynamic_gen.py ├── generation_loop.py ├── generator.py ├── imgsearch.py ├── imgsearch_gallery.py ├── loading.py ├── media │ ├── cat.png │ └── strawberry.png ├── multimodal.py ├── overrides.yaml └── transformers_integration.py ├── exllamav3 ├── __init__.py ├── architecture │ ├── __init__.py │ ├── apertus.py │ ├── arcee.py │ ├── architectures.py │ ├── cohere.py │ ├── cohere2.py │ ├── decilm.py │ ├── dots1.py │ ├── ernie4_5.py │ ├── ernie4_5_moe.py │ ├── exaone4.py │ ├── gemma2.py │ ├── gemma3.py │ ├── glm4.py │ ├── glm4_moe.py │ ├── glm4v.py │ ├── glm4v_moe.py │ ├── llama.py │ ├── mimo.py │ ├── minimax_m2.py │ ├── mistral.py │ ├── mistral3.py │ ├── mixtral.py │ ├── phi3.py │ ├── qwen2.py │ ├── qwen3.py │ ├── qwen3_moe.py │ ├── qwen3_next.py │ ├── qwen3_vl.py │ ├── qwen3_vl_moe.py │ ├── seedoss.py │ └── smollm3.py ├── cache │ ├── __init__.py │ ├── cache.py │ ├── fp16.py │ ├── quant.py │ └── recurrent.py ├── constants.py ├── conversion │ ├── __init__.py │ ├── allocation.py │ ├── calibration_data.py │ ├── compile.py │ ├── convert_model.py │ ├── measure_model.py │ ├── optimize_model.py │ ├── quant_config.py │ └── standard_cal_data │ │ ├── __init__.py │ │ ├── c4.utf8 │ │ ├── code.utf8 │ │ ├── multilingual.utf8 │ │ ├── technical.utf8 │ │ ├── tiny.utf8 │ │ └── wiki.utf8 ├── exllamav3_ext │ ├── activation.cu │ ├── activation.cuh │ ├── activation_kernels.cuh │ ├── add.cu │ ├── add.cuh │ ├── avx2_target.cpp │ ├── avx2_target.h │ ├── bindings.cpp │ ├── cache │ │ ├── q_cache.cu │ │ ├── q_cache.cuh │ │ └── q_cache_kernels.cuh │ ├── causal_conv1d.cu │ ├── causal_conv1d.cuh │ ├── compat.cuh │ ├── gdn.cuh │ ├── generator │ │ ├── cache.cu │ │ ├── cache.cuh │ │ ├── gumbel.cu │ │ ├── gumbel.cuh │ │ ├── rep_pen.cu │ │ ├── rep_pen.cuh │ │ ├── sampling_basic.cu │ │ ├── sampling_basic.cuh │ │ ├── strings.cpp │ │ └── strings.h │ ├── gnd.cu │ ├── graph.cu │ ├── graph.cuh │ ├── hadamard.cpp │ ├── hadamard.h │ ├── hgemm.cu │ ├── hgemm.cuh │ ├── histogram.cu │ ├── histogram.cuh │ ├── libtorch │ │ ├── blocksparse_mlp.cpp │ │ ├── blocksparse_mlp.h │ │ ├── blocksparse_mlp_bc.h │ │ ├── gated_delta_net.cpp │ │ ├── gated_delta_net.h │ │ ├── gated_delta_net_bc.h │ │ ├── gated_rmsnorm.cpp │ │ ├── gated_rmsnorm.h │ │ ├── gated_rmsnorm_bc.h │ │ ├── linear.cpp │ │ ├── linear.h │ │ ├── linear_bc.h │ │ ├── mlp.cpp │ │ ├── mlp.h │ │ └── mlp_bc.h │ ├── norm.cu │ ├── norm.cuh │ ├── parallel │ │ ├── all_reduce.cu │ │ ├── all_reduce.cuh │ │ ├── all_reduce_cpu.cu │ │ ├── all_reduce_cpu_avx2.cpp │ │ ├── all_reduce_cpu_avx2.h │ │ ├── barrier.cu │ │ ├── barrier.cuh │ │ ├── barrier_inner.cuh │ │ ├── broadcast.cu │ │ ├── broadcast.cuh │ │ ├── context.cu │ │ ├── context.cuh │ │ ├── gather.cu │ │ ├── gather.cuh │ │ ├── ll.cuh │ │ └── timeout.cuh │ ├── ptx.cuh │ ├── quant │ │ ├── codebook.cuh │ │ ├── comp_units │ │ │ ├── exl3_comp_unit_1.cu │ │ │ ├── exl3_comp_unit_1.cuh │ │ │ ├── exl3_comp_unit_2.cu │ │ │ ├── exl3_comp_unit_2.cuh │ │ │ ├── exl3_comp_unit_3.cu │ │ │ ├── exl3_comp_unit_3.cuh │ │ │ ├── exl3_comp_unit_4.cu │ │ │ ├── exl3_comp_unit_4.cuh │ │ │ ├── exl3_comp_unit_5.cu │ │ │ ├── exl3_comp_unit_5.cuh │ │ │ ├── exl3_comp_unit_6.cu │ │ │ ├── exl3_comp_unit_6.cuh │ │ │ ├── exl3_comp_unit_7.cu │ │ │ ├── exl3_comp_unit_7.cuh │ │ │ ├── exl3_comp_unit_8.cu │ │ │ └── exl3_comp_unit_8.cuh │ │ ├── exl3_devctx.cu │ │ ├── exl3_devctx.cuh │ │ ├── exl3_dq.cuh │ │ ├── exl3_gemm.cu │ │ ├── exl3_gemm.cuh │ │ ├── exl3_gemm_inner.cuh │ │ ├── exl3_gemm_kernel.cuh │ │ ├── exl3_gemv.cu │ │ ├── exl3_gemv.cuh │ │ ├── exl3_gemv_kernel.cuh │ │ ├── exl3_kernel_map.cu │ │ ├── exl3_kernel_map.cuh │ │ ├── exl3_kernel_map_samples.cuh │ │ ├── hadamard.cu │ │ ├── hadamard.cuh │ │ ├── hadamard_inner.cuh │ │ ├── pack.cu │ │ ├── pack.cuh │ │ ├── quantize.cu │ │ ├── quantize.cuh │ │ ├── reconstruct.cu │ │ ├── reconstruct.cuh │ │ ├── util.cu │ │ └── util.cuh │ ├── reduction.cuh │ ├── rope.cu │ ├── rope.cuh │ ├── routing.cu │ ├── routing.cuh │ ├── softcap.cu │ ├── softcap.cuh │ ├── stloader.cpp │ ├── stloader.h │ ├── stloader_cu.cu │ ├── stloader_cu.cuh │ ├── util.cuh │ └── util.h ├── ext.py ├── generator │ ├── __init__.py │ ├── async_generator.py │ ├── filter │ │ ├── __init__.py │ │ ├── filter.py │ │ └── formatron.py │ ├── generator.py │ ├── job.py │ ├── pagetable.py │ ├── sampler │ │ ├── __init__.py │ │ ├── custom.py │ │ ├── presets.py │ │ └── sampler.py │ └── visualizer.py ├── integration │ ├── __init__.py │ └── transformers.py ├── loader │ ├── __init__.py │ └── safetensors.py ├── model │ ├── __init__.py │ ├── config.py │ ├── model.py │ ├── model_ls.py │ ├── model_tp.py │ ├── model_tp_alloc.py │ ├── model_tp_backend.py │ ├── model_tp_cuda.py │ ├── model_tp_fn.py │ └── model_tp_shared.py ├── model_init.py ├── modules │ ├── __init__.py │ ├── attn.py │ ├── block_sparse_mlp.py │ ├── conv.py │ ├── deepstack.py │ ├── embedding.py │ ├── gated_delta_net.py │ ├── gated_rmsnorm.py │ ├── gather.py │ ├── glm4v_pos_embedding.py │ ├── layernorm.py │ ├── linear.py │ ├── mlp.py │ ├── module.py │ ├── multilinear.py │ ├── pos_embedding.py │ ├── quant │ │ ├── __init__.py │ │ ├── exl3.py │ │ ├── exl3_lib │ │ │ ├── __init__.py │ │ │ └── quantize.py │ │ └── fp16.py │ ├── qwen3_vl_pos_embedding.py │ ├── rmsnorm.py │ └── transformer.py ├── tokenizer │ ├── __init__.py │ ├── mm_embedding.py │ └── tokenizer.py ├── util │ ├── __init__.py │ ├── arch_list.py │ ├── debug.py │ ├── file.py │ ├── hadamard.py │ ├── hadamard_data │ │ ├── hadamard_1.txt │ │ ├── hadamard_100.txt │ │ ├── hadamard_116.txt │ │ ├── hadamard_156.txt │ │ ├── hadamard_172.txt │ │ ├── hadamard_188.txt │ │ ├── hadamard_236.txt │ │ ├── hadamard_244.txt │ │ ├── hadamard_428.txt │ │ ├── hadamard_52.txt │ │ ├── hadamard_92.txt │ │ └── primes.txt │ ├── measures.py │ ├── memory.py │ ├── misc.py │ ├── profile_opt.py │ ├── progress.py │ ├── rope.py │ ├── tensor.py │ └── vision.py └── version.py ├── requirements.txt ├── requirements_eval.txt ├── requirements_examples.txt ├── science ├── codebook_eval.py ├── gumbel_eval.py ├── kv_quant_exp.py ├── qgemm_benchmark.py └── qgemm_pretune.py ├── setup.py ├── tests ├── generator_stresstest.py ├── test_cache_rotate.py ├── test_ext_norm.py ├── test_kv_quant.py ├── test_qgemm.py ├── test_quant_fn.py ├── test_rope.py ├── test_sampler.py └── util.py └── util ├── add_quant_config.py ├── add_safetensors_index.py ├── measure.py ├── optimize.py └── recompile.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: turboderp -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/.github/workflows/build.yml -------------------------------------------------------------------------------- /.github/workflows/build_windows_only.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/.github/workflows/build_windows_only.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/README.md -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/convert.py -------------------------------------------------------------------------------- /doc/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/cat.png -------------------------------------------------------------------------------- /doc/convert.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/convert.md -------------------------------------------------------------------------------- /doc/cq_humaneval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/cq_humaneval.png -------------------------------------------------------------------------------- /doc/exl3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/exl3.md -------------------------------------------------------------------------------- /doc/gumbel_eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/gumbel_eval.png -------------------------------------------------------------------------------- /doc/humaneval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/humaneval.png -------------------------------------------------------------------------------- /doc/llama31_70b_instruct_bpw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama31_70b_instruct_bpw.png -------------------------------------------------------------------------------- /doc/llama31_70b_instruct_vram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama31_70b_instruct_vram.png -------------------------------------------------------------------------------- /doc/llama31_8b_instruct_bpw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama31_8b_instruct_bpw.png -------------------------------------------------------------------------------- /doc/llama31_8b_instruct_kld_bpw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama31_8b_instruct_kld_bpw.png -------------------------------------------------------------------------------- /doc/llama31_8b_instruct_kld_vram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama31_8b_instruct_kld_vram.png -------------------------------------------------------------------------------- /doc/llama31_8b_instruct_vram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama31_8b_instruct_vram.png -------------------------------------------------------------------------------- /doc/llama32_1b_instruct_bpw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama32_1b_instruct_bpw.png -------------------------------------------------------------------------------- /doc/llama32_1b_instruct_vram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/llama32_1b_instruct_vram.png -------------------------------------------------------------------------------- /doc/mistral_7b_instruct_v0.3_bpw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/mistral_7b_instruct_v0.3_bpw.png -------------------------------------------------------------------------------- /doc/mistral_7b_instruct_v0.3_vram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/mistral_7b_instruct_v0.3_vram.png -------------------------------------------------------------------------------- /doc/procedural_codebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/doc/procedural_codebook.png -------------------------------------------------------------------------------- /eval/compare_q.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q.py -------------------------------------------------------------------------------- /eval/compare_q_anyprecision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_anyprecision.py -------------------------------------------------------------------------------- /eval/compare_q_exllamav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_exllamav2.py -------------------------------------------------------------------------------- /eval/compare_q_exllamav3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_exllamav3.py -------------------------------------------------------------------------------- /eval/compare_q_llamacpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_llamacpp.py -------------------------------------------------------------------------------- /eval/compare_q_logits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_logits.py -------------------------------------------------------------------------------- /eval/compare_q_qtip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_qtip.py -------------------------------------------------------------------------------- /eval/compare_q_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/compare_q_transformers.py -------------------------------------------------------------------------------- /eval/eval_texts/illustrious_client.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/illustrious_client.txt -------------------------------------------------------------------------------- /eval/eval_texts/illustrious_client_c1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/illustrious_client_c1.txt -------------------------------------------------------------------------------- /eval/eval_texts/illustrious_client_c2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/illustrious_client_c2.txt -------------------------------------------------------------------------------- /eval/eval_texts/illustrious_client_sum.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/illustrious_client_sum.txt -------------------------------------------------------------------------------- /eval/eval_texts/variable_man_char.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/variable_man_char.txt -------------------------------------------------------------------------------- /eval/eval_texts/variable_man_mod.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/variable_man_mod.txt -------------------------------------------------------------------------------- /eval/eval_texts/variable_man_mod_c1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/variable_man_mod_c1.txt -------------------------------------------------------------------------------- /eval/eval_texts/variable_man_sum.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/eval_texts/variable_man_sum.txt -------------------------------------------------------------------------------- /eval/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/humaneval.py -------------------------------------------------------------------------------- /eval/longctx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/longctx.py -------------------------------------------------------------------------------- /eval/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/mmlu.py -------------------------------------------------------------------------------- /eval/model_diff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/model_diff.py -------------------------------------------------------------------------------- /eval/ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/ppl.py -------------------------------------------------------------------------------- /eval/ppl_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/ppl_transformers.py -------------------------------------------------------------------------------- /eval/prequant_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/prequant_test.py -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_aqlm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_aqlm.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_awq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_awq.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_exl2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_exl2.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_exl3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_exl3.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_gguf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_gguf.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_qtip.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_qtip.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-70b-instruct_vptq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-70b-instruct_vptq.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_anyp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_anyp.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_aqlm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_aqlm.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_autoround.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_autoround.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_exl2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_exl2.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_exl3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_exl3.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_gguf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_gguf.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_hf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_hf.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_qtip.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_qtip.json -------------------------------------------------------------------------------- /eval/spec/llama3.1-8b-instruct_vptq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.1-8b-instruct_vptq.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_aqlm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_aqlm.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_awq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_awq.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_bnb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_bnb.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_exl2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_exl2.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_exl3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_exl3.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_gguf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_gguf.json -------------------------------------------------------------------------------- /eval/spec/llama3.2-1b-instruct_hf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/llama3.2-1b-instruct_hf.json -------------------------------------------------------------------------------- /eval/spec/mistral-7b-instruct-v0.3_awq.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/mistral-7b-instruct-v0.3_awq.json -------------------------------------------------------------------------------- /eval/spec/mistral-7b-instruct-v0.3_exl2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/mistral-7b-instruct-v0.3_exl2.json -------------------------------------------------------------------------------- /eval/spec/mistral-7b-instruct-v0.3_exl3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/mistral-7b-instruct-v0.3_exl3.json -------------------------------------------------------------------------------- /eval/spec/mistral-7b-instruct-v0.3_gguf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/mistral-7b-instruct-v0.3_gguf.json -------------------------------------------------------------------------------- /eval/spec/wiki2_llama3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/wiki2_llama3.json -------------------------------------------------------------------------------- /eval/spec/wiki2_llama3_large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/wiki2_llama3_large.json -------------------------------------------------------------------------------- /eval/spec/wiki2_mistral_large.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/eval/spec/wiki2_mistral_large.json -------------------------------------------------------------------------------- /examples/async_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/async_generator.py -------------------------------------------------------------------------------- /examples/banned_strings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/banned_strings.py -------------------------------------------------------------------------------- /examples/batched_translation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/batched_translation.py -------------------------------------------------------------------------------- /examples/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/chat.py -------------------------------------------------------------------------------- /examples/chat_console.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/chat_console.py -------------------------------------------------------------------------------- /examples/chat_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/chat_io.py -------------------------------------------------------------------------------- /examples/chat_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/chat_templates.py -------------------------------------------------------------------------------- /examples/chat_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/chat_util.py -------------------------------------------------------------------------------- /examples/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/common.py -------------------------------------------------------------------------------- /examples/constrained_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/constrained_generation.py -------------------------------------------------------------------------------- /examples/dynamic_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/dynamic_gen.py -------------------------------------------------------------------------------- /examples/generation_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/generation_loop.py -------------------------------------------------------------------------------- /examples/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/generator.py -------------------------------------------------------------------------------- /examples/imgsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/imgsearch.py -------------------------------------------------------------------------------- /examples/imgsearch_gallery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/imgsearch_gallery.py -------------------------------------------------------------------------------- /examples/loading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/loading.py -------------------------------------------------------------------------------- /examples/media/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/media/cat.png -------------------------------------------------------------------------------- /examples/media/strawberry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/media/strawberry.png -------------------------------------------------------------------------------- /examples/multimodal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/multimodal.py -------------------------------------------------------------------------------- /examples/overrides.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/overrides.yaml -------------------------------------------------------------------------------- /examples/transformers_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/examples/transformers_integration.py -------------------------------------------------------------------------------- /exllamav3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/__init__.py -------------------------------------------------------------------------------- /exllamav3/architecture/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | -------------------------------------------------------------------------------- /exllamav3/architecture/apertus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/apertus.py -------------------------------------------------------------------------------- /exllamav3/architecture/arcee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/arcee.py -------------------------------------------------------------------------------- /exllamav3/architecture/architectures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/architectures.py -------------------------------------------------------------------------------- /exllamav3/architecture/cohere.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/cohere.py -------------------------------------------------------------------------------- /exllamav3/architecture/cohere2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/cohere2.py -------------------------------------------------------------------------------- /exllamav3/architecture/decilm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/decilm.py -------------------------------------------------------------------------------- /exllamav3/architecture/dots1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/dots1.py -------------------------------------------------------------------------------- /exllamav3/architecture/ernie4_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/ernie4_5.py -------------------------------------------------------------------------------- /exllamav3/architecture/ernie4_5_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/ernie4_5_moe.py -------------------------------------------------------------------------------- /exllamav3/architecture/exaone4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/exaone4.py -------------------------------------------------------------------------------- /exllamav3/architecture/gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/gemma2.py -------------------------------------------------------------------------------- /exllamav3/architecture/gemma3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/gemma3.py -------------------------------------------------------------------------------- /exllamav3/architecture/glm4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/glm4.py -------------------------------------------------------------------------------- /exllamav3/architecture/glm4_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/glm4_moe.py -------------------------------------------------------------------------------- /exllamav3/architecture/glm4v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/glm4v.py -------------------------------------------------------------------------------- /exllamav3/architecture/glm4v_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/glm4v_moe.py -------------------------------------------------------------------------------- /exllamav3/architecture/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/llama.py -------------------------------------------------------------------------------- /exllamav3/architecture/mimo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/mimo.py -------------------------------------------------------------------------------- /exllamav3/architecture/minimax_m2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/minimax_m2.py -------------------------------------------------------------------------------- /exllamav3/architecture/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/mistral.py -------------------------------------------------------------------------------- /exllamav3/architecture/mistral3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/mistral3.py -------------------------------------------------------------------------------- /exllamav3/architecture/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/mixtral.py -------------------------------------------------------------------------------- /exllamav3/architecture/phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/phi3.py -------------------------------------------------------------------------------- /exllamav3/architecture/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/qwen2.py -------------------------------------------------------------------------------- /exllamav3/architecture/qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/qwen3.py -------------------------------------------------------------------------------- /exllamav3/architecture/qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/qwen3_moe.py -------------------------------------------------------------------------------- /exllamav3/architecture/qwen3_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/qwen3_next.py -------------------------------------------------------------------------------- /exllamav3/architecture/qwen3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/qwen3_vl.py -------------------------------------------------------------------------------- /exllamav3/architecture/qwen3_vl_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/qwen3_vl_moe.py -------------------------------------------------------------------------------- /exllamav3/architecture/seedoss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/seedoss.py -------------------------------------------------------------------------------- /exllamav3/architecture/smollm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/architecture/smollm3.py -------------------------------------------------------------------------------- /exllamav3/cache/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/cache/__init__.py -------------------------------------------------------------------------------- /exllamav3/cache/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/cache/cache.py -------------------------------------------------------------------------------- /exllamav3/cache/fp16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/cache/fp16.py -------------------------------------------------------------------------------- /exllamav3/cache/quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/cache/quant.py -------------------------------------------------------------------------------- /exllamav3/cache/recurrent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/cache/recurrent.py -------------------------------------------------------------------------------- /exllamav3/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/constants.py -------------------------------------------------------------------------------- /exllamav3/conversion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exllamav3/conversion/allocation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/allocation.py -------------------------------------------------------------------------------- /exllamav3/conversion/calibration_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/calibration_data.py -------------------------------------------------------------------------------- /exllamav3/conversion/compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/compile.py -------------------------------------------------------------------------------- /exllamav3/conversion/convert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/convert_model.py -------------------------------------------------------------------------------- /exllamav3/conversion/measure_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/measure_model.py -------------------------------------------------------------------------------- /exllamav3/conversion/optimize_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/optimize_model.py -------------------------------------------------------------------------------- /exllamav3/conversion/quant_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/quant_config.py -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/c4.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/standard_cal_data/c4.utf8 -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/code.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/standard_cal_data/code.utf8 -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/multilingual.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/standard_cal_data/multilingual.utf8 -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/technical.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/standard_cal_data/technical.utf8 -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/tiny.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/standard_cal_data/tiny.utf8 -------------------------------------------------------------------------------- /exllamav3/conversion/standard_cal_data/wiki.utf8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/conversion/standard_cal_data/wiki.utf8 -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/activation.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/activation.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/activation.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/activation.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/activation_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/activation_kernels.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/add.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/add.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/add.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/avx2_target.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/avx2_target.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/avx2_target.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/avx2_target.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/bindings.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/cache/q_cache.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/cache/q_cache.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/cache/q_cache.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/cache/q_cache.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/cache/q_cache_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/cache/q_cache_kernels.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/causal_conv1d.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/causal_conv1d.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/causal_conv1d.cuh: -------------------------------------------------------------------------------- 1 | void causal_conv1d 2 | ( 3 | 4 | ); -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/compat.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/gdn.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/gdn.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/cache.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/cache.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/cache.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/cache.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/gumbel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/gumbel.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/gumbel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/gumbel.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/rep_pen.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/rep_pen.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/rep_pen.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/rep_pen.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/sampling_basic.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/sampling_basic.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/sampling_basic.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/sampling_basic.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/strings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/strings.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/generator/strings.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/generator/strings.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/gnd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/gnd.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/graph.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/graph.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/graph.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/graph.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/hadamard.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/hadamard.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/hadamard.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/hadamard.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/hgemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/hgemm.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/hgemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/hgemm.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/histogram.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/histogram.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/histogram.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/blocksparse_mlp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/blocksparse_mlp.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/blocksparse_mlp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/blocksparse_mlp.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/blocksparse_mlp_bc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/blocksparse_mlp_bc.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/gated_delta_net.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/gated_delta_net.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/gated_delta_net.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/gated_delta_net.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/gated_delta_net_bc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/gated_delta_net_bc.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/gated_rmsnorm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/gated_rmsnorm.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/gated_rmsnorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/gated_rmsnorm.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/gated_rmsnorm_bc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/gated_rmsnorm_bc.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/linear.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/linear.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/linear.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/linear.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/linear_bc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/linear_bc.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/mlp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/mlp.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/mlp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/mlp.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/libtorch/mlp_bc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/libtorch/mlp_bc.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/norm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/norm.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/norm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/norm.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/all_reduce.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/all_reduce.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/all_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/all_reduce.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/all_reduce_cpu.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/all_reduce_cpu.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/all_reduce_cpu_avx2.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/all_reduce_cpu_avx2.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/all_reduce_cpu_avx2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/all_reduce_cpu_avx2.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/barrier.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/barrier.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/barrier.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/barrier.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/barrier_inner.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/barrier_inner.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/broadcast.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/broadcast.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/broadcast.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/broadcast.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/context.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/context.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/context.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/context.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/gather.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/gather.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/gather.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/gather.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/ll.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/ll.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/parallel/timeout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/parallel/timeout.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/ptx.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/ptx.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/codebook.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/codebook.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_1.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_1.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_1.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_2.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_2.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_2.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_2.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_3.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_3.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_3.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_3.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_4.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_4.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_4.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_4.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_5.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_5.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_5.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_5.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_6.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_6.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_6.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_6.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_7.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_7.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_7.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_7.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_8.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/comp_units/exl3_comp_unit_8.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_devctx.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_devctx.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_devctx.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_devctx.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_dq.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_dq.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemm.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemm.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemm_inner.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemm_inner.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemm_kernel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemm_kernel.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemv.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemv.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemv.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_gemv_kernel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_gemv_kernel.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_kernel_map.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_kernel_map.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_kernel_map.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_kernel_map.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/exl3_kernel_map_samples.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/exl3_kernel_map_samples.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/hadamard.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/hadamard.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/hadamard.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/hadamard.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/hadamard_inner.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/hadamard_inner.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/pack.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/pack.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/pack.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/pack.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/quantize.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/quantize.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/quantize.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/quantize.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/reconstruct.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/reconstruct.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/reconstruct.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/reconstruct.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/util.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/util.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/quant/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/quant/util.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/reduction.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/reduction.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/rope.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/rope.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/rope.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/rope.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/routing.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/routing.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/routing.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/routing.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/softcap.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/softcap.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/softcap.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/softcap.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/stloader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/stloader.cpp -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/stloader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/stloader.h -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/stloader_cu.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/stloader_cu.cu -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/stloader_cu.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/stloader_cu.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/util.cuh -------------------------------------------------------------------------------- /exllamav3/exllamav3_ext/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/exllamav3_ext/util.h -------------------------------------------------------------------------------- /exllamav3/ext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/ext.py -------------------------------------------------------------------------------- /exllamav3/generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/__init__.py -------------------------------------------------------------------------------- /exllamav3/generator/async_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/async_generator.py -------------------------------------------------------------------------------- /exllamav3/generator/filter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/filter/__init__.py -------------------------------------------------------------------------------- /exllamav3/generator/filter/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/filter/filter.py -------------------------------------------------------------------------------- /exllamav3/generator/filter/formatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/filter/formatron.py -------------------------------------------------------------------------------- /exllamav3/generator/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/generator.py -------------------------------------------------------------------------------- /exllamav3/generator/job.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/job.py -------------------------------------------------------------------------------- /exllamav3/generator/pagetable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/pagetable.py -------------------------------------------------------------------------------- /exllamav3/generator/sampler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/sampler/__init__.py -------------------------------------------------------------------------------- /exllamav3/generator/sampler/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/sampler/custom.py -------------------------------------------------------------------------------- /exllamav3/generator/sampler/presets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/sampler/presets.py -------------------------------------------------------------------------------- /exllamav3/generator/sampler/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/sampler/sampler.py -------------------------------------------------------------------------------- /exllamav3/generator/visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/generator/visualizer.py -------------------------------------------------------------------------------- /exllamav3/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exllamav3/integration/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/integration/transformers.py -------------------------------------------------------------------------------- /exllamav3/loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/loader/__init__.py -------------------------------------------------------------------------------- /exllamav3/loader/safetensors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/loader/safetensors.py -------------------------------------------------------------------------------- /exllamav3/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/__init__.py -------------------------------------------------------------------------------- /exllamav3/model/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/config.py -------------------------------------------------------------------------------- /exllamav3/model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model.py -------------------------------------------------------------------------------- /exllamav3/model/model_ls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_ls.py -------------------------------------------------------------------------------- /exllamav3/model/model_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_tp.py -------------------------------------------------------------------------------- /exllamav3/model/model_tp_alloc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_tp_alloc.py -------------------------------------------------------------------------------- /exllamav3/model/model_tp_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_tp_backend.py -------------------------------------------------------------------------------- /exllamav3/model/model_tp_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_tp_cuda.py -------------------------------------------------------------------------------- /exllamav3/model/model_tp_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_tp_fn.py -------------------------------------------------------------------------------- /exllamav3/model/model_tp_shared.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model/model_tp_shared.py -------------------------------------------------------------------------------- /exllamav3/model_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/model_init.py -------------------------------------------------------------------------------- /exllamav3/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/__init__.py -------------------------------------------------------------------------------- /exllamav3/modules/attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/attn.py -------------------------------------------------------------------------------- /exllamav3/modules/block_sparse_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/block_sparse_mlp.py -------------------------------------------------------------------------------- /exllamav3/modules/conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/conv.py -------------------------------------------------------------------------------- /exllamav3/modules/deepstack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/deepstack.py -------------------------------------------------------------------------------- /exllamav3/modules/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/embedding.py -------------------------------------------------------------------------------- /exllamav3/modules/gated_delta_net.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/gated_delta_net.py -------------------------------------------------------------------------------- /exllamav3/modules/gated_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/gated_rmsnorm.py -------------------------------------------------------------------------------- /exllamav3/modules/gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/gather.py -------------------------------------------------------------------------------- /exllamav3/modules/glm4v_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/glm4v_pos_embedding.py -------------------------------------------------------------------------------- /exllamav3/modules/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/layernorm.py -------------------------------------------------------------------------------- /exllamav3/modules/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/linear.py -------------------------------------------------------------------------------- /exllamav3/modules/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/mlp.py -------------------------------------------------------------------------------- /exllamav3/modules/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/module.py -------------------------------------------------------------------------------- /exllamav3/modules/multilinear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/multilinear.py -------------------------------------------------------------------------------- /exllamav3/modules/pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/pos_embedding.py -------------------------------------------------------------------------------- /exllamav3/modules/quant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/quant/__init__.py -------------------------------------------------------------------------------- /exllamav3/modules/quant/exl3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/quant/exl3.py -------------------------------------------------------------------------------- /exllamav3/modules/quant/exl3_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/quant/exl3_lib/__init__.py -------------------------------------------------------------------------------- /exllamav3/modules/quant/exl3_lib/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/quant/exl3_lib/quantize.py -------------------------------------------------------------------------------- /exllamav3/modules/quant/fp16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/quant/fp16.py -------------------------------------------------------------------------------- /exllamav3/modules/qwen3_vl_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/qwen3_vl_pos_embedding.py -------------------------------------------------------------------------------- /exllamav3/modules/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/rmsnorm.py -------------------------------------------------------------------------------- /exllamav3/modules/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/modules/transformer.py -------------------------------------------------------------------------------- /exllamav3/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/tokenizer/__init__.py -------------------------------------------------------------------------------- /exllamav3/tokenizer/mm_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/tokenizer/mm_embedding.py -------------------------------------------------------------------------------- /exllamav3/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /exllamav3/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/__init__.py -------------------------------------------------------------------------------- /exllamav3/util/arch_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/arch_list.py -------------------------------------------------------------------------------- /exllamav3/util/debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/debug.py -------------------------------------------------------------------------------- /exllamav3/util/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/file.py -------------------------------------------------------------------------------- /exllamav3/util/hadamard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard.py -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_1.txt: -------------------------------------------------------------------------------- 1 | + -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_100.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_100.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_116.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_116.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_156.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_156.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_172.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_172.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_188.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_188.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_236.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_236.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_244.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_244.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_428.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_428.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_52.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_52.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/hadamard_92.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/hadamard_92.txt -------------------------------------------------------------------------------- /exllamav3/util/hadamard_data/primes.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/hadamard_data/primes.txt -------------------------------------------------------------------------------- /exllamav3/util/measures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/measures.py -------------------------------------------------------------------------------- /exllamav3/util/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/memory.py -------------------------------------------------------------------------------- /exllamav3/util/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/misc.py -------------------------------------------------------------------------------- /exllamav3/util/profile_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/profile_opt.py -------------------------------------------------------------------------------- /exllamav3/util/progress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/progress.py -------------------------------------------------------------------------------- /exllamav3/util/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/rope.py -------------------------------------------------------------------------------- /exllamav3/util/tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/tensor.py -------------------------------------------------------------------------------- /exllamav3/util/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/exllamav3/util/vision.py -------------------------------------------------------------------------------- /exllamav3/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.16" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_eval.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/requirements_eval.txt -------------------------------------------------------------------------------- /requirements_examples.txt: -------------------------------------------------------------------------------- 1 | blessed 2 | prompt_toolkit 3 | pyperclip 4 | requests 5 | pydantic -------------------------------------------------------------------------------- /science/codebook_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/science/codebook_eval.py -------------------------------------------------------------------------------- /science/gumbel_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/science/gumbel_eval.py -------------------------------------------------------------------------------- /science/kv_quant_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/science/kv_quant_exp.py -------------------------------------------------------------------------------- /science/qgemm_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/science/qgemm_benchmark.py -------------------------------------------------------------------------------- /science/qgemm_pretune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/science/qgemm_pretune.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/setup.py -------------------------------------------------------------------------------- /tests/generator_stresstest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/generator_stresstest.py -------------------------------------------------------------------------------- /tests/test_cache_rotate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_cache_rotate.py -------------------------------------------------------------------------------- /tests/test_ext_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_ext_norm.py -------------------------------------------------------------------------------- /tests/test_kv_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_kv_quant.py -------------------------------------------------------------------------------- /tests/test_qgemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_qgemm.py -------------------------------------------------------------------------------- /tests/test_quant_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_quant_fn.py -------------------------------------------------------------------------------- /tests/test_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_rope.py -------------------------------------------------------------------------------- /tests/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/test_sampler.py -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/tests/util.py -------------------------------------------------------------------------------- /util/add_quant_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/util/add_quant_config.py -------------------------------------------------------------------------------- /util/add_safetensors_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/util/add_safetensors_index.py -------------------------------------------------------------------------------- /util/measure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/util/measure.py -------------------------------------------------------------------------------- /util/optimize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/util/optimize.py -------------------------------------------------------------------------------- /util/recompile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/turboderp-org/exllamav3/HEAD/util/recompile.py --------------------------------------------------------------------------------