├── .gitignore
├── README.md
├── example
    ├── blend.py
    ├── blend_musique.py
    ├── blend_samsum.py
    ├── blend_wikimqa.py
    └── utils.py
├── inputs
    ├── 1.json
    ├── 10.json
    ├── 2.json
    ├── 3.json
    ├── 4.json
    ├── 5.json
    ├── 6.json
    ├── 7.json
    ├── 8.json
    ├── 9.json
    ├── musique_s.json
    ├── samsum.json
    └── wikimqa_s.json
├── requirements.txt
└── vllm_blend
    ├── .buildkite
        ├── download-images.sh
        ├── run-amd-test.sh
        ├── run-benchmarks.sh
        ├── run-cpu-test.sh
        ├── run-neuron-test.sh
        ├── test-pipeline.yaml
        └── test-template.j2
    ├── .dockerignore
    ├── .github
        ├── ISSUE_TEMPLATE
        │   ├── 100-documentation.yml
        │   ├── 200-installation.yml
        │   ├── 300-usage.yml
        │   ├── 400-bug report.yml
        │   ├── 500-feature request.yml
        │   ├── 600-new model.yml
        │   ├── 700-performance discussion.yml
        │   ├── 800-misc discussion.yml
        │   └── config.yml
        ├── PULL_REQUEST_TEMPLATE.md
        └── workflows
        │   ├── mypy.yaml
        │   ├── publish.yml
        │   ├── ruff.yml
        │   ├── scripts
        │       ├── build.sh
        │       ├── create_release.js
        │       ├── cuda-install.sh
        │       ├── env.sh
        │       └── pytorch-install.sh
        │   └── yapf.yml
    ├── .gitignore
    ├── .readthedocs.yaml
    ├── .yapfignore
    ├── CMakeLists.txt
    ├── CONTRIBUTING.md
    ├── Dockerfile
    ├── Dockerfile.cpu
    ├── Dockerfile.neuron
    ├── Dockerfile.rocm
    ├── LICENSE
    ├── MANIFEST.in
    ├── README.md
    ├── benchmarks
        ├── README.md
        ├── backend_request_func.py
        ├── benchmark_latency.py
        ├── benchmark_prefix_caching.py
        ├── benchmark_serving.py
        ├── benchmark_throughput.py
        ├── kernels
        │   ├── benchmark_aqlm.py
        │   ├── benchmark_mixtral_moe.py
        │   ├── benchmark_paged_attention.py
        │   └── benchmark_rope.py
        ├── launch_tgi_server.sh
        └── sonnet.txt
    ├── build-docker.sh
    ├── cmake
        ├── cpu_extension.cmake
        ├── hipify.py
        └── utils.cmake
    ├── collect_env.py
    ├── csrc
        ├── activation_kernels.cu
        ├── attention
        │   ├── attention_dtypes.h
        │   ├── attention_generic.cuh
        │   ├── attention_kernels.cu
        │   ├── attention_utils.cuh
        │   ├── dtype_bfloat16.cuh
        │   ├── dtype_float16.cuh
        │   ├── dtype_float32.cuh
        │   └── dtype_fp8.cuh
        ├── cache.h
        ├── cache_kernels.cu
        ├── cpu
        │   ├── activation.cpp
        │   ├── attention.cpp
        │   ├── cache.cpp
        │   ├── cpu_types.hpp
        │   ├── layernorm.cpp
        │   ├── pos_encoding.cpp
        │   └── pybind.cpp
        ├── cuda_compat.h
        ├── cuda_utils.h
        ├── cuda_utils_kernels.cu
        ├── custom_all_reduce.cu
        ├── custom_all_reduce.cuh
        ├── custom_all_reduce_test.cu
        ├── dispatch_utils.h
        ├── layernorm_kernels.cu
        ├── moe
        │   ├── moe_ops.cpp
        │   ├── moe_ops.h
        │   └── topk_softmax_kernels.cu
        ├── moe_align_block_size_kernels.cu
        ├── ops.h
        ├── pos_encoding_kernels.cu
        ├── punica
        │   ├── LICENSE
        │   ├── bgmv
        │   │   ├── bgmv_bf16_bf16_bf16.cu
        │   │   ├── bgmv_bf16_fp32_bf16.cu
        │   │   ├── bgmv_config.h
        │   │   ├── bgmv_fp16_fp16_fp16.cu
        │   │   ├── bgmv_fp16_fp32_fp16.cu
        │   │   ├── bgmv_fp32_bf16_bf16.cu
        │   │   ├── bgmv_fp32_fp16_fp16.cu
        │   │   ├── bgmv_impl.cuh
        │   │   ├── generator.py
        │   │   └── vec_dtypes.cuh
        │   └── punica_ops.cc
        ├── pybind.cpp
        ├── quantization
        │   ├── aqlm
        │   │   └── gemm_kernels.cu
        │   ├── awq
        │   │   ├── dequantize.cuh
        │   │   └── gemm_kernels.cu
        │   ├── fp8
        │   │   ├── amd_detail
        │   │   │   ├── hip_float8.h
        │   │   │   ├── hip_float8_impl.h
        │   │   │   └── quant_utils.cuh
        │   │   └── fp8_cuda_kernels.cu
        │   ├── fp8_e5m2_kvcache
        │   │   └── quant_utils.cuh
        │   ├── gptq
        │   │   ├── compat.cuh
        │   │   ├── matrix_view.cuh
        │   │   ├── q_gemm.cu
        │   │   ├── qdq_2.cuh
        │   │   ├── qdq_3.cuh
        │   │   ├── qdq_4.cuh
        │   │   ├── qdq_8.cuh
        │   │   └── qdq_util.cuh
        │   ├── marlin
        │   │   ├── LICENSE
        │   │   └── marlin_cuda_kernel.cu
        │   └── squeezellm
        │   │   └── quant_cuda_kernel.cu
        └── reduction_utils.cuh
    ├── docs
        ├── Makefile
        ├── README.md
        ├── make.bat
        ├── requirements-docs.txt
        └── source
        │   ├── assets
        │       ├── kernel
        │       │   ├── k_vecs.png
        │       │   ├── key.png
        │       │   ├── logits_vec.png
        │       │   ├── q_vecs.png
        │       │   ├── query.png
        │       │   ├── v_vec.png
        │       │   └── value.png
        │       └── logos
        │       │   ├── vllm-logo-only-light.png
        │       │   ├── vllm-logo-text-dark.png
        │       │   └── vllm-logo-text-light.png
        │   ├── conf.py
        │   ├── dev
        │       ├── engine
        │       │   ├── async_llm_engine.rst
        │       │   ├── engine_index.rst
        │       │   └── llm_engine.rst
        │       ├── kernel
        │       │   └── paged_attention.rst
        │       └── sampling_params.rst
        │   ├── generate_examples.py
        │   ├── getting_started
        │       ├── amd-installation.rst
        │       ├── cpu-installation.rst
        │       ├── examples
        │       │   └── examples_index.template.rst
        │       ├── installation.rst
        │       ├── neuron-installation.rst
        │       └── quickstart.rst
        │   ├── index.rst
        │   ├── models
        │       ├── adding_model.rst
        │       ├── engine_args.rst
        │       ├── lora.rst
        │       └── supported_models.rst
        │   ├── quantization
        │       ├── auto_awq.rst
        │       ├── fp8_e4m3_kvcache.rst
        │       └── fp8_e5m2_kvcache.rst
        │   └── serving
        │       ├── deploying_with_bentoml.rst
        │       ├── deploying_with_docker.rst
        │       ├── deploying_with_kserve.rst
        │       ├── deploying_with_triton.rst
        │       ├── distributed_serving.rst
        │       ├── integrations.rst
        │       ├── metrics.rst
        │       ├── openai_compatible_server.md
        │       ├── run_on_sky.rst
        │       ├── serving_with_langchain.rst
        │       └── usage_stats.md
    ├── examples
        ├── api_client.py
        ├── aqlm_example.py
        ├── fp8
        │   ├── README.md
        │   ├── extract_scales.py
        │   └── quantizer
        │   │   ├── README.md
        │   │   └── quantize.py
        ├── gradio_openai_chatbot_webserver.py
        ├── gradio_webserver.py
        ├── llava_example.py
        ├── llm_engine_example.py
        ├── multilora_inference.py
        ├── offline_inference.py
        ├── offline_inference_distributed.py
        ├── offline_inference_neuron.py
        ├── offline_inference_with_prefix.py
        ├── openai_chat_completion_client.py
        ├── openai_completion_client.py
        ├── production_monitoring
        │   ├── README.md
        │   ├── docker-compose.yaml
        │   └── prometheus.yaml
        ├── template_alpaca.jinja
        ├── template_baichuan.jinja
        ├── template_chatglm.jinja
        ├── template_chatglm2.jinja
        ├── template_chatml.jinja
        ├── template_falcon.jinja
        ├── template_falcon_180b.jinja
        ├── template_inkbot.jinja
        └── tensorize_vllm_model.py
    ├── format.sh
    ├── pyproject.toml
    ├── requirements-build.txt
    ├── requirements-common.txt
    ├── requirements-cpu.txt
    ├── requirements-cuda.txt
    ├── requirements-dev.txt
    ├── requirements-neuron.txt
    ├── requirements-rocm.txt
    ├── rocm_patch
        └── rocm_bf16.patch
    ├── setup.py
    ├── tests
        ├── __init__.py
        ├── async_engine
        │   ├── api_server_async_engine.py
        │   ├── test_api_server.py
        │   ├── test_async_llm_engine.py
        │   ├── test_chat_template.py
        │   └── test_request_tracker.py
        ├── basic_correctness
        │   ├── test_basic_correctness.py
        │   └── test_chunked_prefill.py
        ├── conftest.py
        ├── core
        │   ├── __init__.py
        │   ├── block
        │   │   ├── __init__.py
        │   │   ├── conftest.py
        │   │   ├── e2e
        │   │   │   ├── conftest.py
        │   │   │   └── test_correctness.py
        │   │   ├── test_block_manager_v2.py
        │   │   ├── test_block_table.py
        │   │   ├── test_common.py
        │   │   ├── test_cpu_gpu_block_allocator.py
        │   │   ├── test_naive_block.py
        │   │   └── test_prefix_caching_block.py
        │   ├── test_block_manager.py
        │   ├── test_chunked_prefill_scheduler.py
        │   ├── test_scheduler.py
        │   └── utils.py
        ├── distributed
        │   ├── test_basic_distributed_correctness.py
        │   ├── test_chunked_prefill_distributed.py
        │   ├── test_comm_ops.py
        │   ├── test_custom_all_reduce.py
        │   ├── test_pynccl.py
        │   └── test_pynccl_library.py
        ├── engine
        │   ├── output_processor
        │   │   └── test_multi_step.py
        │   ├── test_computed_prefix_blocks.py
        │   ├── test_detokenization.py
        │   ├── test_skip_tokenizer_init.py
        │   ├── test_stop_reason.py
        │   └── test_stop_strings.py
        ├── entrypoints
        │   ├── test_guided_processors.py
        │   ├── test_llm_generate.py
        │   ├── test_openai_server.py
        │   └── test_server_oot_registration.py
        ├── kernels
        │   ├── allclose_default.py
        │   ├── conftest.py
        │   ├── test_activation.py
        │   ├── test_attention.py
        │   ├── test_cache.py
        │   ├── test_layernorm.py
        │   ├── test_moe.py
        │   ├── test_pos_encoding.py
        │   ├── test_prefix_prefill.py
        │   ├── test_rand.py
        │   └── test_sampler.py
        ├── lora
        │   ├── __init__.py
        │   ├── conftest.py
        │   ├── test_baichuan.py
        │   ├── test_chatglm3.py
        │   ├── test_gemma.py
        │   ├── test_layer_variation.py
        │   ├── test_layers.py
        │   ├── test_llama.py
        │   ├── test_lora.py
        │   ├── test_lora_checkpoints.py
        │   ├── test_lora_manager.py
        │   ├── test_mixtral.py
        │   ├── test_punica.py
        │   ├── test_quant_model.py
        │   ├── test_tokenizer_group.py
        │   ├── test_utils.py
        │   ├── test_worker.py
        │   └── utils.py
        ├── metrics
        │   └── test_metrics.py
        ├── model_executor
        │   └── weight_utils.py
        ├── models
        │   ├── test_aqlm.py
        │   ├── test_big_models.py
        │   ├── test_llava.py
        │   ├── test_marlin.py
        │   ├── test_mistral.py
        │   ├── test_models.py
        │   └── test_oot_registration.py
        ├── prefix_caching
        │   └── test_prefix_caching.py
        ├── prompts
        │   ├── example.txt
        │   └── summary.txt
        ├── quantization
        │   ├── test_autogptq_marlin_configs.py
        │   └── test_fp8.py
        ├── samplers
        │   ├── test_beam_search.py
        │   ├── test_logits_processor.py
        │   ├── test_logprobs.py
        │   ├── test_ranks.py
        │   ├── test_rejection_sampler.py
        │   ├── test_sampler.py
        │   └── test_seeded_generate.py
        ├── spec_decode
        │   ├── __init__.py
        │   ├── e2e
        │   │   ├── __init__.py
        │   │   ├── conftest.py
        │   │   ├── test_compatibility.py
        │   │   └── test_correctness.py
        │   ├── test_batch_expansion.py
        │   ├── test_metrics.py
        │   ├── test_multi_step_worker.py
        │   ├── test_spec_decode_worker.py
        │   ├── test_utils.py
        │   └── utils.py
        ├── tensorizer_loader
        │   ├── __init__.py
        │   ├── tensorize_vllm_model_for_testing.py
        │   └── test_tensorizer.py
        ├── test_cache_block_hashing.py
        ├── test_config.py
        ├── test_logger.py
        ├── test_logits_processor.py
        ├── test_regression.py
        ├── test_sampling_params.py
        ├── test_sequence.py
        ├── tokenization
        │   ├── __init__.py
        │   ├── test_cached_tokenizer.py
        │   ├── test_detokenize.py
        │   └── test_tokenizer_group.py
        └── worker
        │   ├── __init__.py
        │   ├── test_model_runner.py
        │   └── test_swap.py
    └── vllm
        ├── __init__.py
        ├── _custom_ops.py
        ├── attention
            ├── __init__.py
            ├── backends
            │   ├── __init__.py
            │   ├── abstract.py
            │   ├── flash_attn.py
            │   ├── rocm_flash_attn.py
            │   ├── torch_sdpa.py
            │   ├── xformers.py
            │   └── xformers_org.py
            ├── layer.py
            ├── layer_org.py
            ├── ops
            │   ├── __init__.py
            │   ├── paged_attn.py
            │   ├── prefix_prefill.py
            │   └── triton_flash_attention.py
            └── selector.py
        ├── block.py
        ├── config.py
        ├── core
            ├── __init__.py
            ├── block
            │   ├── __init__.py
            │   ├── block_table.py
            │   ├── common.py
            │   ├── cpu_gpu_block_allocator.py
            │   ├── interfaces.py
            │   ├── naive_block.py
            │   └── prefix_caching_block.py
            ├── block_manager_v1.py
            ├── block_manager_v2.py
            ├── evictor.py
            ├── interfaces.py
            ├── policy.py
            └── scheduler.py
        ├── distributed
            ├── __init__.py
            ├── communication_op.py
            ├── device_communicators
            │   ├── __init__.py
            │   ├── custom_all_reduce.py
            │   ├── pynccl.py
            │   └── pynccl_utils.py
            ├── parallel_state.py
            └── utils.py
        ├── engine
            ├── __init__.py
            ├── arg_utils.py
            ├── async_llm_engine.py
            ├── llm_engine.py
            ├── metrics.py
            ├── output_processor
            │   ├── __init__.py
            │   ├── interfaces.py
            │   ├── multi_step.py
            │   ├── single_step.py
            │   ├── stop_checker.py
            │   └── util.py
            └── ray_utils.py
        ├── entrypoints
            ├── __init__.py
            ├── api_server.py
            ├── llm.py
            └── openai
            │   ├── __init__.py
            │   ├── api_server.py
            │   ├── cli_args.py
            │   ├── protocol.py
            │   ├── serving_chat.py
            │   ├── serving_completion.py
            │   └── serving_engine.py
        ├── executor
            ├── __init__.py
            ├── cpu_executor.py
            ├── executor_base.py
            ├── gpu_executor.py
            ├── neuron_executor.py
            └── ray_gpu_executor.py
        ├── logger.py
        ├── lora
            ├── __init__.py
            ├── layers.py
            ├── lora.py
            ├── models.py
            ├── punica.py
            ├── request.py
            ├── utils.py
            └── worker_manager.py
        ├── model_executor
            ├── __init__.py
            ├── guided_decoding
            │   ├── __init__.py
            │   ├── lm_format_enforcer_decoding.py
            │   ├── outlines_decoding.py
            │   └── outlines_logits_processors.py
            ├── layers
            │   ├── __init__.py
            │   ├── activation.py
            │   ├── fused_moe
            │   │   ├── __init__.py
            │   │   ├── configs
            │   │   │   └── README
            │   │   └── fused_moe.py
            │   ├── layernorm.py
            │   ├── linear.py
            │   ├── logits_processor.py
            │   ├── ops
            │   │   ├── __init__.py
            │   │   ├── rand.py
            │   │   └── sample.py
            │   ├── quantization
            │   │   ├── __init__.py
            │   │   ├── aqlm.py
            │   │   ├── awq.py
            │   │   ├── base_config.py
            │   │   ├── fp8.py
            │   │   ├── gptq.py
            │   │   ├── marlin.py
            │   │   ├── schema.py
            │   │   └── squeezellm.py
            │   ├── rejection_sampler.py
            │   ├── rotary_embedding.py
            │   ├── sampler.py
            │   └── vocab_parallel_embedding.py
            ├── model_loader
            │   ├── __init__.py
            │   ├── loader.py
            │   ├── neuron.py
            │   ├── tensorizer.py
            │   ├── utils.py
            │   └── weight_utils.py
            ├── models
            │   ├── __init__.py
            │   ├── baichuan.py
            │   ├── bloom.py
            │   ├── chatglm.py
            │   ├── commandr.py
            │   ├── dbrx.py
            │   ├── decilm.py
            │   ├── deepseek.py
            │   ├── falcon.py
            │   ├── gemma.py
            │   ├── gpt2.py
            │   ├── gpt_bigcode.py
            │   ├── gpt_j.py
            │   ├── gpt_neox.py
            │   ├── internlm2.py
            │   ├── jais.py
            │   ├── llama.py
            │   ├── llama_org.py
            │   ├── llava.py
            │   ├── minicpm.py
            │   ├── mixtral.py
            │   ├── mixtral_quant.py
            │   ├── mpt.py
            │   ├── olmo.py
            │   ├── opt.py
            │   ├── orion.py
            │   ├── phi.py
            │   ├── qwen.py
            │   ├── qwen2.py
            │   ├── qwen2_moe.py
            │   ├── stablelm.py
            │   ├── starcoder2.py
            │   └── xverse.py
            ├── sampling_metadata.py
            └── utils.py
        ├── outputs.py
        ├── py.typed
        ├── sampling_params.py
        ├── sequence.py
        ├── spec_decode
            ├── __init__.py
            ├── batch_expansion.py
            ├── interfaces.py
            ├── metrics.py
            ├── multi_step_worker.py
            ├── spec_decode_worker.py
            └── util.py
        ├── test_utils.py
        ├── transformers_utils
            ├── __init__.py
            ├── config.py
            ├── configs
            │   ├── __init__.py
            │   ├── chatglm.py
            │   ├── dbrx.py
            │   ├── falcon.py
            │   ├── jais.py
            │   └── mpt.py
            ├── detokenizer.py
            ├── tokenizer.py
            ├── tokenizer_group
            │   ├── __init__.py
            │   ├── base_tokenizer_group.py
            │   ├── ray_tokenizer_group.py
            │   └── tokenizer_group.py
            └── tokenizers
            │   ├── __init__.py
            │   └── baichuan.py
        ├── usage
            ├── __init__.py
            └── usage_lib.py
        ├── utils.py
        └── worker
            ├── __init__.py
            ├── cache_engine.py
            ├── cpu_model_runner.py
            ├── cpu_worker.py
            ├── model_runner.py
            ├── neuron_model_runner.py
            ├── neuron_worker.py
            ├── worker.py
            └── worker_base.py


/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__
2 | outputs/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/README.md


--------------------------------------------------------------------------------
/example/blend.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/example/blend.py


--------------------------------------------------------------------------------
/example/blend_musique.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/example/blend_musique.py


--------------------------------------------------------------------------------
/example/blend_samsum.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/example/blend_samsum.py


--------------------------------------------------------------------------------
/example/blend_wikimqa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/example/blend_wikimqa.py


--------------------------------------------------------------------------------
/example/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/example/utils.py


--------------------------------------------------------------------------------
/inputs/1.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/1.json


--------------------------------------------------------------------------------
/inputs/10.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/10.json


--------------------------------------------------------------------------------
/inputs/2.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/2.json


--------------------------------------------------------------------------------
/inputs/3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/3.json


--------------------------------------------------------------------------------
/inputs/4.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/4.json


--------------------------------------------------------------------------------
/inputs/5.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/5.json


--------------------------------------------------------------------------------
/inputs/6.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/6.json


--------------------------------------------------------------------------------
/inputs/7.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/7.json


--------------------------------------------------------------------------------
/inputs/8.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/8.json


--------------------------------------------------------------------------------
/inputs/9.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/9.json


--------------------------------------------------------------------------------
/inputs/musique_s.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/musique_s.json


--------------------------------------------------------------------------------
/inputs/samsum.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/samsum.json


--------------------------------------------------------------------------------
/inputs/wikimqa_s.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/inputs/wikimqa_s.json


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | rouge_score


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/download-images.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/download-images.sh


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/run-amd-test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/run-amd-test.sh


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/run-benchmarks.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/run-benchmarks.sh


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/run-cpu-test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/run-cpu-test.sh


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/run-neuron-test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/run-neuron-test.sh


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/test-pipeline.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/test-pipeline.yaml


--------------------------------------------------------------------------------
/vllm_blend/.buildkite/test-template.j2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.buildkite/test-template.j2


--------------------------------------------------------------------------------
/vllm_blend/.dockerignore:
--------------------------------------------------------------------------------
1 | vllm/*.so
2 | 


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/100-documentation.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/100-documentation.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/200-installation.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/200-installation.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/300-usage.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/300-usage.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/400-bug report.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/400-bug report.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/500-feature request.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/500-feature request.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/600-new model.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/600-new model.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/700-performance discussion.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/700-performance discussion.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/800-misc discussion.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/ISSUE_TEMPLATE/800-misc discussion.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | 


--------------------------------------------------------------------------------
/vllm_blend/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/PULL_REQUEST_TEMPLATE.md


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/mypy.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/mypy.yaml


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/publish.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/ruff.yml


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/scripts/build.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/scripts/build.sh


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/scripts/create_release.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/scripts/create_release.js


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/scripts/cuda-install.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/scripts/cuda-install.sh


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/scripts/env.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/scripts/env.sh


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/scripts/pytorch-install.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/scripts/pytorch-install.sh


--------------------------------------------------------------------------------
/vllm_blend/.github/workflows/yapf.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.github/workflows/yapf.yml


--------------------------------------------------------------------------------
/vllm_blend/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.gitignore


--------------------------------------------------------------------------------
/vllm_blend/.readthedocs.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/.readthedocs.yaml


--------------------------------------------------------------------------------
/vllm_blend/.yapfignore:
--------------------------------------------------------------------------------
1 | collect_env.py
2 | 


--------------------------------------------------------------------------------
/vllm_blend/CMakeLists.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/CMakeLists.txt


--------------------------------------------------------------------------------
/vllm_blend/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/CONTRIBUTING.md


--------------------------------------------------------------------------------
/vllm_blend/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/Dockerfile


--------------------------------------------------------------------------------
/vllm_blend/Dockerfile.cpu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/Dockerfile.cpu


--------------------------------------------------------------------------------
/vllm_blend/Dockerfile.neuron:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/Dockerfile.neuron


--------------------------------------------------------------------------------
/vllm_blend/Dockerfile.rocm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/Dockerfile.rocm


--------------------------------------------------------------------------------
/vllm_blend/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/LICENSE


--------------------------------------------------------------------------------
/vllm_blend/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/MANIFEST.in


--------------------------------------------------------------------------------
/vllm_blend/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/README.md


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/README.md


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/backend_request_func.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/backend_request_func.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/benchmark_latency.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/benchmark_latency.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/benchmark_prefix_caching.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/benchmark_prefix_caching.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/benchmark_serving.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/benchmark_serving.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/benchmark_throughput.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/benchmark_throughput.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/kernels/benchmark_aqlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/kernels/benchmark_aqlm.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/kernels/benchmark_mixtral_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/kernels/benchmark_mixtral_moe.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/kernels/benchmark_paged_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/kernels/benchmark_paged_attention.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/kernels/benchmark_rope.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/kernels/benchmark_rope.py


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/launch_tgi_server.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/launch_tgi_server.sh


--------------------------------------------------------------------------------
/vllm_blend/benchmarks/sonnet.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/benchmarks/sonnet.txt


--------------------------------------------------------------------------------
/vllm_blend/build-docker.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/build-docker.sh


--------------------------------------------------------------------------------
/vllm_blend/cmake/cpu_extension.cmake:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/cmake/cpu_extension.cmake


--------------------------------------------------------------------------------
/vllm_blend/cmake/hipify.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/cmake/hipify.py


--------------------------------------------------------------------------------
/vllm_blend/cmake/utils.cmake:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/cmake/utils.cmake


--------------------------------------------------------------------------------
/vllm_blend/collect_env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/collect_env.py


--------------------------------------------------------------------------------
/vllm_blend/csrc/activation_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/activation_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/attention_dtypes.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/attention_dtypes.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/attention_generic.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/attention_generic.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/attention_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/attention_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/attention_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/attention_utils.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/dtype_bfloat16.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/dtype_bfloat16.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/dtype_float16.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/dtype_float16.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/dtype_float32.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/dtype_float32.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/attention/dtype_fp8.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/attention/dtype_fp8.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/cache.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cache.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/cache_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cache_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/activation.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/activation.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/attention.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/attention.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/cache.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/cache.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/cpu_types.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/cpu_types.hpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/layernorm.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/layernorm.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/pos_encoding.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/pos_encoding.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cpu/pybind.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cpu/pybind.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/cuda_compat.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cuda_compat.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/cuda_utils.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cuda_utils.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/cuda_utils_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/cuda_utils_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/custom_all_reduce.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/custom_all_reduce.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/custom_all_reduce.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/custom_all_reduce.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/custom_all_reduce_test.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/custom_all_reduce_test.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/dispatch_utils.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/dispatch_utils.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/layernorm_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/layernorm_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/moe/moe_ops.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/moe/moe_ops.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/moe/moe_ops.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/moe/moe_ops.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/moe/topk_softmax_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/moe/topk_softmax_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/moe_align_block_size_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/moe_align_block_size_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/ops.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/ops.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/pos_encoding_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/pos_encoding_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/LICENSE


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_config.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_config.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/bgmv_impl.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/bgmv_impl.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/generator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/generator.py


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/bgmv/vec_dtypes.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/bgmv/vec_dtypes.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/punica/punica_ops.cc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/punica/punica_ops.cc


--------------------------------------------------------------------------------
/vllm_blend/csrc/pybind.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/pybind.cpp


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/aqlm/gemm_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/aqlm/gemm_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/awq/dequantize.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/awq/dequantize.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/awq/gemm_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/awq/gemm_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/fp8/amd_detail/hip_float8.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/fp8/amd_detail/hip_float8.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/fp8/amd_detail/hip_float8_impl.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/fp8/amd_detail/hip_float8_impl.h


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/fp8/amd_detail/quant_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/fp8/amd_detail/quant_utils.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/fp8/fp8_cuda_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/fp8/fp8_cuda_kernels.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/fp8_e5m2_kvcache/quant_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/fp8_e5m2_kvcache/quant_utils.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/compat.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/compat.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/matrix_view.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/matrix_view.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/q_gemm.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/q_gemm.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/qdq_2.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/qdq_2.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/qdq_3.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/qdq_3.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/qdq_4.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/qdq_4.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/qdq_8.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/qdq_8.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/gptq/qdq_util.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/gptq/qdq_util.cuh


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/marlin/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/marlin/LICENSE


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/marlin/marlin_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/marlin/marlin_cuda_kernel.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/quantization/squeezellm/quant_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/quantization/squeezellm/quant_cuda_kernel.cu


--------------------------------------------------------------------------------
/vllm_blend/csrc/reduction_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/csrc/reduction_utils.cuh


--------------------------------------------------------------------------------
/vllm_blend/docs/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/Makefile


--------------------------------------------------------------------------------
/vllm_blend/docs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/README.md


--------------------------------------------------------------------------------
/vllm_blend/docs/make.bat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/make.bat


--------------------------------------------------------------------------------
/vllm_blend/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/requirements-docs.txt


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/k_vecs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/k_vecs.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/key.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/key.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/logits_vec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/logits_vec.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/q_vecs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/q_vecs.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/query.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/v_vec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/v_vec.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/kernel/value.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/kernel/value.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/logos/vllm-logo-only-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/logos/vllm-logo-only-light.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/logos/vllm-logo-text-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/logos/vllm-logo-text-dark.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/assets/logos/vllm-logo-text-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/assets/logos/vllm-logo-text-light.png


--------------------------------------------------------------------------------
/vllm_blend/docs/source/conf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/conf.py


--------------------------------------------------------------------------------
/vllm_blend/docs/source/dev/engine/async_llm_engine.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/dev/engine/async_llm_engine.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/dev/engine/engine_index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/dev/engine/engine_index.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/dev/engine/llm_engine.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/dev/engine/llm_engine.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/dev/kernel/paged_attention.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/dev/kernel/paged_attention.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/dev/sampling_params.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/dev/sampling_params.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/generate_examples.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/generate_examples.py


--------------------------------------------------------------------------------
/vllm_blend/docs/source/getting_started/amd-installation.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/getting_started/amd-installation.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/getting_started/cpu-installation.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/getting_started/cpu-installation.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/getting_started/examples/examples_index.template.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/getting_started/examples/examples_index.template.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/getting_started/installation.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/getting_started/installation.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/getting_started/neuron-installation.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/getting_started/neuron-installation.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/getting_started/quickstart.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/getting_started/quickstart.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/index.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/models/adding_model.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/models/adding_model.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/models/engine_args.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/models/engine_args.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/models/lora.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/models/lora.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/models/supported_models.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/models/supported_models.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/quantization/auto_awq.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/quantization/auto_awq.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/quantization/fp8_e4m3_kvcache.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/quantization/fp8_e4m3_kvcache.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/quantization/fp8_e5m2_kvcache.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/quantization/fp8_e5m2_kvcache.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/deploying_with_bentoml.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/deploying_with_bentoml.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/deploying_with_docker.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/deploying_with_docker.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/deploying_with_kserve.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/deploying_with_kserve.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/deploying_with_triton.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/deploying_with_triton.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/distributed_serving.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/distributed_serving.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/integrations.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/integrations.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/metrics.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/metrics.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/openai_compatible_server.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/openai_compatible_server.md


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/run_on_sky.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/run_on_sky.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/serving_with_langchain.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/serving_with_langchain.rst


--------------------------------------------------------------------------------
/vllm_blend/docs/source/serving/usage_stats.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/docs/source/serving/usage_stats.md


--------------------------------------------------------------------------------
/vllm_blend/examples/api_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/api_client.py


--------------------------------------------------------------------------------
/vllm_blend/examples/aqlm_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/aqlm_example.py


--------------------------------------------------------------------------------
/vllm_blend/examples/fp8/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/fp8/README.md


--------------------------------------------------------------------------------
/vllm_blend/examples/fp8/extract_scales.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/fp8/extract_scales.py


--------------------------------------------------------------------------------
/vllm_blend/examples/fp8/quantizer/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/fp8/quantizer/README.md


--------------------------------------------------------------------------------
/vllm_blend/examples/fp8/quantizer/quantize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/fp8/quantizer/quantize.py


--------------------------------------------------------------------------------
/vllm_blend/examples/gradio_openai_chatbot_webserver.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/gradio_openai_chatbot_webserver.py


--------------------------------------------------------------------------------
/vllm_blend/examples/gradio_webserver.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/gradio_webserver.py


--------------------------------------------------------------------------------
/vllm_blend/examples/llava_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/llava_example.py


--------------------------------------------------------------------------------
/vllm_blend/examples/llm_engine_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/llm_engine_example.py


--------------------------------------------------------------------------------
/vllm_blend/examples/multilora_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/multilora_inference.py


--------------------------------------------------------------------------------
/vllm_blend/examples/offline_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/offline_inference.py


--------------------------------------------------------------------------------
/vllm_blend/examples/offline_inference_distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/offline_inference_distributed.py


--------------------------------------------------------------------------------
/vllm_blend/examples/offline_inference_neuron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/offline_inference_neuron.py


--------------------------------------------------------------------------------
/vllm_blend/examples/offline_inference_with_prefix.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/offline_inference_with_prefix.py


--------------------------------------------------------------------------------
/vllm_blend/examples/openai_chat_completion_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/openai_chat_completion_client.py


--------------------------------------------------------------------------------
/vllm_blend/examples/openai_completion_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/openai_completion_client.py


--------------------------------------------------------------------------------
/vllm_blend/examples/production_monitoring/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/production_monitoring/README.md


--------------------------------------------------------------------------------
/vllm_blend/examples/production_monitoring/docker-compose.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/production_monitoring/docker-compose.yaml


--------------------------------------------------------------------------------
/vllm_blend/examples/production_monitoring/prometheus.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/production_monitoring/prometheus.yaml


--------------------------------------------------------------------------------
/vllm_blend/examples/template_alpaca.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_alpaca.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_baichuan.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_baichuan.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_chatglm.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_chatglm.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_chatglm2.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_chatglm2.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_chatml.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_chatml.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_falcon.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_falcon.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_falcon_180b.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_falcon_180b.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/template_inkbot.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/template_inkbot.jinja


--------------------------------------------------------------------------------
/vllm_blend/examples/tensorize_vllm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/examples/tensorize_vllm_model.py


--------------------------------------------------------------------------------
/vllm_blend/format.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/format.sh


--------------------------------------------------------------------------------
/vllm_blend/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/pyproject.toml


--------------------------------------------------------------------------------
/vllm_blend/requirements-build.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-build.txt


--------------------------------------------------------------------------------
/vllm_blend/requirements-common.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-common.txt


--------------------------------------------------------------------------------
/vllm_blend/requirements-cpu.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-cpu.txt


--------------------------------------------------------------------------------
/vllm_blend/requirements-cuda.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-cuda.txt


--------------------------------------------------------------------------------
/vllm_blend/requirements-dev.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-dev.txt


--------------------------------------------------------------------------------
/vllm_blend/requirements-neuron.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-neuron.txt


--------------------------------------------------------------------------------
/vllm_blend/requirements-rocm.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/requirements-rocm.txt


--------------------------------------------------------------------------------
/vllm_blend/rocm_patch/rocm_bf16.patch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/rocm_patch/rocm_bf16.patch


--------------------------------------------------------------------------------
/vllm_blend/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/setup.py


--------------------------------------------------------------------------------
/vllm_blend/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/async_engine/api_server_async_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/async_engine/api_server_async_engine.py


--------------------------------------------------------------------------------
/vllm_blend/tests/async_engine/test_api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/async_engine/test_api_server.py


--------------------------------------------------------------------------------
/vllm_blend/tests/async_engine/test_async_llm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/async_engine/test_async_llm_engine.py


--------------------------------------------------------------------------------
/vllm_blend/tests/async_engine/test_chat_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/async_engine/test_chat_template.py


--------------------------------------------------------------------------------
/vllm_blend/tests/async_engine/test_request_tracker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/async_engine/test_request_tracker.py


--------------------------------------------------------------------------------
/vllm_blend/tests/basic_correctness/test_basic_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/basic_correctness/test_basic_correctness.py


--------------------------------------------------------------------------------
/vllm_blend/tests/basic_correctness/test_chunked_prefill.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/basic_correctness/test_chunked_prefill.py


--------------------------------------------------------------------------------
/vllm_blend/tests/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/conftest.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/conftest.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/e2e/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/e2e/conftest.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/e2e/test_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/e2e/test_correctness.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/test_block_manager_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/test_block_manager_v2.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/test_block_table.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/test_block_table.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/test_common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/test_common.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/test_cpu_gpu_block_allocator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/test_cpu_gpu_block_allocator.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/test_naive_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/test_naive_block.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/block/test_prefix_caching_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/block/test_prefix_caching_block.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/test_block_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/test_block_manager.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/test_chunked_prefill_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/test_chunked_prefill_scheduler.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/test_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/test_scheduler.py


--------------------------------------------------------------------------------
/vllm_blend/tests/core/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/core/utils.py


--------------------------------------------------------------------------------
/vllm_blend/tests/distributed/test_basic_distributed_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/distributed/test_basic_distributed_correctness.py


--------------------------------------------------------------------------------
/vllm_blend/tests/distributed/test_chunked_prefill_distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/distributed/test_chunked_prefill_distributed.py


--------------------------------------------------------------------------------
/vllm_blend/tests/distributed/test_comm_ops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/distributed/test_comm_ops.py


--------------------------------------------------------------------------------
/vllm_blend/tests/distributed/test_custom_all_reduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/distributed/test_custom_all_reduce.py


--------------------------------------------------------------------------------
/vllm_blend/tests/distributed/test_pynccl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/distributed/test_pynccl.py


--------------------------------------------------------------------------------
/vllm_blend/tests/distributed/test_pynccl_library.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/distributed/test_pynccl_library.py


--------------------------------------------------------------------------------
/vllm_blend/tests/engine/output_processor/test_multi_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/engine/output_processor/test_multi_step.py


--------------------------------------------------------------------------------
/vllm_blend/tests/engine/test_computed_prefix_blocks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/engine/test_computed_prefix_blocks.py


--------------------------------------------------------------------------------
/vllm_blend/tests/engine/test_detokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/engine/test_detokenization.py


--------------------------------------------------------------------------------
/vllm_blend/tests/engine/test_skip_tokenizer_init.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/engine/test_skip_tokenizer_init.py


--------------------------------------------------------------------------------
/vllm_blend/tests/engine/test_stop_reason.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/engine/test_stop_reason.py


--------------------------------------------------------------------------------
/vllm_blend/tests/engine/test_stop_strings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/engine/test_stop_strings.py


--------------------------------------------------------------------------------
/vllm_blend/tests/entrypoints/test_guided_processors.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/entrypoints/test_guided_processors.py


--------------------------------------------------------------------------------
/vllm_blend/tests/entrypoints/test_llm_generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/entrypoints/test_llm_generate.py


--------------------------------------------------------------------------------
/vllm_blend/tests/entrypoints/test_openai_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/entrypoints/test_openai_server.py


--------------------------------------------------------------------------------
/vllm_blend/tests/entrypoints/test_server_oot_registration.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/entrypoints/test_server_oot_registration.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/allclose_default.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/allclose_default.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/conftest.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_activation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_activation.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_attention.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_cache.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_layernorm.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_moe.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_pos_encoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_pos_encoding.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_prefix_prefill.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_prefix_prefill.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_rand.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_rand.py


--------------------------------------------------------------------------------
/vllm_blend/tests/kernels/test_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/kernels/test_sampler.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/conftest.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_baichuan.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_chatglm3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_chatglm3.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_gemma.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_gemma.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_layer_variation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_layer_variation.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_layers.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_llama.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_lora.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_lora_checkpoints.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_lora_checkpoints.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_lora_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_lora_manager.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_mixtral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_mixtral.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_punica.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_punica.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_quant_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_quant_model.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_tokenizer_group.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_utils.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/test_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/test_worker.py


--------------------------------------------------------------------------------
/vllm_blend/tests/lora/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/lora/utils.py


--------------------------------------------------------------------------------
/vllm_blend/tests/metrics/test_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/metrics/test_metrics.py


--------------------------------------------------------------------------------
/vllm_blend/tests/model_executor/weight_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/model_executor/weight_utils.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_aqlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_aqlm.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_big_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_big_models.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_llava.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_llava.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_marlin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_marlin.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_mistral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_mistral.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_models.py


--------------------------------------------------------------------------------
/vllm_blend/tests/models/test_oot_registration.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/models/test_oot_registration.py


--------------------------------------------------------------------------------
/vllm_blend/tests/prefix_caching/test_prefix_caching.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/prefix_caching/test_prefix_caching.py


--------------------------------------------------------------------------------
/vllm_blend/tests/prompts/example.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/prompts/example.txt


--------------------------------------------------------------------------------
/vllm_blend/tests/prompts/summary.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/prompts/summary.txt


--------------------------------------------------------------------------------
/vllm_blend/tests/quantization/test_autogptq_marlin_configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/quantization/test_autogptq_marlin_configs.py


--------------------------------------------------------------------------------
/vllm_blend/tests/quantization/test_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/quantization/test_fp8.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_beam_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_beam_search.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_logits_processor.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_logprobs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_logprobs.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_ranks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_ranks.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_rejection_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_rejection_sampler.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_sampler.py


--------------------------------------------------------------------------------
/vllm_blend/tests/samplers/test_seeded_generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/samplers/test_seeded_generate.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/e2e/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/e2e/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/e2e/conftest.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/e2e/test_compatibility.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/e2e/test_compatibility.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/e2e/test_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/e2e/test_correctness.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/test_batch_expansion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/test_batch_expansion.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/test_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/test_metrics.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/test_multi_step_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/test_multi_step_worker.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/test_spec_decode_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/test_spec_decode_worker.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/test_utils.py


--------------------------------------------------------------------------------
/vllm_blend/tests/spec_decode/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/spec_decode/utils.py


--------------------------------------------------------------------------------
/vllm_blend/tests/tensorizer_loader/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/tensorizer_loader/tensorize_vllm_model_for_testing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/tensorizer_loader/tensorize_vllm_model_for_testing.py


--------------------------------------------------------------------------------
/vllm_blend/tests/tensorizer_loader/test_tensorizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/tensorizer_loader/test_tensorizer.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_cache_block_hashing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_cache_block_hashing.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_config.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_logger.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_logits_processor.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_regression.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_regression.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_sampling_params.py


--------------------------------------------------------------------------------
/vllm_blend/tests/test_sequence.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/test_sequence.py


--------------------------------------------------------------------------------
/vllm_blend/tests/tokenization/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/tokenization/test_cached_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/tokenization/test_cached_tokenizer.py


--------------------------------------------------------------------------------
/vllm_blend/tests/tokenization/test_detokenize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/tokenization/test_detokenize.py


--------------------------------------------------------------------------------
/vllm_blend/tests/tokenization/test_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/tokenization/test_tokenizer_group.py


--------------------------------------------------------------------------------
/vllm_blend/tests/worker/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/tests/worker/test_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/worker/test_model_runner.py


--------------------------------------------------------------------------------
/vllm_blend/tests/worker/test_swap.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/tests/worker/test_swap.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/_custom_ops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/_custom_ops.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/abstract.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/backends/abstract.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/flash_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/backends/flash_attn.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/rocm_flash_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/backends/rocm_flash_attn.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/torch_sdpa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/backends/torch_sdpa.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/xformers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/backends/xformers.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/backends/xformers_org.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/backends/xformers_org.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/layer.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/layer_org.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/layer_org.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/ops/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/ops/paged_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/ops/paged_attn.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/ops/prefix_prefill.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/ops/prefix_prefill.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/ops/triton_flash_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/ops/triton_flash_attention.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/attention/selector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/attention/selector.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/block.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/config.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/block_table.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block/block_table.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block/common.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/cpu_gpu_block_allocator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block/cpu_gpu_block_allocator.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block/interfaces.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/naive_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block/naive_block.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block/prefix_caching_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block/prefix_caching_block.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block_manager_v1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block_manager_v1.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/block_manager_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/block_manager_v2.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/evictor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/evictor.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/interfaces.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/policy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/policy.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/core/scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/core/scheduler.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/communication_op.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/communication_op.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/device_communicators/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/device_communicators/custom_all_reduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/device_communicators/custom_all_reduce.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/device_communicators/pynccl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/device_communicators/pynccl.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/device_communicators/pynccl_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/device_communicators/pynccl_utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/parallel_state.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/distributed/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/distributed/utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/arg_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/arg_utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/async_llm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/async_llm_engine.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/llm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/llm_engine.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/metrics.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/output_processor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/output_processor/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/output_processor/interfaces.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/output_processor/multi_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/output_processor/multi_step.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/output_processor/single_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/output_processor/single_step.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/output_processor/stop_checker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/output_processor/stop_checker.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/output_processor/util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/output_processor/util.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/engine/ray_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/engine/ray_utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/api_server.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/llm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/openai/api_server.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/cli_args.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/openai/cli_args.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/protocol.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/openai/protocol.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/serving_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/openai/serving_chat.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/serving_completion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/openai/serving_completion.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/entrypoints/openai/serving_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/entrypoints/openai/serving_engine.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/executor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/executor/cpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/executor/cpu_executor.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/executor/executor_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/executor/executor_base.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/executor/gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/executor/gpu_executor.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/executor/neuron_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/executor/neuron_executor.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/executor/ray_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/executor/ray_gpu_executor.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/logger.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/layers.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/lora.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/models.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/punica.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/punica.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/request.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/request.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/lora/worker_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/lora/worker_manager.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/guided_decoding/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/guided_decoding/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/guided_decoding/outlines_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/guided_decoding/outlines_decoding.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/guided_decoding/outlines_logits_processors.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/guided_decoding/outlines_logits_processors.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/activation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/activation.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/fused_moe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/fused_moe/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/fused_moe/configs/README:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/fused_moe/configs/README


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/fused_moe/fused_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/fused_moe/fused_moe.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/layernorm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/linear.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/logits_processor.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/ops/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/ops/rand.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/ops/rand.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/ops/sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/ops/sample.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/aqlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/aqlm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/awq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/awq.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/base_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/base_config.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/fp8.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/gptq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/gptq.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/marlin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/marlin.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/schema.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/schema.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/quantization/squeezellm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/quantization/squeezellm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/rejection_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/rejection_sampler.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/rotary_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/rotary_embedding.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/sampler.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/layers/vocab_parallel_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/layers/vocab_parallel_embedding.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/model_loader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/model_loader/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/model_loader/loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/model_loader/loader.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/model_loader/neuron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/model_loader/neuron.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/model_loader/tensorizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/model_loader/tensorizer.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/model_loader/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/model_loader/utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/model_loader/weight_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/model_loader/weight_utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/baichuan.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/bloom.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/bloom.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/chatglm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/commandr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/commandr.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/dbrx.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/dbrx.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/decilm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/decilm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/deepseek.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/deepseek.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/falcon.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/falcon.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/gemma.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/gemma.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/gpt2.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/gpt_bigcode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/gpt_bigcode.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/gpt_j.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/gpt_j.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/gpt_neox.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/gpt_neox.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/internlm2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/internlm2.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/jais.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/jais.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/llama.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/llama_org.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/llama_org.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/llava.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/llava.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/minicpm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/minicpm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/mixtral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/mixtral.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/mixtral_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/mixtral_quant.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/mpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/mpt.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/olmo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/olmo.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/opt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/opt.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/orion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/orion.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/phi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/phi.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/qwen.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/qwen.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/qwen2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/qwen2.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/qwen2_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/qwen2_moe.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/stablelm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/stablelm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/starcoder2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/starcoder2.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/models/xverse.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/models/xverse.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/sampling_metadata.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/sampling_metadata.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/model_executor/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/model_executor/utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/outputs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/outputs.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561.
2 | # The vllm package uses inline types.
3 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/sampling_params.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/sequence.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/sequence.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/batch_expansion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/spec_decode/batch_expansion.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/spec_decode/interfaces.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/spec_decode/metrics.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/multi_step_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/spec_decode/multi_step_worker.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/spec_decode_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/spec_decode/spec_decode_worker.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/spec_decode/util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/spec_decode/util.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/test_utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/config.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/configs/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/configs/chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/configs/chatglm.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/configs/dbrx.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/configs/dbrx.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/configs/falcon.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/configs/falcon.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/configs/jais.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/configs/jais.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/configs/mpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/configs/mpt.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/detokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/detokenizer.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizer.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizer_group/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizer_group/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizer_group/tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizer_group/tokenizer_group.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizers/__init__.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/transformers_utils/tokenizers/baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/transformers_utils/tokenizers/baichuan.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/usage/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/usage/usage_lib.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/usage/usage_lib.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/utils.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/cache_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/cache_engine.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/cpu_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/cpu_model_runner.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/cpu_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/cpu_worker.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/model_runner.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/neuron_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/neuron_model_runner.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/neuron_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/neuron_worker.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/worker.py


--------------------------------------------------------------------------------
/vllm_blend/vllm/worker/worker_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YaoJiayi/CacheBlend/HEAD/vllm_blend/vllm/worker/worker_base.py


--------------------------------------------------------------------------------