├── .buildkite ├── check-wheel-size.py ├── download-images.sh ├── run-amd-test.sh ├── run-benchmarks.sh ├── run-cpu-test.sh ├── run-neuron-test.sh ├── test-pipeline.yaml └── test-template.j2 ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── 100-documentation.yml │ ├── 200-installation.yml │ ├── 300-usage.yml │ ├── 400-bug report.yml │ ├── 500-feature request.yml │ ├── 600-new model.yml │ ├── 700-performance discussion.yml │ ├── 750-RFC.yml │ ├── 800-misc discussion.yml │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── mypy.yaml │ ├── publish.yml │ ├── ruff.yml │ ├── scripts │ ├── build.sh │ ├── create_release.js │ ├── cuda-install.sh │ ├── env.sh │ └── pytorch-install.sh │ └── yapf.yml ├── .gitignore ├── .readthedocs.yaml ├── .yapfignore ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.cpu ├── Dockerfile.neuron ├── Dockerfile.rocm ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_vllm_musa.md ├── benchmarks ├── README.md ├── backend_request_func.py ├── benchmark_latency.py ├── benchmark_prefix_caching.py ├── benchmark_serving.py ├── benchmark_throughput.py ├── kernels │ ├── benchmark_aqlm.py │ ├── benchmark_mixtral_moe.py │ ├── benchmark_paged_attention.py │ └── benchmark_rope.py ├── launch_tgi_server.sh └── sonnet.txt ├── cmake ├── cpu_extension.cmake ├── hipify.py └── utils.cmake ├── collect_env.py ├── csrc_musa ├── activation_kernels.mu ├── attention │ ├── attention_dtypes.h │ ├── attention_generic.muh │ ├── attention_kernels.mu │ ├── attention_utils.muh │ ├── dtype_bfloat16.muh │ ├── dtype_float16.muh │ ├── dtype_float32.muh │ └── dtype_fp8.muh ├── cache.h ├── cache_kernels.mu ├── cpu │ ├── activation.cpp │ ├── attention.cpp │ ├── cache.cpp │ ├── cpu_types.hpp │ ├── layernorm.cpp │ ├── pos_encoding.cpp │ └── pybind.cpp ├── custom_all_reduce.mu ├── custom_all_reduce.muh ├── custom_all_reduce_test.mu ├── dispatch_utils.h ├── layernorm_kernels.mu ├── moe │ ├── moe_ops.cpp │ ├── moe_ops.h │ └── topk_softmax_kernels.mu ├── moe_align_block_size_kernels.mu ├── musa_compat.h ├── musa_utils.h ├── musa_utils_kernels.mu ├── ops.h ├── pos_encoding_kernels.mu ├── punica │ ├── .LICENSE │ ├── bgmv │ │ ├── bgmv_bf16_bf16_bf16.mu │ │ ├── bgmv_bf16_fp32_bf16.mu │ │ ├── bgmv_config.h │ │ ├── bgmv_fp16_fp16_fp16.mu │ │ ├── bgmv_fp16_fp32_fp16.mu │ │ ├── bgmv_fp32_bf16_bf16.mu │ │ ├── bgmv_fp32_fp16_fp16.mu │ │ ├── bgmv_impl.muh │ │ ├── generator.py │ │ └── vec_dtypes.muh │ └── punica_ops.cc ├── pybind.cpp ├── quantization │ ├── aqlm │ │ └── gemm_kernels.mu │ ├── awq │ │ ├── dequantize.muh │ │ └── gemm_kernels.mu │ ├── fp8 │ │ ├── amd_detail │ │ │ ├── hip_float8.h │ │ │ ├── hip_float8_impl.h │ │ │ └── quant_utils.muh │ │ └── fp8_cuda_kernels.mu │ ├── fp8_e5m2_kvcache │ │ └── quant_utils.muh │ ├── gptq │ │ ├── compat.muh │ │ ├── matrix_view.muh │ │ ├── q_gemm.mu │ │ ├── qdq_2.muh │ │ ├── qdq_3.muh │ │ ├── qdq_4.muh │ │ ├── qdq_8.muh │ │ └── qdq_util.muh │ ├── gptq_marlin │ │ ├── gptq_marlin.mu │ │ ├── gptq_marlin.muh │ │ └── gptq_marlin_repack.mu │ ├── marlin │ │ ├── .LICENSE │ │ └── marlin_cuda_kernel.mu │ └── squeezellm │ │ └── quant_cuda_kernel.mu └── reduction_utils.muh ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements-docs.txt └── source │ ├── assets │ ├── dev │ │ └── dockerfile-stages-dependency.png │ ├── kernel │ │ ├── k_vecs.png │ │ ├── key.png │ │ ├── logits_vec.png │ │ ├── q_vecs.png │ │ ├── query.png │ │ ├── v_vec.png │ │ └── value.png │ └── logos │ │ ├── vllm-logo-only-light.png │ │ ├── vllm-logo-text-dark.png │ │ └── vllm-logo-text-light.png │ ├── conf.py │ ├── dev │ ├── dockerfile │ │ └── dockerfile.rst │ ├── engine │ │ ├── async_llm_engine.rst │ │ ├── engine_index.rst │ │ └── llm_engine.rst │ ├── kernel │ │ └── paged_attention.rst │ └── sampling_params.rst │ ├── generate_examples.py │ ├── getting_started │ ├── amd-installation.rst │ ├── cpu-installation.rst │ ├── examples │ │ └── examples_index.template.rst │ ├── installation.rst │ ├── neuron-installation.rst │ └── quickstart.rst │ ├── index.rst │ ├── models │ ├── adding_model.rst │ ├── engine_args.rst │ ├── lora.rst │ ├── performance.rst │ └── supported_models.rst │ ├── quantization │ ├── auto_awq.rst │ ├── fp8_e4m3_kvcache.rst │ └── fp8_e5m2_kvcache.rst │ └── serving │ ├── deploying_with_bentoml.rst │ ├── deploying_with_docker.rst │ ├── deploying_with_kserve.rst │ ├── deploying_with_triton.rst │ ├── distributed_serving.rst │ ├── env_vars.rst │ ├── integrations.rst │ ├── metrics.rst │ ├── openai_compatible_server.md │ ├── run_on_sky.rst │ ├── serving_with_langchain.rst │ └── usage_stats.md ├── examples ├── api_client.py ├── aqlm_example.py ├── fp8 │ ├── README.md │ ├── extract_scales.py │ └── quantizer │ │ ├── README.md │ │ └── quantize.py ├── gradio_openai_chatbot_webserver.py ├── gradio_webserver.py ├── llava_example.py ├── llm_engine_example.py ├── logging_configuration.md ├── multilora_inference.py ├── offline_inference.py ├── offline_inference_distributed.py ├── offline_inference_neuron.py ├── offline_inference_with_prefix.py ├── openai_chat_completion_client.py ├── openai_completion_client.py ├── production_monitoring │ ├── README.md │ ├── docker-compose.yaml │ ├── grafana.json │ └── prometheus.yaml ├── template_alpaca.jinja ├── template_baichuan.jinja ├── template_chatglm.jinja ├── template_chatglm2.jinja ├── template_chatml.jinja ├── template_falcon.jinja ├── template_falcon_180b.jinja ├── template_inkbot.jinja └── tensorize_vllm_model.py ├── format.sh ├── musa_porting.py ├── pyproject.toml ├── requirements-build.txt ├── requirements-common.txt ├── requirements-cpu.txt ├── requirements-cuda.txt ├── requirements-dev.txt ├── requirements-musa.txt ├── requirements-neuron.txt ├── requirements-rocm.txt ├── rocm_patch └── rocm_bf16.patch ├── setup.py ├── tests ├── __init__.py ├── async_engine │ ├── api_server_async_engine.py │ ├── test_api_server.py │ ├── test_async_llm_engine.py │ ├── test_chat_template.py │ ├── test_merge_async_iterators.py │ ├── test_openapi_server_ray.py │ └── test_request_tracker.py ├── basic_correctness │ ├── test_basic_correctness.py │ ├── test_chunked_prefill.py │ └── test_preemption.py ├── conftest.py ├── core │ ├── __init__.py │ ├── block │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── e2e │ │ │ ├── conftest.py │ │ │ └── test_correctness.py │ │ ├── test_block_manager_v2.py │ │ ├── test_block_table.py │ │ ├── test_common.py │ │ ├── test_cpu_gpu_block_allocator.py │ │ ├── test_naive_block.py │ │ └── test_prefix_caching_block.py │ ├── test_block_manager.py │ ├── test_chunked_prefill_scheduler.py │ ├── test_scheduler.py │ └── utils.py ├── distributed │ ├── test_basic_distributed_correctness.py │ ├── test_chunked_prefill_distributed.py │ ├── test_comm_ops.py │ ├── test_custom_all_reduce.py │ ├── test_pynccl.py │ └── test_pynccl_library.py ├── engine │ ├── output_processor │ │ └── test_multi_step.py │ ├── test_computed_prefix_blocks.py │ ├── test_detokenization.py │ ├── test_multiproc_workers.py │ ├── test_skip_tokenizer_init.py │ ├── test_stop_reason.py │ └── test_stop_strings.py ├── entrypoints │ ├── openai │ │ └── test_serving_chat.py │ ├── test_guided_processors.py │ ├── test_llm_generate.py │ ├── test_openai_server.py │ └── test_server_oot_registration.py ├── fp8_kv │ ├── llama2-70b-fp8-kv │ │ └── kv_cache_scales.json │ └── llama2-7b-fp8-kv │ │ └── kv_cache_scales.json ├── kernels │ ├── allclose_default.py │ ├── conftest.py │ ├── test_activation.py │ ├── test_attention.py │ ├── test_cache.py │ ├── test_layernorm.py │ ├── test_moe.py │ ├── test_pos_encoding.py │ ├── test_prefix_prefill.py │ ├── test_rand.py │ └── test_sampler.py ├── lora │ ├── __init__.py │ ├── conftest.py │ ├── test_baichuan.py │ ├── test_chatglm3.py │ ├── test_gemma.py │ ├── test_layer_variation.py │ ├── test_layers.py │ ├── test_llama.py │ ├── test_lora.py │ ├── test_lora_checkpoints.py │ ├── test_lora_manager.py │ ├── test_mixtral.py │ ├── test_punica.py │ ├── test_quant_model.py │ ├── test_tokenizer_group.py │ ├── test_utils.py │ ├── test_worker.py │ └── utils.py ├── metrics │ └── test_metrics.py ├── model_executor │ └── weight_utils.py ├── models │ ├── test_aqlm.py │ ├── test_big_models.py │ ├── test_fp8.py │ ├── test_gptq_marlin.py │ ├── test_llava.py │ ├── test_marlin.py │ ├── test_mistral.py │ ├── test_models.py │ ├── test_oot_registration.py │ └── utils.py ├── prefix_caching │ └── test_prefix_caching.py ├── prompts │ ├── example.txt │ └── summary.txt ├── quantization │ ├── test_configs.py │ └── test_fp8.py ├── samplers │ ├── test_beam_search.py │ ├── test_ignore_eos.py │ ├── test_logits_processor.py │ ├── test_logprobs.py │ ├── test_ranks.py │ ├── test_rejection_sampler.py │ ├── test_sampler.py │ └── test_seeded_generate.py ├── spec_decode │ ├── __init__.py │ ├── e2e │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_compatibility.py │ │ ├── test_logprobs.py │ │ ├── test_multistep_correctness.py │ │ └── test_ngram_correctness.py │ ├── test_batch_expansion.py │ ├── test_metrics.py │ ├── test_multi_step_worker.py │ ├── test_ngram_worker.py │ ├── test_spec_decode_worker.py │ ├── test_utils.py │ └── utils.py ├── tensorizer_loader │ ├── __init__.py │ ├── tensorize_vllm_model_for_testing.py │ └── test_tensorizer.py ├── test_cache_block_hashing.py ├── test_config.py ├── test_logger.py ├── test_logits_processor.py ├── test_regression.py ├── test_sampling_params.py ├── test_sequence.py ├── tokenization │ ├── __init__.py │ ├── test_cached_tokenizer.py │ ├── test_detokenize.py │ ├── test_tokenizer.py │ └── test_tokenizer_group.py └── worker │ ├── __init__.py │ ├── test_model_runner.py │ └── test_swap.py └── vllm ├── __init__.py ├── _custom_ops.py ├── attention ├── __init__.py ├── backends │ ├── __init__.py │ ├── abstract.py │ ├── flash_attn.py │ ├── flashinfer.py │ ├── rocm_flash_attn.py │ ├── torch_sdpa.py │ └── xformers.py ├── layer.py ├── ops │ ├── __init__.py │ ├── paged_attn.py │ ├── prefix_prefill.py │ └── triton_flash_attention.py └── selector.py ├── block.py ├── config.py ├── core ├── __init__.py ├── block │ ├── __init__.py │ ├── block_table.py │ ├── common.py │ ├── cpu_gpu_block_allocator.py │ ├── interfaces.py │ ├── naive_block.py │ └── prefix_caching_block.py ├── block_manager_v1.py ├── block_manager_v2.py ├── evictor_v1.py ├── evictor_v2.py ├── interfaces.py ├── policy.py └── scheduler.py ├── distributed ├── __init__.py ├── communication_op.py ├── device_communicators │ ├── __init__.py │ ├── custom_all_reduce.py │ ├── pymccl.py │ ├── pymccl_utils.py │ └── pynccl.py ├── parallel_state.py └── utils.py ├── engine ├── __init__.py ├── arg_utils.py ├── async_llm_engine.py ├── llm_engine.py ├── metrics.py └── output_processor │ ├── __init__.py │ ├── interfaces.py │ ├── multi_step.py │ ├── single_step.py │ ├── stop_checker.py │ └── util.py ├── entrypoints ├── __init__.py ├── api_server.py ├── llm.py └── openai │ ├── __init__.py │ ├── api_server.py │ ├── cli_args.py │ ├── protocol.py │ ├── serving_chat.py │ ├── serving_completion.py │ └── serving_engine.py ├── envs.py ├── executor ├── __init__.py ├── cpu_executor.py ├── distributed_gpu_executor.py ├── executor_base.py ├── gpu_executor.py ├── multiproc_worker_utils.py ├── neuron_executor.py ├── ray_gpu_executor.py └── ray_utils.py ├── logger.py ├── logging ├── __init__.py └── formatter.py ├── lora ├── __init__.py ├── fully_sharded_layers.py ├── layers.py ├── lora.py ├── models.py ├── punica.py ├── request.py ├── utils.py └── worker_manager.py ├── model_executor ├── __init__.py ├── guided_decoding │ ├── __init__.py │ ├── lm_format_enforcer_decoding.py │ ├── outlines_decoding.py │ └── outlines_logits_processors.py ├── layers │ ├── __init__.py │ ├── activation.py │ ├── fused_moe │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ └── README │ │ └── fused_moe.py │ ├── layernorm.py │ ├── linear.py │ ├── logits_processor.py │ ├── ops │ │ ├── __init__.py │ │ ├── rand.py │ │ └── sample.py │ ├── quantization │ │ ├── __init__.py │ │ ├── aqlm.py │ │ ├── awq.py │ │ ├── base_config.py │ │ ├── fp8.py │ │ ├── gptq.py │ │ ├── gptq_marlin.py │ │ ├── marlin.py │ │ ├── schema.py │ │ └── squeezellm.py │ ├── rejection_sampler.py │ ├── rotary_embedding.py │ ├── sampler.py │ └── vocab_parallel_embedding.py ├── model_loader │ ├── __init__.py │ ├── loader.py │ ├── neuron.py │ ├── tensorizer.py │ ├── utils.py │ └── weight_utils.py ├── models │ ├── __init__.py │ ├── baichuan.py │ ├── bloom.py │ ├── chatglm.py │ ├── commandr.py │ ├── dbrx.py │ ├── decilm.py │ ├── deepseek.py │ ├── falcon.py │ ├── gemma.py │ ├── gpt2.py │ ├── gpt_bigcode.py │ ├── gpt_j.py │ ├── gpt_neox.py │ ├── internlm2.py │ ├── jais.py │ ├── llama.py │ ├── llava.py │ ├── minicpm.py │ ├── mixtral.py │ ├── mixtral_quant.py │ ├── mpt.py │ ├── olmo.py │ ├── opt.py │ ├── orion.py │ ├── phi.py │ ├── qwen.py │ ├── qwen2.py │ ├── qwen2_moe.py │ ├── stablelm.py │ ├── starcoder2.py │ └── xverse.py ├── sampling_metadata.py └── utils.py ├── outputs.py ├── py.typed ├── sampling_params.py ├── sequence.py ├── spec_decode ├── __init__.py ├── batch_expansion.py ├── interfaces.py ├── metrics.py ├── multi_step_worker.py ├── ngram_worker.py ├── spec_decode_worker.py ├── top1_proposer.py └── util.py ├── test_utils.py ├── transformers_utils ├── __init__.py ├── config.py ├── configs │ ├── __init__.py │ ├── chatglm.py │ ├── dbrx.py │ ├── falcon.py │ ├── jais.py │ └── mpt.py ├── detokenizer.py ├── tokenizer.py ├── tokenizer_group │ ├── __init__.py │ ├── base_tokenizer_group.py │ ├── ray_tokenizer_group.py │ └── tokenizer_group.py └── tokenizers │ ├── __init__.py │ └── baichuan.py ├── usage ├── __init__.py └── usage_lib.py ├── utils.py └── worker ├── __init__.py ├── cache_engine.py ├── cpu_model_runner.py ├── cpu_worker.py ├── model_runner.py ├── neuron_model_runner.py ├── neuron_worker.py ├── worker.py └── worker_base.py /.buildkite/check-wheel-size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/check-wheel-size.py -------------------------------------------------------------------------------- /.buildkite/download-images.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/download-images.sh -------------------------------------------------------------------------------- /.buildkite/run-amd-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/run-amd-test.sh -------------------------------------------------------------------------------- /.buildkite/run-benchmarks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/run-benchmarks.sh -------------------------------------------------------------------------------- /.buildkite/run-cpu-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/run-cpu-test.sh -------------------------------------------------------------------------------- /.buildkite/run-neuron-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/run-neuron-test.sh -------------------------------------------------------------------------------- /.buildkite/test-pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/test-pipeline.yaml -------------------------------------------------------------------------------- /.buildkite/test-template.j2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.buildkite/test-template.j2 -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | vllm/*.so 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/100-documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/100-documentation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/200-installation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/200-installation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/300-usage.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/300-usage.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/400-bug report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/400-bug report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/500-feature request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/500-feature request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/600-new model.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/600-new model.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/700-performance discussion.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/700-performance discussion.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/750-RFC.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/750-RFC.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/800-misc discussion.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/ISSUE_TEMPLATE/800-misc discussion.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/workflows/mypy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/mypy.yaml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.github/workflows/ruff.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/ruff.yml -------------------------------------------------------------------------------- /.github/workflows/scripts/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/scripts/build.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/create_release.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/scripts/create_release.js -------------------------------------------------------------------------------- /.github/workflows/scripts/cuda-install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/scripts/cuda-install.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/scripts/env.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/pytorch-install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/scripts/pytorch-install.sh -------------------------------------------------------------------------------- /.github/workflows/yapf.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.github/workflows/yapf.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.gitignore -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /.yapfignore: -------------------------------------------------------------------------------- 1 | collect_env.py 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/Dockerfile -------------------------------------------------------------------------------- /Dockerfile.cpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/Dockerfile.cpu -------------------------------------------------------------------------------- /Dockerfile.neuron: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/Dockerfile.neuron -------------------------------------------------------------------------------- /Dockerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/Dockerfile.rocm -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/README.md -------------------------------------------------------------------------------- /README_vllm_musa.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/README_vllm_musa.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/backend_request_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/backend_request_func.py -------------------------------------------------------------------------------- /benchmarks/benchmark_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/benchmark_latency.py -------------------------------------------------------------------------------- /benchmarks/benchmark_prefix_caching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/benchmark_prefix_caching.py -------------------------------------------------------------------------------- /benchmarks/benchmark_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/benchmark_serving.py -------------------------------------------------------------------------------- /benchmarks/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/benchmark_throughput.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_aqlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/kernels/benchmark_aqlm.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_mixtral_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/kernels/benchmark_mixtral_moe.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_paged_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/kernels/benchmark_paged_attention.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/kernels/benchmark_rope.py -------------------------------------------------------------------------------- /benchmarks/launch_tgi_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/launch_tgi_server.sh -------------------------------------------------------------------------------- /benchmarks/sonnet.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/benchmarks/sonnet.txt -------------------------------------------------------------------------------- /cmake/cpu_extension.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/cmake/cpu_extension.cmake -------------------------------------------------------------------------------- /cmake/hipify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/cmake/hipify.py -------------------------------------------------------------------------------- /cmake/utils.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/cmake/utils.cmake -------------------------------------------------------------------------------- /collect_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/collect_env.py -------------------------------------------------------------------------------- /csrc_musa/activation_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/activation_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/attention/attention_dtypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/attention_dtypes.h -------------------------------------------------------------------------------- /csrc_musa/attention/attention_generic.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/attention_generic.muh -------------------------------------------------------------------------------- /csrc_musa/attention/attention_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/attention_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/attention/attention_utils.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/attention_utils.muh -------------------------------------------------------------------------------- /csrc_musa/attention/dtype_bfloat16.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/dtype_bfloat16.muh -------------------------------------------------------------------------------- /csrc_musa/attention/dtype_float16.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/dtype_float16.muh -------------------------------------------------------------------------------- /csrc_musa/attention/dtype_float32.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/dtype_float32.muh -------------------------------------------------------------------------------- /csrc_musa/attention/dtype_fp8.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/attention/dtype_fp8.muh -------------------------------------------------------------------------------- /csrc_musa/cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cache.h -------------------------------------------------------------------------------- /csrc_musa/cache_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cache_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/cpu/activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/activation.cpp -------------------------------------------------------------------------------- /csrc_musa/cpu/attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/attention.cpp -------------------------------------------------------------------------------- /csrc_musa/cpu/cache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/cache.cpp -------------------------------------------------------------------------------- /csrc_musa/cpu/cpu_types.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/cpu_types.hpp -------------------------------------------------------------------------------- /csrc_musa/cpu/layernorm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/layernorm.cpp -------------------------------------------------------------------------------- /csrc_musa/cpu/pos_encoding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/pos_encoding.cpp -------------------------------------------------------------------------------- /csrc_musa/cpu/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/cpu/pybind.cpp -------------------------------------------------------------------------------- /csrc_musa/custom_all_reduce.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/custom_all_reduce.mu -------------------------------------------------------------------------------- /csrc_musa/custom_all_reduce.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/custom_all_reduce.muh -------------------------------------------------------------------------------- /csrc_musa/custom_all_reduce_test.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/custom_all_reduce_test.mu -------------------------------------------------------------------------------- /csrc_musa/dispatch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/dispatch_utils.h -------------------------------------------------------------------------------- /csrc_musa/layernorm_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/layernorm_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/moe/moe_ops.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/moe/moe_ops.cpp -------------------------------------------------------------------------------- /csrc_musa/moe/moe_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/moe/moe_ops.h -------------------------------------------------------------------------------- /csrc_musa/moe/topk_softmax_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/moe/topk_softmax_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/moe_align_block_size_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/moe_align_block_size_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/musa_compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/musa_compat.h -------------------------------------------------------------------------------- /csrc_musa/musa_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/musa_utils.h -------------------------------------------------------------------------------- /csrc_musa/musa_utils_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/musa_utils_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/ops.h -------------------------------------------------------------------------------- /csrc_musa/pos_encoding_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/pos_encoding_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/punica/.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/.LICENSE -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_bf16_bf16_bf16.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_bf16_bf16_bf16.mu -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_bf16_fp32_bf16.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_bf16_fp32_bf16.mu -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_config.h -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_fp16_fp16_fp16.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_fp16_fp16_fp16.mu -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_fp16_fp32_fp16.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_fp16_fp32_fp16.mu -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_fp32_bf16_bf16.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_fp32_bf16_bf16.mu -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_fp32_fp16_fp16.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_fp32_fp16_fp16.mu -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/bgmv_impl.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/bgmv_impl.muh -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/generator.py -------------------------------------------------------------------------------- /csrc_musa/punica/bgmv/vec_dtypes.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/bgmv/vec_dtypes.muh -------------------------------------------------------------------------------- /csrc_musa/punica/punica_ops.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/punica/punica_ops.cc -------------------------------------------------------------------------------- /csrc_musa/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/pybind.cpp -------------------------------------------------------------------------------- /csrc_musa/quantization/aqlm/gemm_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/aqlm/gemm_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/awq/dequantize.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/awq/dequantize.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/awq/gemm_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/awq/gemm_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/fp8/amd_detail/hip_float8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/fp8/amd_detail/hip_float8.h -------------------------------------------------------------------------------- /csrc_musa/quantization/fp8/amd_detail/hip_float8_impl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/fp8/amd_detail/hip_float8_impl.h -------------------------------------------------------------------------------- /csrc_musa/quantization/fp8/amd_detail/quant_utils.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/fp8/amd_detail/quant_utils.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/fp8/fp8_cuda_kernels.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/fp8/fp8_cuda_kernels.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/fp8_e5m2_kvcache/quant_utils.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/fp8_e5m2_kvcache/quant_utils.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/compat.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/compat.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/matrix_view.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/matrix_view.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/q_gemm.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/q_gemm.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/qdq_2.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/qdq_2.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/qdq_3.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/qdq_3.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/qdq_4.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/qdq_4.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/qdq_8.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/qdq_8.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq/qdq_util.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq/qdq_util.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq_marlin/gptq_marlin.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq_marlin/gptq_marlin.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq_marlin/gptq_marlin.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq_marlin/gptq_marlin.muh -------------------------------------------------------------------------------- /csrc_musa/quantization/gptq_marlin/gptq_marlin_repack.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/gptq_marlin/gptq_marlin_repack.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/marlin/.LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/marlin/.LICENSE -------------------------------------------------------------------------------- /csrc_musa/quantization/marlin/marlin_cuda_kernel.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/marlin/marlin_cuda_kernel.mu -------------------------------------------------------------------------------- /csrc_musa/quantization/squeezellm/quant_cuda_kernel.mu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/quantization/squeezellm/quant_cuda_kernel.mu -------------------------------------------------------------------------------- /csrc_musa/reduction_utils.muh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/csrc_musa/reduction_utils.muh -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/source/assets/dev/dockerfile-stages-dependency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/dev/dockerfile-stages-dependency.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/k_vecs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/k_vecs.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/key.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/logits_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/logits_vec.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/q_vecs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/q_vecs.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/query.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/v_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/v_vec.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/value.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/kernel/value.png -------------------------------------------------------------------------------- /docs/source/assets/logos/vllm-logo-only-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/logos/vllm-logo-only-light.png -------------------------------------------------------------------------------- /docs/source/assets/logos/vllm-logo-text-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/logos/vllm-logo-text-dark.png -------------------------------------------------------------------------------- /docs/source/assets/logos/vllm-logo-text-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/assets/logos/vllm-logo-text-light.png -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/dev/dockerfile/dockerfile.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/dev/dockerfile/dockerfile.rst -------------------------------------------------------------------------------- /docs/source/dev/engine/async_llm_engine.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/dev/engine/async_llm_engine.rst -------------------------------------------------------------------------------- /docs/source/dev/engine/engine_index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/dev/engine/engine_index.rst -------------------------------------------------------------------------------- /docs/source/dev/engine/llm_engine.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/dev/engine/llm_engine.rst -------------------------------------------------------------------------------- /docs/source/dev/kernel/paged_attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/dev/kernel/paged_attention.rst -------------------------------------------------------------------------------- /docs/source/dev/sampling_params.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/dev/sampling_params.rst -------------------------------------------------------------------------------- /docs/source/generate_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/generate_examples.py -------------------------------------------------------------------------------- /docs/source/getting_started/amd-installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/getting_started/amd-installation.rst -------------------------------------------------------------------------------- /docs/source/getting_started/cpu-installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/getting_started/cpu-installation.rst -------------------------------------------------------------------------------- /docs/source/getting_started/examples/examples_index.template.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/getting_started/examples/examples_index.template.rst -------------------------------------------------------------------------------- /docs/source/getting_started/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/getting_started/installation.rst -------------------------------------------------------------------------------- /docs/source/getting_started/neuron-installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/getting_started/neuron-installation.rst -------------------------------------------------------------------------------- /docs/source/getting_started/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/getting_started/quickstart.rst -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/models/adding_model.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/models/adding_model.rst -------------------------------------------------------------------------------- /docs/source/models/engine_args.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/models/engine_args.rst -------------------------------------------------------------------------------- /docs/source/models/lora.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/models/lora.rst -------------------------------------------------------------------------------- /docs/source/models/performance.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/models/performance.rst -------------------------------------------------------------------------------- /docs/source/models/supported_models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/models/supported_models.rst -------------------------------------------------------------------------------- /docs/source/quantization/auto_awq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/quantization/auto_awq.rst -------------------------------------------------------------------------------- /docs/source/quantization/fp8_e4m3_kvcache.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/quantization/fp8_e4m3_kvcache.rst -------------------------------------------------------------------------------- /docs/source/quantization/fp8_e5m2_kvcache.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/quantization/fp8_e5m2_kvcache.rst -------------------------------------------------------------------------------- /docs/source/serving/deploying_with_bentoml.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/deploying_with_bentoml.rst -------------------------------------------------------------------------------- /docs/source/serving/deploying_with_docker.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/deploying_with_docker.rst -------------------------------------------------------------------------------- /docs/source/serving/deploying_with_kserve.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/deploying_with_kserve.rst -------------------------------------------------------------------------------- /docs/source/serving/deploying_with_triton.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/deploying_with_triton.rst -------------------------------------------------------------------------------- /docs/source/serving/distributed_serving.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/distributed_serving.rst -------------------------------------------------------------------------------- /docs/source/serving/env_vars.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/env_vars.rst -------------------------------------------------------------------------------- /docs/source/serving/integrations.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/integrations.rst -------------------------------------------------------------------------------- /docs/source/serving/metrics.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/metrics.rst -------------------------------------------------------------------------------- /docs/source/serving/openai_compatible_server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/openai_compatible_server.md -------------------------------------------------------------------------------- /docs/source/serving/run_on_sky.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/run_on_sky.rst -------------------------------------------------------------------------------- /docs/source/serving/serving_with_langchain.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/serving_with_langchain.rst -------------------------------------------------------------------------------- /docs/source/serving/usage_stats.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/docs/source/serving/usage_stats.md -------------------------------------------------------------------------------- /examples/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/api_client.py -------------------------------------------------------------------------------- /examples/aqlm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/aqlm_example.py -------------------------------------------------------------------------------- /examples/fp8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/fp8/README.md -------------------------------------------------------------------------------- /examples/fp8/extract_scales.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/fp8/extract_scales.py -------------------------------------------------------------------------------- /examples/fp8/quantizer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/fp8/quantizer/README.md -------------------------------------------------------------------------------- /examples/fp8/quantizer/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/fp8/quantizer/quantize.py -------------------------------------------------------------------------------- /examples/gradio_openai_chatbot_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/gradio_openai_chatbot_webserver.py -------------------------------------------------------------------------------- /examples/gradio_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/gradio_webserver.py -------------------------------------------------------------------------------- /examples/llava_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/llava_example.py -------------------------------------------------------------------------------- /examples/llm_engine_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/llm_engine_example.py -------------------------------------------------------------------------------- /examples/logging_configuration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/logging_configuration.md -------------------------------------------------------------------------------- /examples/multilora_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/multilora_inference.py -------------------------------------------------------------------------------- /examples/offline_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/offline_inference.py -------------------------------------------------------------------------------- /examples/offline_inference_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/offline_inference_distributed.py -------------------------------------------------------------------------------- /examples/offline_inference_neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/offline_inference_neuron.py -------------------------------------------------------------------------------- /examples/offline_inference_with_prefix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/offline_inference_with_prefix.py -------------------------------------------------------------------------------- /examples/openai_chat_completion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/openai_chat_completion_client.py -------------------------------------------------------------------------------- /examples/openai_completion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/openai_completion_client.py -------------------------------------------------------------------------------- /examples/production_monitoring/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/production_monitoring/README.md -------------------------------------------------------------------------------- /examples/production_monitoring/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/production_monitoring/docker-compose.yaml -------------------------------------------------------------------------------- /examples/production_monitoring/grafana.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/production_monitoring/grafana.json -------------------------------------------------------------------------------- /examples/production_monitoring/prometheus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/production_monitoring/prometheus.yaml -------------------------------------------------------------------------------- /examples/template_alpaca.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_alpaca.jinja -------------------------------------------------------------------------------- /examples/template_baichuan.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_baichuan.jinja -------------------------------------------------------------------------------- /examples/template_chatglm.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_chatglm.jinja -------------------------------------------------------------------------------- /examples/template_chatglm2.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_chatglm2.jinja -------------------------------------------------------------------------------- /examples/template_chatml.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_chatml.jinja -------------------------------------------------------------------------------- /examples/template_falcon.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_falcon.jinja -------------------------------------------------------------------------------- /examples/template_falcon_180b.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_falcon_180b.jinja -------------------------------------------------------------------------------- /examples/template_inkbot.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/template_inkbot.jinja -------------------------------------------------------------------------------- /examples/tensorize_vllm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/examples/tensorize_vllm_model.py -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/format.sh -------------------------------------------------------------------------------- /musa_porting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/musa_porting.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-build.txt -------------------------------------------------------------------------------- /requirements-common.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-common.txt -------------------------------------------------------------------------------- /requirements-cpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-cpu.txt -------------------------------------------------------------------------------- /requirements-cuda.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-cuda.txt -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-musa.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-musa.txt -------------------------------------------------------------------------------- /requirements-neuron.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-neuron.txt -------------------------------------------------------------------------------- /requirements-rocm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/requirements-rocm.txt -------------------------------------------------------------------------------- /rocm_patch/rocm_bf16.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/rocm_patch/rocm_bf16.patch -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/async_engine/api_server_async_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/api_server_async_engine.py -------------------------------------------------------------------------------- /tests/async_engine/test_api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/test_api_server.py -------------------------------------------------------------------------------- /tests/async_engine/test_async_llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/test_async_llm_engine.py -------------------------------------------------------------------------------- /tests/async_engine/test_chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/test_chat_template.py -------------------------------------------------------------------------------- /tests/async_engine/test_merge_async_iterators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/test_merge_async_iterators.py -------------------------------------------------------------------------------- /tests/async_engine/test_openapi_server_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/test_openapi_server_ray.py -------------------------------------------------------------------------------- /tests/async_engine/test_request_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/async_engine/test_request_tracker.py -------------------------------------------------------------------------------- /tests/basic_correctness/test_basic_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/basic_correctness/test_basic_correctness.py -------------------------------------------------------------------------------- /tests/basic_correctness/test_chunked_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/basic_correctness/test_chunked_prefill.py -------------------------------------------------------------------------------- /tests/basic_correctness/test_preemption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/basic_correctness/test_preemption.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/block/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/block/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/conftest.py -------------------------------------------------------------------------------- /tests/core/block/e2e/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/e2e/conftest.py -------------------------------------------------------------------------------- /tests/core/block/e2e/test_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/e2e/test_correctness.py -------------------------------------------------------------------------------- /tests/core/block/test_block_manager_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/test_block_manager_v2.py -------------------------------------------------------------------------------- /tests/core/block/test_block_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/test_block_table.py -------------------------------------------------------------------------------- /tests/core/block/test_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/test_common.py -------------------------------------------------------------------------------- /tests/core/block/test_cpu_gpu_block_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/test_cpu_gpu_block_allocator.py -------------------------------------------------------------------------------- /tests/core/block/test_naive_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/test_naive_block.py -------------------------------------------------------------------------------- /tests/core/block/test_prefix_caching_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/block/test_prefix_caching_block.py -------------------------------------------------------------------------------- /tests/core/test_block_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/test_block_manager.py -------------------------------------------------------------------------------- /tests/core/test_chunked_prefill_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/test_chunked_prefill_scheduler.py -------------------------------------------------------------------------------- /tests/core/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/test_scheduler.py -------------------------------------------------------------------------------- /tests/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/core/utils.py -------------------------------------------------------------------------------- /tests/distributed/test_basic_distributed_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/distributed/test_basic_distributed_correctness.py -------------------------------------------------------------------------------- /tests/distributed/test_chunked_prefill_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/distributed/test_chunked_prefill_distributed.py -------------------------------------------------------------------------------- /tests/distributed/test_comm_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/distributed/test_comm_ops.py -------------------------------------------------------------------------------- /tests/distributed/test_custom_all_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/distributed/test_custom_all_reduce.py -------------------------------------------------------------------------------- /tests/distributed/test_pynccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/distributed/test_pynccl.py -------------------------------------------------------------------------------- /tests/distributed/test_pynccl_library.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/distributed/test_pynccl_library.py -------------------------------------------------------------------------------- /tests/engine/output_processor/test_multi_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/output_processor/test_multi_step.py -------------------------------------------------------------------------------- /tests/engine/test_computed_prefix_blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/test_computed_prefix_blocks.py -------------------------------------------------------------------------------- /tests/engine/test_detokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/test_detokenization.py -------------------------------------------------------------------------------- /tests/engine/test_multiproc_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/test_multiproc_workers.py -------------------------------------------------------------------------------- /tests/engine/test_skip_tokenizer_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/test_skip_tokenizer_init.py -------------------------------------------------------------------------------- /tests/engine/test_stop_reason.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/test_stop_reason.py -------------------------------------------------------------------------------- /tests/engine/test_stop_strings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/engine/test_stop_strings.py -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_serving_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/entrypoints/openai/test_serving_chat.py -------------------------------------------------------------------------------- /tests/entrypoints/test_guided_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/entrypoints/test_guided_processors.py -------------------------------------------------------------------------------- /tests/entrypoints/test_llm_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/entrypoints/test_llm_generate.py -------------------------------------------------------------------------------- /tests/entrypoints/test_openai_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/entrypoints/test_openai_server.py -------------------------------------------------------------------------------- /tests/entrypoints/test_server_oot_registration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/entrypoints/test_server_oot_registration.py -------------------------------------------------------------------------------- /tests/fp8_kv/llama2-70b-fp8-kv/kv_cache_scales.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/fp8_kv/llama2-70b-fp8-kv/kv_cache_scales.json -------------------------------------------------------------------------------- /tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json -------------------------------------------------------------------------------- /tests/kernels/allclose_default.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/allclose_default.py -------------------------------------------------------------------------------- /tests/kernels/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/conftest.py -------------------------------------------------------------------------------- /tests/kernels/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_activation.py -------------------------------------------------------------------------------- /tests/kernels/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_attention.py -------------------------------------------------------------------------------- /tests/kernels/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_cache.py -------------------------------------------------------------------------------- /tests/kernels/test_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_layernorm.py -------------------------------------------------------------------------------- /tests/kernels/test_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_moe.py -------------------------------------------------------------------------------- /tests/kernels/test_pos_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_pos_encoding.py -------------------------------------------------------------------------------- /tests/kernels/test_prefix_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_prefix_prefill.py -------------------------------------------------------------------------------- /tests/kernels/test_rand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_rand.py -------------------------------------------------------------------------------- /tests/kernels/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/kernels/test_sampler.py -------------------------------------------------------------------------------- /tests/lora/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/lora/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/conftest.py -------------------------------------------------------------------------------- /tests/lora/test_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_baichuan.py -------------------------------------------------------------------------------- /tests/lora/test_chatglm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_chatglm3.py -------------------------------------------------------------------------------- /tests/lora/test_gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_gemma.py -------------------------------------------------------------------------------- /tests/lora/test_layer_variation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_layer_variation.py -------------------------------------------------------------------------------- /tests/lora/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_layers.py -------------------------------------------------------------------------------- /tests/lora/test_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_llama.py -------------------------------------------------------------------------------- /tests/lora/test_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_lora.py -------------------------------------------------------------------------------- /tests/lora/test_lora_checkpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_lora_checkpoints.py -------------------------------------------------------------------------------- /tests/lora/test_lora_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_lora_manager.py -------------------------------------------------------------------------------- /tests/lora/test_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_mixtral.py -------------------------------------------------------------------------------- /tests/lora/test_punica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_punica.py -------------------------------------------------------------------------------- /tests/lora/test_quant_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_quant_model.py -------------------------------------------------------------------------------- /tests/lora/test_tokenizer_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_tokenizer_group.py -------------------------------------------------------------------------------- /tests/lora/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_utils.py -------------------------------------------------------------------------------- /tests/lora/test_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/test_worker.py -------------------------------------------------------------------------------- /tests/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/lora/utils.py -------------------------------------------------------------------------------- /tests/metrics/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/metrics/test_metrics.py -------------------------------------------------------------------------------- /tests/model_executor/weight_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/model_executor/weight_utils.py -------------------------------------------------------------------------------- /tests/models/test_aqlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_aqlm.py -------------------------------------------------------------------------------- /tests/models/test_big_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_big_models.py -------------------------------------------------------------------------------- /tests/models/test_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_fp8.py -------------------------------------------------------------------------------- /tests/models/test_gptq_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_gptq_marlin.py -------------------------------------------------------------------------------- /tests/models/test_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_llava.py -------------------------------------------------------------------------------- /tests/models/test_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_marlin.py -------------------------------------------------------------------------------- /tests/models/test_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_mistral.py -------------------------------------------------------------------------------- /tests/models/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_models.py -------------------------------------------------------------------------------- /tests/models/test_oot_registration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/test_oot_registration.py -------------------------------------------------------------------------------- /tests/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/models/utils.py -------------------------------------------------------------------------------- /tests/prefix_caching/test_prefix_caching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/prefix_caching/test_prefix_caching.py -------------------------------------------------------------------------------- /tests/prompts/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/prompts/example.txt -------------------------------------------------------------------------------- /tests/prompts/summary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/prompts/summary.txt -------------------------------------------------------------------------------- /tests/quantization/test_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/quantization/test_configs.py -------------------------------------------------------------------------------- /tests/quantization/test_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/quantization/test_fp8.py -------------------------------------------------------------------------------- /tests/samplers/test_beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_beam_search.py -------------------------------------------------------------------------------- /tests/samplers/test_ignore_eos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_ignore_eos.py -------------------------------------------------------------------------------- /tests/samplers/test_logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_logits_processor.py -------------------------------------------------------------------------------- /tests/samplers/test_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_logprobs.py -------------------------------------------------------------------------------- /tests/samplers/test_ranks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_ranks.py -------------------------------------------------------------------------------- /tests/samplers/test_rejection_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_rejection_sampler.py -------------------------------------------------------------------------------- /tests/samplers/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_sampler.py -------------------------------------------------------------------------------- /tests/samplers/test_seeded_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/samplers/test_seeded_generate.py -------------------------------------------------------------------------------- /tests/spec_decode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/spec_decode/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/spec_decode/e2e/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/e2e/conftest.py -------------------------------------------------------------------------------- /tests/spec_decode/e2e/test_compatibility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/e2e/test_compatibility.py -------------------------------------------------------------------------------- /tests/spec_decode/e2e/test_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/e2e/test_logprobs.py -------------------------------------------------------------------------------- /tests/spec_decode/e2e/test_multistep_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/e2e/test_multistep_correctness.py -------------------------------------------------------------------------------- /tests/spec_decode/e2e/test_ngram_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/e2e/test_ngram_correctness.py -------------------------------------------------------------------------------- /tests/spec_decode/test_batch_expansion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/test_batch_expansion.py -------------------------------------------------------------------------------- /tests/spec_decode/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/test_metrics.py -------------------------------------------------------------------------------- /tests/spec_decode/test_multi_step_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/test_multi_step_worker.py -------------------------------------------------------------------------------- /tests/spec_decode/test_ngram_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/test_ngram_worker.py -------------------------------------------------------------------------------- /tests/spec_decode/test_spec_decode_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/test_spec_decode_worker.py -------------------------------------------------------------------------------- /tests/spec_decode/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/test_utils.py -------------------------------------------------------------------------------- /tests/spec_decode/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/spec_decode/utils.py -------------------------------------------------------------------------------- /tests/tensorizer_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tensorizer_loader/tensorize_vllm_model_for_testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/tensorizer_loader/tensorize_vllm_model_for_testing.py -------------------------------------------------------------------------------- /tests/tensorizer_loader/test_tensorizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/tensorizer_loader/test_tensorizer.py -------------------------------------------------------------------------------- /tests/test_cache_block_hashing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_cache_block_hashing.py -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_config.py -------------------------------------------------------------------------------- /tests/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_logger.py -------------------------------------------------------------------------------- /tests/test_logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_logits_processor.py -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_regression.py -------------------------------------------------------------------------------- /tests/test_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_sampling_params.py -------------------------------------------------------------------------------- /tests/test_sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/test_sequence.py -------------------------------------------------------------------------------- /tests/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tokenization/test_cached_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/tokenization/test_cached_tokenizer.py -------------------------------------------------------------------------------- /tests/tokenization/test_detokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/tokenization/test_detokenize.py -------------------------------------------------------------------------------- /tests/tokenization/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/tokenization/test_tokenizer.py -------------------------------------------------------------------------------- /tests/tokenization/test_tokenizer_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/tokenization/test_tokenizer_group.py -------------------------------------------------------------------------------- /tests/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/worker/test_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/worker/test_model_runner.py -------------------------------------------------------------------------------- /tests/worker/test_swap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/tests/worker/test_swap.py -------------------------------------------------------------------------------- /vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/__init__.py -------------------------------------------------------------------------------- /vllm/_custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/_custom_ops.py -------------------------------------------------------------------------------- /vllm/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/__init__.py -------------------------------------------------------------------------------- /vllm/attention/backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/backends/abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/backends/abstract.py -------------------------------------------------------------------------------- /vllm/attention/backends/flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/backends/flash_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/backends/flashinfer.py -------------------------------------------------------------------------------- /vllm/attention/backends/rocm_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/backends/rocm_flash_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/torch_sdpa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/backends/torch_sdpa.py -------------------------------------------------------------------------------- /vllm/attention/backends/xformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/backends/xformers.py -------------------------------------------------------------------------------- /vllm/attention/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/layer.py -------------------------------------------------------------------------------- /vllm/attention/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/ops/paged_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/ops/paged_attn.py -------------------------------------------------------------------------------- /vllm/attention/ops/prefix_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/ops/prefix_prefill.py -------------------------------------------------------------------------------- /vllm/attention/ops/triton_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/ops/triton_flash_attention.py -------------------------------------------------------------------------------- /vllm/attention/selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/attention/selector.py -------------------------------------------------------------------------------- /vllm/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/block.py -------------------------------------------------------------------------------- /vllm/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/config.py -------------------------------------------------------------------------------- /vllm/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/block/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/block/block_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block/block_table.py -------------------------------------------------------------------------------- /vllm/core/block/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block/common.py -------------------------------------------------------------------------------- /vllm/core/block/cpu_gpu_block_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block/cpu_gpu_block_allocator.py -------------------------------------------------------------------------------- /vllm/core/block/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block/interfaces.py -------------------------------------------------------------------------------- /vllm/core/block/naive_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block/naive_block.py -------------------------------------------------------------------------------- /vllm/core/block/prefix_caching_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block/prefix_caching_block.py -------------------------------------------------------------------------------- /vllm/core/block_manager_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block_manager_v1.py -------------------------------------------------------------------------------- /vllm/core/block_manager_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/block_manager_v2.py -------------------------------------------------------------------------------- /vllm/core/evictor_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/evictor_v1.py -------------------------------------------------------------------------------- /vllm/core/evictor_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/evictor_v2.py -------------------------------------------------------------------------------- /vllm/core/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/interfaces.py -------------------------------------------------------------------------------- /vllm/core/policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/policy.py -------------------------------------------------------------------------------- /vllm/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/core/scheduler.py -------------------------------------------------------------------------------- /vllm/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/__init__.py -------------------------------------------------------------------------------- /vllm/distributed/communication_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/communication_op.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/custom_all_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/device_communicators/custom_all_reduce.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/pymccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/device_communicators/pymccl.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/pymccl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/device_communicators/pymccl_utils.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/pynccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/device_communicators/pynccl.py -------------------------------------------------------------------------------- /vllm/distributed/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/parallel_state.py -------------------------------------------------------------------------------- /vllm/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/distributed/utils.py -------------------------------------------------------------------------------- /vllm/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/arg_utils.py -------------------------------------------------------------------------------- /vllm/engine/async_llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/async_llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/metrics.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/output_processor/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/output_processor/interfaces.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/multi_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/output_processor/multi_step.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/single_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/output_processor/single_step.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/stop_checker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/output_processor/stop_checker.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/engine/output_processor/util.py -------------------------------------------------------------------------------- /vllm/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/llm.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/openai/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/openai/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/cli_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/openai/cli_args.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/openai/protocol.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/openai/serving_chat.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/openai/serving_completion.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/entrypoints/openai/serving_engine.py -------------------------------------------------------------------------------- /vllm/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/envs.py -------------------------------------------------------------------------------- /vllm/executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/executor/cpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/cpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/distributed_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/distributed_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/executor_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/executor_base.py -------------------------------------------------------------------------------- /vllm/executor/gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_worker_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/multiproc_worker_utils.py -------------------------------------------------------------------------------- /vllm/executor/neuron_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/neuron_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/ray_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/executor/ray_utils.py -------------------------------------------------------------------------------- /vllm/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/logger.py -------------------------------------------------------------------------------- /vllm/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/logging/__init__.py -------------------------------------------------------------------------------- /vllm/logging/formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/logging/formatter.py -------------------------------------------------------------------------------- /vllm/lora/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/lora/fully_sharded_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/fully_sharded_layers.py -------------------------------------------------------------------------------- /vllm/lora/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/layers.py -------------------------------------------------------------------------------- /vllm/lora/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/lora.py -------------------------------------------------------------------------------- /vllm/lora/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/models.py -------------------------------------------------------------------------------- /vllm/lora/punica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/punica.py -------------------------------------------------------------------------------- /vllm/lora/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/request.py -------------------------------------------------------------------------------- /vllm/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/utils.py -------------------------------------------------------------------------------- /vllm/lora/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/lora/worker_manager.py -------------------------------------------------------------------------------- /vllm/model_executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/guided_decoding/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/outlines_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/guided_decoding/outlines_decoding.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/outlines_logits_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/guided_decoding/outlines_logits_processors.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/activation.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/configs/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/configs/README -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/fused_moe/fused_moe.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/layernorm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/linear.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/logits_processor.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/ops/rand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/ops/rand.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/ops/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/ops/sample.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/aqlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/aqlm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/awq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/base_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/base_config.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/fp8.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/gptq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/gptq_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/gptq_marlin.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/marlin.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/schema.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/squeezellm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/quantization/squeezellm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/rejection_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/rejection_sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/rotary_embedding.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/vocab_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/layers/vocab_parallel_embedding.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/model_loader/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/model_loader/loader.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/model_loader/neuron.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/tensorizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/model_loader/tensorizer.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/model_loader/utils.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/weight_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/model_loader/weight_utils.py -------------------------------------------------------------------------------- /vllm/model_executor/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/baichuan.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/bloom.py -------------------------------------------------------------------------------- /vllm/model_executor/models/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/chatglm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/commandr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/commandr.py -------------------------------------------------------------------------------- /vllm/model_executor/models/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/dbrx.py -------------------------------------------------------------------------------- /vllm/model_executor/models/decilm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/decilm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/deepseek.py -------------------------------------------------------------------------------- /vllm/model_executor/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/falcon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/gemma.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/gpt2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/gpt_bigcode.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/gpt_j.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/gpt_neox.py -------------------------------------------------------------------------------- /vllm/model_executor/models/internlm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/internlm2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/jais.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/jais.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/llama.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/llava.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/minicpm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/mixtral.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mixtral_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/mixtral_quant.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/mpt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/olmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/olmo.py -------------------------------------------------------------------------------- /vllm/model_executor/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/opt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/orion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/orion.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/phi.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/qwen.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/qwen2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/qwen2_moe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/stablelm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/starcoder2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/starcoder2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/xverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/models/xverse.py -------------------------------------------------------------------------------- /vllm/model_executor/sampling_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/sampling_metadata.py -------------------------------------------------------------------------------- /vllm/model_executor/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/model_executor/utils.py -------------------------------------------------------------------------------- /vllm/outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/outputs.py -------------------------------------------------------------------------------- /vllm/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. 2 | # The vllm package uses inline types. 3 | -------------------------------------------------------------------------------- /vllm/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/sampling_params.py -------------------------------------------------------------------------------- /vllm/sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/sequence.py -------------------------------------------------------------------------------- /vllm/spec_decode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/spec_decode/batch_expansion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/batch_expansion.py -------------------------------------------------------------------------------- /vllm/spec_decode/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/interfaces.py -------------------------------------------------------------------------------- /vllm/spec_decode/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/metrics.py -------------------------------------------------------------------------------- /vllm/spec_decode/multi_step_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/multi_step_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/ngram_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/ngram_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/spec_decode_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/spec_decode_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/top1_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/top1_proposer.py -------------------------------------------------------------------------------- /vllm/spec_decode/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/spec_decode/util.py -------------------------------------------------------------------------------- /vllm/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/test_utils.py -------------------------------------------------------------------------------- /vllm/transformers_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/transformers_utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/config.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/configs/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/configs/chatglm.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/configs/dbrx.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/configs/falcon.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/jais.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/configs/jais.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/configs/mpt.py -------------------------------------------------------------------------------- /vllm/transformers_utils/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/detokenizer.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizer.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizer_group/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/tokenizer_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizer_group/tokenizer_group.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizers/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/transformers_utils/tokenizers/baichuan.py -------------------------------------------------------------------------------- /vllm/usage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/usage/usage_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/usage/usage_lib.py -------------------------------------------------------------------------------- /vllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/utils.py -------------------------------------------------------------------------------- /vllm/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/worker/cache_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/cache_engine.py -------------------------------------------------------------------------------- /vllm/worker/cpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/cpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/cpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/cpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/model_runner.py -------------------------------------------------------------------------------- /vllm/worker/neuron_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/neuron_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/neuron_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/neuron_worker.py -------------------------------------------------------------------------------- /vllm/worker/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/worker.py -------------------------------------------------------------------------------- /vllm/worker/worker_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MooreThreads/vllm_musa/HEAD/vllm/worker/worker_base.py --------------------------------------------------------------------------------