├── .buildkite ├── check-wheel-size.py ├── lm-eval-harness │ ├── configs │ │ ├── DeepSeek-V2-Lite-Chat.yaml │ │ ├── Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml │ │ ├── Meta-Llama-3-70B-Instruct.yaml │ │ ├── Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml │ │ ├── Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml │ │ ├── Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml │ │ ├── Meta-Llama-3-8B-Instruct-FP8.yaml │ │ ├── Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml │ │ ├── Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml │ │ ├── Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml │ │ ├── Meta-Llama-3-8B-Instruct.yaml │ │ ├── Meta-Llama-3-8B-QQQ.yaml │ │ ├── Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml │ │ ├── Minitron-4B-Base-FP8.yaml │ │ ├── Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml │ │ ├── Mixtral-8x7B-Instruct-v0.1-FP8.yaml │ │ ├── Mixtral-8x7B-Instruct-v0.1.yaml │ │ ├── Qwen2-1.5B-Instruct-FP8W8.yaml │ │ ├── Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml │ │ ├── Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml │ │ ├── Qwen2-57B-A14-Instruct.yaml │ │ ├── models-large.txt │ │ └── models-small.txt │ ├── run-lm-eval-gsm-hf-baseline.sh │ ├── run-lm-eval-gsm-vllm-baseline.sh │ ├── run-tests.sh │ └── test_lm_eval_correctness.py ├── nightly-benchmarks │ ├── README.md │ ├── benchmark-pipeline.yaml │ ├── nightly-annotation.md │ ├── nightly-descriptions.md │ ├── nightly-pipeline.yaml │ ├── performance-benchmarks-descriptions.md │ ├── scripts │ │ ├── convert-results-json-to-markdown.py │ │ ├── download-tokenizer.py │ │ ├── generate-nightly-markdown.py │ │ ├── get-lmdeploy-modelname.py │ │ ├── launch-server.sh │ │ ├── nightly-annotate.sh │ │ ├── run-nightly-benchmarks.sh │ │ ├── run-performance-benchmarks.sh │ │ ├── summary-nightly-results.py │ │ └── wait-for-image.sh │ └── tests │ │ ├── latency-tests.json │ │ ├── nightly-tests.json │ │ ├── serving-tests.json │ │ └── throughput-tests.json ├── release-pipeline.yaml ├── run-amd-test.sh ├── run-benchmarks.sh ├── run-cpu-test-ppc64le.sh ├── run-cpu-test.sh ├── run-multi-node-test.sh ├── run-neuron-test.sh ├── run-openvino-test.sh ├── run-tpu-test.sh ├── run-xpu-test.sh └── test-pipeline.yaml ├── .clang-format ├── .dockerignore ├── .github ├── CODEOWNERS ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── 100-documentation.yml │ ├── 200-installation.yml │ ├── 300-usage.yml │ ├── 400-bug report.yml │ ├── 500-feature request.yml │ ├── 600-new model.yml │ ├── 700-performance discussion.yml │ ├── 750-RFC.yml │ ├── 800-misc discussion.yml │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── mergify.yml └── workflows │ ├── actionlint.yml │ ├── add_label_automerge.yml │ ├── clang-format.yml │ ├── matchers │ ├── actionlint.json │ ├── mypy.json │ └── ruff.json │ ├── mypy.yaml │ ├── publish.yml │ ├── reminder_comment.yml │ ├── ruff.yml │ ├── scripts │ ├── build.sh │ ├── create_release.js │ ├── cuda-install.sh │ ├── env.sh │ └── pytorch-install.sh │ ├── stale.yml │ └── yapf.yml ├── .gitignore ├── .readthedocs.yaml ├── .yapfignore ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DCO ├── Dockerfile ├── Dockerfile.cpu ├── Dockerfile.hpu ├── Dockerfile.neuron ├── Dockerfile.openvino ├── Dockerfile.ppc64le ├── Dockerfile.rocm ├── Dockerfile.tpu ├── Dockerfile.xpu ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── adrenaline ├── Makefile ├── assets │ ├── PD_disaggregationn.png │ ├── adrenaline.png │ └── disaggregation_adrenaline_comparison.png ├── attention │ ├── backends │ │ └── flashinfer.py │ └── layer.py ├── config.py ├── csrc │ ├── ipc_utils.cpp │ └── torch_bindings.cpp ├── custom_ops.py ├── entrypoints │ └── adrenaline_proxy_server.py ├── include │ ├── cuda_utils.h │ └── ipc_utils.h ├── model_loader │ ├── model_loader.py │ └── models │ │ ├── llama.py │ │ └── offload_attn.py ├── model_runner │ ├── attn_runner.py │ └── model_runner.py ├── profiler │ ├── attention_bandwidth_profiler.py │ ├── decode_gemm_profiler.py │ └── delta_profiler.py ├── proxy │ ├── load_estimator.py │ ├── offload_manager │ │ ├── backends │ │ │ ├── always.py │ │ │ ├── backend.py │ │ │ ├── loadaware.py │ │ │ ├── loadaware_v1.py │ │ │ ├── never.py │ │ │ └── percentage.py │ │ └── offload_manager.py │ ├── request_dispatcher.py │ ├── request_tracer.py │ ├── storage_manager.py │ └── utils.py ├── requirements.txt ├── resource_manager │ └── mps_context.py ├── scripts │ ├── profile_attention_bandwidth.sh │ ├── profile_gemm_time.sh │ ├── start_mps.sh │ └── stop_mps.sh ├── setup.py └── utils │ ├── csv_utils.py │ ├── input_factory.py │ ├── profile_utils.py │ └── utils.py ├── benchmarks ├── README.md ├── backend_request_func.py ├── bench_dataset_serving.sh ├── bench_random_serving.sh ├── benchmark_latency.py ├── benchmark_prefix_caching.py ├── benchmark_prioritization.py ├── benchmark_serving.py ├── benchmark_throughput.py ├── cutlass_benchmarks │ ├── w8a8_benchmarks.py │ └── weight_shapes.py ├── dataset_utils.py ├── disagg_benchmarks │ ├── disagg_overhead_benchmark.sh │ ├── disagg_performance_benchmark.sh │ ├── disagg_prefill_proxy_server.py │ ├── round_robin_proxy.py │ └── visualize_benchmark_results.py ├── kernels │ ├── benchmark_aqlm.py │ ├── benchmark_layernorm.py │ ├── benchmark_machete.py │ ├── benchmark_marlin.py │ ├── benchmark_moe.py │ ├── benchmark_paged_attention.py │ ├── benchmark_quant.py │ ├── benchmark_rope.py │ ├── benchmark_shapes.py │ ├── graph_machete_bench.py │ ├── requirements.txt │ └── weight_shapes.py ├── launch_tgi_server.sh ├── overheads │ └── benchmark_hashing.py └── sonnet.txt ├── cmake ├── cpu_extension.cmake ├── hipify.py └── utils.cmake ├── collect_env.py ├── csrc ├── activation_kernels.cu ├── attention │ ├── attention_dtypes.h │ ├── attention_generic.cuh │ ├── attention_kernels.cu │ ├── attention_utils.cuh │ ├── dtype_bfloat16.cuh │ ├── dtype_float16.cuh │ ├── dtype_float32.cuh │ └── dtype_fp8.cuh ├── cache.h ├── cache_kernels.cu ├── core │ ├── exception.hpp │ ├── registration.h │ └── scalar_type.hpp ├── cpu │ ├── activation.cpp │ ├── attention.cpp │ ├── cache.cpp │ ├── cpu_types.hpp │ ├── cpu_types_vsx.hpp │ ├── cpu_types_x86.hpp │ ├── dnnl_helper.hpp │ ├── layernorm.cpp │ ├── pos_encoding.cpp │ ├── quant.cpp │ ├── torch_bindings.cpp │ └── utils.cpp ├── cuda_compat.h ├── cuda_utils.h ├── cuda_utils_kernels.cu ├── custom_all_reduce.cu ├── custom_all_reduce.cuh ├── custom_all_reduce_test.cu ├── cutlass_extensions │ ├── cute_utils.cuh │ ├── torch_utils.hpp │ ├── vllm_collective_builder.cuh │ ├── vllm_custom_types.cuh │ ├── vllm_cutlass_library_extension.py │ └── vllm_numeric_conversion.cuh ├── dispatch_utils.h ├── layernorm_kernels.cu ├── mamba │ ├── causal_conv1d │ │ ├── causal_conv1d.cu │ │ ├── causal_conv1d.h │ │ └── static_switch.h │ └── mamba_ssm │ │ ├── selective_scan.h │ │ ├── selective_scan_fwd.cu │ │ └── static_switch.h ├── moe │ ├── marlin_kernels │ │ ├── marlin_moe_kernel.h │ │ ├── marlin_moe_kernel_ku4.cu │ │ ├── marlin_moe_kernel_ku4.h │ │ ├── marlin_moe_kernel_ku4b8.cu │ │ ├── marlin_moe_kernel_ku4b8.h │ │ ├── marlin_moe_kernel_ku8b128.cu │ │ └── marlin_moe_kernel_ku8b128.h │ ├── marlin_moe_ops.cu │ ├── moe_align_sum_kernels.cu │ ├── moe_ops.h │ ├── topk_softmax_kernels.cu │ └── torch_bindings.cpp ├── ops.h ├── permute_cols.cu ├── pos_encoding_kernels.cu ├── prepare_inputs │ ├── advance_step.cu │ └── advance_step.cuh ├── quantization │ ├── aqlm │ │ └── gemm_kernels.cu │ ├── awq │ │ ├── dequantize.cuh │ │ └── gemm_kernels.cu │ ├── compressed_tensors │ │ └── int8_quant_kernels.cu │ ├── cutlass_w8a8 │ │ ├── Epilogues.md │ │ ├── broadcast_load_epilogue_c2x.hpp │ │ ├── broadcast_load_epilogue_c3x.hpp │ │ ├── common.hpp │ │ ├── scaled_mm_c2x.cu │ │ ├── scaled_mm_c2x.cuh │ │ ├── scaled_mm_c2x_sm75_dispatch.cuh │ │ ├── scaled_mm_c2x_sm80_dispatch.cuh │ │ ├── scaled_mm_c2x_sm89_fp8_dispatch.cuh │ │ ├── scaled_mm_c2x_sm89_int8_dispatch.cuh │ │ ├── scaled_mm_c3x.cu │ │ └── scaled_mm_entry.cu │ ├── fp8 │ │ ├── amd │ │ │ ├── hip_float8.h │ │ │ ├── hip_float8_impl.h │ │ │ └── quant_utils.cuh │ │ ├── common.cu │ │ ├── fp8_marlin.cu │ │ └── nvidia │ │ │ └── quant_utils.cuh │ ├── gguf │ │ ├── dequantize.cuh │ │ ├── ggml-common.h │ │ ├── gguf_kernel.cu │ │ ├── mmq.cuh │ │ ├── mmvq.cuh │ │ └── vecdotq.cuh │ ├── gptq │ │ ├── compat.cuh │ │ ├── matrix_view.cuh │ │ ├── q_gemm.cu │ │ ├── qdq_2.cuh │ │ ├── qdq_3.cuh │ │ ├── qdq_4.cuh │ │ ├── qdq_8.cuh │ │ └── qdq_util.cuh │ ├── gptq_marlin │ │ ├── awq_marlin_repack.cu │ │ ├── gptq_marlin.cu │ │ ├── gptq_marlin_repack.cu │ │ ├── marlin.cuh │ │ └── marlin_dtypes.cuh │ ├── machete │ │ ├── Readme.md │ │ ├── generate.py │ │ ├── machete_collective_builder.cuh │ │ ├── machete_interleaving_utils.cuh │ │ ├── machete_mainloop.cuh │ │ ├── machete_mm_kernel.cuh │ │ ├── machete_mm_launcher.cuh │ │ ├── machete_prepack_kernel.cuh │ │ ├── machete_prepack_launcher.cuh │ │ ├── machete_prepacked_layout.cuh │ │ └── machete_pytorch.cu │ └── marlin │ │ ├── dense │ │ ├── LICENSE │ │ ├── common │ │ │ ├── base.h │ │ │ └── mem.h │ │ └── marlin_cuda_kernel.cu │ │ ├── qqq │ │ └── marlin_qqq_gemm_kernel.cu │ │ └── sparse │ │ ├── LICENSE │ │ ├── common │ │ ├── base.h │ │ ├── mem.h │ │ └── mma.h │ │ └── marlin_24_cuda_kernel.cu ├── rocm │ ├── attention.cu │ ├── ops.h │ └── torch_bindings.cpp └── torch_bindings.cpp ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements-docs.txt └── source │ ├── _static │ └── custom.js │ ├── _templates │ └── sections │ │ └── header.html │ ├── assets │ ├── dev │ │ └── dockerfile-stages-dependency.png │ ├── kernel │ │ ├── k_vecs.png │ │ ├── key.png │ │ ├── logits_vec.png │ │ ├── q_vecs.png │ │ ├── query.png │ │ ├── v_vec.png │ │ └── value.png │ └── logos │ │ ├── vllm-logo-only-light.png │ │ ├── vllm-logo-text-dark.png │ │ └── vllm-logo-text-light.png │ ├── automatic_prefix_caching │ ├── apc.rst │ └── details.md │ ├── community │ ├── meetups.rst │ └── sponsors.md │ ├── conf.py │ ├── dev │ ├── dockerfile │ │ └── dockerfile.rst │ ├── engine │ │ ├── async_llm_engine.rst │ │ ├── engine_index.rst │ │ └── llm_engine.rst │ ├── input_processing │ │ ├── input_processing_pipeline.rst │ │ └── model_inputs_index.rst │ ├── kernel │ │ └── paged_attention.rst │ ├── multimodal │ │ ├── adding_multimodal_plugin.rst │ │ └── multimodal_index.rst │ ├── offline_inference │ │ ├── llm.rst │ │ ├── llm_inputs.rst │ │ └── offline_index.rst │ ├── pooling_params.rst │ ├── profiling │ │ └── profiling_index.rst │ └── sampling_params.rst │ ├── generate_examples.py │ ├── getting_started │ ├── amd-installation.rst │ ├── cpu-installation.rst │ ├── debugging.rst │ ├── examples │ │ └── examples_index.template.rst │ ├── gaudi-installation.rst │ ├── installation.rst │ ├── neuron-installation.rst │ ├── openvino-installation.rst │ ├── quickstart.rst │ ├── tpu-installation.rst │ └── xpu-installation.rst │ ├── index.rst │ ├── models │ ├── adding_model.rst │ ├── enabling_multimodal_inputs.rst │ ├── engine_args.rst │ ├── lora.rst │ ├── performance.rst │ ├── spec_decode.rst │ ├── supported_models.rst │ └── vlm.rst │ ├── performance_benchmark │ └── benchmarks.rst │ ├── quantization │ ├── auto_awq.rst │ ├── bnb.rst │ ├── fp8.rst │ ├── fp8_e4m3_kvcache.rst │ ├── fp8_e5m2_kvcache.rst │ ├── gguf.rst │ ├── int8.rst │ └── supported_hardware.rst │ └── serving │ ├── compatibility_matrix.rst │ ├── deploying_with_bentoml.rst │ ├── deploying_with_cerebrium.rst │ ├── deploying_with_docker.rst │ ├── deploying_with_dstack.rst │ ├── deploying_with_k8s.rst │ ├── deploying_with_kserve.rst │ ├── deploying_with_lws.rst │ ├── deploying_with_nginx.rst │ ├── deploying_with_triton.rst │ ├── distributed_serving.rst │ ├── env_vars.rst │ ├── faq.rst │ ├── integrations.rst │ ├── metrics.rst │ ├── openai_compatible_server.md │ ├── run_on_sky.rst │ ├── serving_with_langchain.rst │ ├── serving_with_llamaindex.rst │ ├── tensorizer.rst │ └── usage_stats.md ├── evaluation ├── .gitignore ├── README.md ├── assets │ ├── mooncake_13b.png │ └── mooncake_8b.png ├── plots │ ├── dataset_performance │ │ ├── parse_performance_metrics.py │ │ ├── plot_mooncake_llama_2_13b.py │ │ ├── plot_mooncake_llama_3_1_8b.py │ │ └── utils.py │ ├── prepare_datasets │ │ └── dataset_histogram.py │ └── utils │ │ └── plot_utils.py ├── requirements.txt ├── run │ ├── 0_prepare_dataset.sh │ ├── dataset_performance │ │ ├── mooncake_llama_2_13b_adrenaline_evaluation.sh │ │ ├── mooncake_llama_2_13b_disaggregation_evaluation.sh │ │ ├── mooncake_llama_3_1_8b_adrenaline_evaluation.sh │ │ └── mooncake_llama_3_1_8b_disaggregation_evaluation.sh │ └── python │ │ └── prepare_dataset.py └── utils │ ├── client_utils.sh │ ├── evaluation_utils.sh │ ├── output_parser.py │ ├── peak_throughput.py │ └── stable_throughput.py ├── examples ├── adrenaline │ ├── adrenaline_instances_start.sh │ ├── attention_instance_start.sh │ ├── decode_instance_start.sh │ ├── download_model.py │ ├── prefill_instance_start.sh │ ├── proxy_instance_start.sh │ ├── remote_adrenaline_instances_start.sh │ ├── start_clients.sh │ ├── start_demo_servers.sh │ ├── stop_demo_servers.sh │ └── xp1d │ │ ├── adrenaline_instances_start.sh │ │ ├── attention_instance_start.sh │ │ ├── decode_instance_start.sh │ │ ├── prefill_instance_start.sh │ │ └── proxy_instance_start.sh ├── api_client.py ├── aqlm_example.py ├── cpu_offload.py ├── distributed_kv │ ├── decode_instance_start.sh │ ├── disagg_prefill_example.sh │ ├── disagg_prefill_instances_start.sh │ ├── prefill_instance_start.sh │ ├── proxy_instance_start.sh │ ├── remote_disagg_prefill_instances_start.sh │ ├── send_request.sh │ └── xp1d │ │ ├── decode_instance_start.sh │ │ ├── disagg_instances_start.sh │ │ ├── prefill_instance_start.sh │ │ └── proxy_instance_start.sh ├── distributed_utils │ └── utils.sh ├── florence2_inference.py ├── fp8 │ ├── README.md │ ├── extract_scales.py │ └── quantizer │ │ ├── README.md │ │ └── quantize.py ├── gguf_inference.py ├── gradio_openai_chatbot_webserver.py ├── gradio_webserver.py ├── llm_engine_example.py ├── logging_configuration.md ├── lora_with_quantization_inference.py ├── multilora_inference.py ├── offline_chat_with_tools.py ├── offline_inference.py ├── offline_inference_arctic.py ├── offline_inference_audio_language.py ├── offline_inference_chat.py ├── offline_inference_distributed.py ├── offline_inference_embedding.py ├── offline_inference_encoder_decoder.py ├── offline_inference_mlpspeculator.py ├── offline_inference_neuron.py ├── offline_inference_neuron_int8_quantization.py ├── offline_inference_openai.md ├── offline_inference_pixtral.py ├── offline_inference_tpu.py ├── offline_inference_vision_language.py ├── offline_inference_vision_language_embedding.py ├── offline_inference_vision_language_multi_image.py ├── offline_inference_with_prefix.py ├── offline_inference_with_profiler.py ├── offline_profile.py ├── openai_chat_completion_client.py ├── openai_chat_completion_client_for_multimodal.py ├── openai_chat_completion_client_with_tools.py ├── openai_chat_embedding_client_for_multimodal.py ├── openai_completion_client.py ├── openai_embedding_client.py ├── openai_example_batch.jsonl ├── production_monitoring │ ├── Otel.md │ ├── README.md │ ├── docker-compose.yaml │ ├── dummy_client.py │ ├── grafana.json │ └── prometheus.yaml ├── run_cluster.sh ├── save_sharded_state.py ├── template_alpaca.jinja ├── template_baichuan.jinja ├── template_blip2.jinja ├── template_chatglm.jinja ├── template_chatglm2.jinja ├── template_chatml.jinja ├── template_falcon.jinja ├── template_falcon_180b.jinja ├── template_inkbot.jinja ├── template_llava.jinja ├── template_vlm2vec.jinja ├── tensorize_vllm_model.py ├── tool_chat_template_granite_20b_fc.jinja ├── tool_chat_template_hermes.jinja ├── tool_chat_template_internlm2_tool.jinja ├── tool_chat_template_llama3.1_json.jinja ├── tool_chat_template_llama3.2_json.jinja ├── tool_chat_template_mistral.jinja └── tool_chat_template_mistral_parallel.jinja ├── find_cuda_init.py ├── format.sh ├── pyproject.toml ├── python_only_dev.py ├── requirements-build.txt ├── requirements-common.txt ├── requirements-cpu.txt ├── requirements-cuda.txt ├── requirements-dev.txt ├── requirements-hpu.txt ├── requirements-lint.txt ├── requirements-neuron.txt ├── requirements-openvino.txt ├── requirements-rocm.txt ├── requirements-test.in ├── requirements-test.txt ├── requirements-tpu.txt ├── requirements-xpu.txt ├── setup.py ├── tests ├── __init__.py ├── async_engine │ ├── __init__.py │ ├── api_server_async_engine.py │ ├── test_api_server.py │ ├── test_async_llm_engine.py │ └── test_request_tracker.py ├── basic_correctness │ ├── __init__.py │ ├── test_basic_correctness.py │ ├── test_chunked_prefill.py │ ├── test_cpu_offload.py │ └── test_preemption.py ├── compile │ ├── __init__.py │ ├── piecewise │ │ ├── __init__.py │ │ ├── piecewise_compilation_config.json │ │ ├── test_simple.py │ │ └── test_toy_llama.py │ ├── test_basic_correctness.py │ ├── test_full_graph.py │ ├── test_wrapper.py │ └── utils.py ├── conftest.py ├── core │ ├── __init__.py │ ├── block │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── e2e │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_correctness.py │ │ │ └── test_correctness_sliding_window.py │ │ ├── test_block_manager.py │ │ ├── test_block_table.py │ │ ├── test_common.py │ │ ├── test_cpu_gpu_block_allocator.py │ │ ├── test_naive_block.py │ │ └── test_prefix_caching_block.py │ ├── test_chunked_prefill_scheduler.py │ ├── test_num_computed_tokens_update.py │ ├── test_scheduler.py │ ├── test_scheduler_encoder_decoder.py │ ├── test_serialization.py │ └── utils.py ├── data │ └── test_config.yaml ├── distributed │ ├── __init__.py │ ├── test_ca_buffer_sharing.py │ ├── test_comm_ops.py │ ├── test_custom_all_reduce.py │ ├── test_distributed_oot.py │ ├── test_multi_node_assignment.py │ ├── test_pipeline_parallel.py │ ├── test_pipeline_partition.py │ ├── test_pp_cudagraph.py │ ├── test_pynccl.py │ ├── test_same_node.py │ ├── test_shm_broadcast.py │ └── test_utils.py ├── encoder_decoder │ ├── __init__.py │ └── test_e2e_correctness.py ├── engine │ ├── __init__.py │ ├── output_processor │ │ ├── __init__.py │ │ ├── test_multi_step.py │ │ └── test_stop_checker.py │ ├── test_arg_utils.py │ ├── test_computed_prefix_blocks.py │ ├── test_custom_executor.py │ ├── test_detokenization.py │ ├── test_multiproc_workers.py │ ├── test_short_mm_context.py │ ├── test_skip_tokenizer_init.py │ ├── test_stop_reason.py │ └── test_stop_strings.py ├── entrypoints │ ├── __init__.py │ ├── conftest.py │ ├── llm │ │ ├── __init__.py │ │ ├── test_chat.py │ │ ├── test_encode.py │ │ ├── test_generate.py │ │ ├── test_generate_multiple_loras.py │ │ ├── test_guided_generate.py │ │ ├── test_init.py │ │ ├── test_lazy_outlines.py │ │ └── test_prompt_validation.py │ ├── offline_mode │ │ ├── __init__.py │ │ └── test_offline_mode.py │ ├── openai │ │ ├── __init__.py │ │ ├── test_accuracy.py │ │ ├── test_audio.py │ │ ├── test_basic.py │ │ ├── test_chat.py │ │ ├── test_chat_template.py │ │ ├── test_chunked_prompt.py │ │ ├── test_cli_args.py │ │ ├── test_completion.py │ │ ├── test_embedding.py │ │ ├── test_encoder_decoder.py │ │ ├── test_lora_lineage.py │ │ ├── test_metrics.py │ │ ├── test_models.py │ │ ├── test_oot_registration.py │ │ ├── test_prompt_validation.py │ │ ├── test_return_tokens_as_ids.py │ │ ├── test_run_batch.py │ │ ├── test_serving_chat.py │ │ ├── test_serving_engine.py │ │ ├── test_shutdown.py │ │ ├── test_tokenization.py │ │ ├── test_vision.py │ │ └── test_vision_embedding.py │ └── test_chat_utils.py ├── fp8_kv │ ├── llama2-70b-fp8-kv │ │ └── kv_cache_scales.json │ └── llama2-7b-fp8-kv │ │ └── kv_cache_scales.json ├── kernels │ ├── __init__.py │ ├── allclose_default.py │ ├── conftest.py │ ├── quant_utils.py │ ├── test_activation.py │ ├── test_aqlm.py │ ├── test_attention.py │ ├── test_attention_selector.py │ ├── test_awq.py │ ├── test_awq_marlin.py │ ├── test_awq_triton.py │ ├── test_blocksparse_attention.py │ ├── test_cache.py │ ├── test_causal_conv1d.py │ ├── test_cutlass.py │ ├── test_encoder_decoder_attn.py │ ├── test_flash_attn.py │ ├── test_flashinfer.py │ ├── test_fp8_quant.py │ ├── test_ggml.py │ ├── test_gguf.py │ ├── test_gptq.py │ ├── test_int8_quant.py │ ├── test_layernorm.py │ ├── test_machete_gemm.py │ ├── test_mamba_ssm.py │ ├── test_marlin_gemm.py │ ├── test_moe.py │ ├── test_permute_cols.py │ ├── test_pos_encoding.py │ ├── test_prefix_prefill.py │ ├── test_rotary_embedding.py │ ├── test_utils.py │ └── utils.py ├── kv_transfer │ ├── disagg_test.py │ ├── module_test.py │ ├── test_lookup_buffer.py │ └── test_send_recv.py ├── lora │ ├── __init__.py │ ├── conftest.py │ ├── data │ │ ├── __init__.py │ │ └── long_context_test_data.py │ ├── test_baichuan.py │ ├── test_chatglm3.py │ ├── test_gemma.py │ ├── test_layers.py │ ├── test_llama.py │ ├── test_long_context.py │ ├── test_lora_checkpoints.py │ ├── test_lora_huggingface.py │ ├── test_lora_manager.py │ ├── test_minicpmv.py │ ├── test_minicpmv_tp.py │ ├── test_mixtral.py │ ├── test_phi.py │ ├── test_punica_sizes.py │ ├── test_punica_variation.py │ ├── test_quant_model.py │ ├── test_tokenizer_group.py │ ├── test_utils.py │ ├── test_worker.py │ └── utils.py ├── metrics │ ├── __init__.py │ └── test_metrics.py ├── model_executor │ ├── __init__.py │ ├── conftest.py │ ├── test_enabled_custom_ops.py │ ├── test_guided_processors.py │ └── weight_utils.py ├── models │ ├── __init__.py │ ├── decoder_only │ │ ├── __init__.py │ │ ├── audio_language │ │ │ ├── __init__.py │ │ │ └── test_ultravox.py │ │ ├── language │ │ │ ├── __init__.py │ │ │ ├── test_aqlm.py │ │ │ ├── test_fp8.py │ │ │ ├── test_gguf.py │ │ │ ├── test_gptq_marlin.py │ │ │ ├── test_gptq_marlin_24.py │ │ │ ├── test_granite.py │ │ │ ├── test_granitemoe.py │ │ │ ├── test_jamba.py │ │ │ ├── test_mamba.py │ │ │ ├── test_mistral.py │ │ │ ├── test_modelopt.py │ │ │ ├── test_models.py │ │ │ └── test_phimoe.py │ │ └── vision_language │ │ │ ├── __init__.py │ │ │ ├── mm_processor_kwargs │ │ │ ├── __init__.py │ │ │ ├── test_llava_next.py │ │ │ ├── test_phi3v.py │ │ │ ├── test_qwen.py │ │ │ └── test_qwen2_vl.py │ │ │ ├── test_h2ovl.py │ │ │ ├── test_intern_vit.py │ │ │ ├── test_internvl.py │ │ │ ├── test_models.py │ │ │ ├── test_phi3v.py │ │ │ ├── test_pixtral.py │ │ │ └── vlm_utils │ │ │ ├── __init__.py │ │ │ ├── builders.py │ │ │ ├── case_filtering.py │ │ │ ├── core.py │ │ │ ├── custom_inputs.py │ │ │ ├── model_utils.py │ │ │ ├── runners.py │ │ │ └── types.py │ ├── embedding │ │ ├── __init__.py │ │ ├── language │ │ │ ├── __init__.py │ │ │ ├── test_cls_models.py │ │ │ └── test_embedding.py │ │ ├── utils.py │ │ └── vision_language │ │ │ ├── __init__.py │ │ │ ├── test_llava_next.py │ │ │ └── test_phi3v.py │ ├── encoder_decoder │ │ ├── __init__.py │ │ ├── language │ │ │ ├── __init__.py │ │ │ └── test_bart.py │ │ └── vision_language │ │ │ ├── __init__.py │ │ │ ├── test_broadcast.py │ │ │ ├── test_florence2.py │ │ │ └── test_mllama.py │ ├── fixtures │ │ ├── pixtral_chat.json │ │ └── pixtral_chat_engine.json │ ├── test_oot_registration.py │ ├── test_registry.py │ └── utils.py ├── mq_llm_engine │ ├── __init__.py │ ├── test_abort.py │ ├── test_error_handling.py │ ├── test_load.py │ └── utils.py ├── multi_step │ ├── __init__.py │ ├── test_correctness_async_llm.py │ └── test_correctness_llm.py ├── multimodal │ ├── __init__.py │ ├── test_base.py │ ├── test_mapper.py │ ├── test_processor_kwargs.py │ └── test_utils.py ├── plugins │ └── vllm_add_dummy_model │ │ ├── setup.py │ │ └── vllm_add_dummy_model │ │ ├── __init__.py │ │ ├── my_gemma_embedding.py │ │ ├── my_llava.py │ │ └── my_opt.py ├── prefix_caching │ ├── __init__.py │ ├── test_disable_sliding_window.py │ └── test_prefix_caching.py ├── prompt_adapter │ ├── test_bloom.py │ ├── test_multi_adapter_inference.py │ └── test_pa_lora.py ├── prompts │ ├── example.txt │ └── summary.txt ├── quantization │ ├── __init__.py │ ├── test_bitsandbytes.py │ ├── test_compressed_tensors.py │ ├── test_configs.py │ ├── test_cpu_offload.py │ ├── test_experts_int8.py │ ├── test_fp8.py │ ├── test_ipex_quant.py │ ├── test_lm_head.py │ └── utils.py ├── samplers │ ├── __init__.py │ ├── test_beam_search.py │ ├── test_ignore_eos.py │ ├── test_logits_processor.py │ ├── test_logprobs.py │ ├── test_no_bad_words.py │ ├── test_ranks.py │ ├── test_rejection_sampler.py │ ├── test_sampler.py │ ├── test_seeded_generate.py │ └── test_typical_acceptance_sampler.py ├── spec_decode │ ├── __init__.py │ ├── e2e │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_compatibility.py │ │ ├── test_eagle_correctness.py │ │ ├── test_integration.py │ │ ├── test_integration_dist_tp2.py │ │ ├── test_integration_dist_tp4.py │ │ ├── test_logprobs.py │ │ ├── test_medusa_correctness.py │ │ ├── test_mlp_correctness.py │ │ ├── test_multistep_correctness.py │ │ ├── test_ngram_correctness.py │ │ └── test_seed.py │ ├── test_batch_expansion.py │ ├── test_dynamic_spec_decode.py │ ├── test_metrics.py │ ├── test_multi_step_worker.py │ ├── test_ngram_worker.py │ ├── test_scorer.py │ ├── test_spec_decode_worker.py │ ├── test_utils.py │ └── utils.py ├── tensorizer_loader │ ├── __init__.py │ ├── conftest.py │ └── test_tensorizer.py ├── test_cache_block_hashing.py ├── test_config.py ├── test_embedded_commit.py ├── test_inputs.py ├── test_logger.py ├── test_logits_processor.py ├── test_regression.py ├── test_sampling_params.py ├── test_scalartype.py ├── test_sequence.py ├── test_sharded_state_loader.py ├── test_utils.py ├── tokenization │ ├── __init__.py │ ├── test_cached_tokenizer.py │ ├── test_detokenize.py │ ├── test_get_eos.py │ ├── test_tokenizer.py │ └── test_tokenizer_group.py ├── tool_use │ ├── __init__.py │ ├── conftest.py │ ├── test_chat_completion_request_validations.py │ ├── test_chat_completions.py │ ├── test_jamba_tool_parser.py │ ├── test_parallel_tool_calls.py │ ├── test_tool_calls.py │ └── utils.py ├── tpu │ ├── __init__.py │ ├── test_compilation.py │ └── test_custom_dispatcher.py ├── tracing │ ├── __init__.py │ └── test_tracing.py ├── utils.py ├── weight_loading │ ├── models-large.txt │ ├── models.txt │ ├── run_model_weight_loading_test.sh │ └── test_weight_loading.py └── worker │ ├── __init__.py │ ├── test_encoder_decoder_model_runner.py │ ├── test_model_input.py │ ├── test_model_runner.py │ ├── test_profile.py │ └── test_swap.py ├── tools ├── actionlint.sh ├── check_repo.sh ├── mypy.sh ├── profiler │ ├── print_layerwise_table.py │ └── visualize_layerwise_profile.py └── report_build_time_ninja.py ├── use_existing_torch.py ├── vllm-README.md └── vllm ├── __init__.py ├── _custom_ops.py ├── _ipex_ops.py ├── adapter_commons ├── __init__.py ├── layers.py ├── models.py ├── request.py ├── utils.py └── worker_manager.py ├── assets ├── __init__.py ├── audio.py ├── base.py ├── image.py └── video.py ├── attention ├── __init__.py ├── backends │ ├── __init__.py │ ├── abstract.py │ ├── blocksparse_attn.py │ ├── flash_attn.py │ ├── flashinfer.py │ ├── hpu_attn.py │ ├── ipex_attn.py │ ├── openvino.py │ ├── pallas.py │ ├── placeholder_attn.py │ ├── rocm_flash_attn.py │ ├── torch_sdpa.py │ ├── utils.py │ └── xformers.py ├── layer.py ├── ops │ ├── __init__.py │ ├── blocksparse_attention │ │ ├── __init__.py │ │ ├── blocksparse_attention_kernel.py │ │ ├── interface.py │ │ └── utils.py │ ├── hpu_paged_attn.py │ ├── ipex_attn.py │ ├── paged_attn.py │ ├── prefix_prefill.py │ └── triton_flash_attention.py └── selector.py ├── beam_search.py ├── block.py ├── compilation ├── __init__.py ├── backends.py ├── compile_context.py ├── config.py ├── counter.py ├── decorators.py ├── levels.py └── wrapper.py ├── config.py ├── connections.py ├── core ├── __init__.py ├── block │ ├── __init__.py │ ├── block_table.py │ ├── common.py │ ├── cpu_gpu_block_allocator.py │ ├── interfaces.py │ ├── naive_block.py │ ├── prefix_caching_block.py │ └── utils.py ├── block_manager.py ├── evictor.py ├── interfaces.py ├── placeholder_block_space_manager.py └── scheduler.py ├── distributed ├── __init__.py ├── communication_op.py ├── device_communicators │ ├── __init__.py │ ├── cuda_wrapper.py │ ├── custom_all_reduce.py │ ├── custom_all_reduce_utils.py │ ├── hpu_communicator.py │ ├── pynccl.py │ ├── pynccl_wrapper.py │ ├── shm_broadcast.py │ └── tpu_communicator.py ├── kv_transfer │ ├── __init__.py │ ├── agent │ │ ├── kv_transfer.py │ │ ├── kv_transfer_in_device.py │ │ ├── offload_exec_agent.py │ │ └── utils.py │ ├── kv_lookup_buffer │ │ ├── __init__.py │ │ ├── base.py │ │ ├── ipc_buffer.py │ │ └── simple_buffer.py │ ├── kv_pipe │ │ ├── __init__.py │ │ ├── base.py │ │ ├── offload_comm.py │ │ ├── py_pipe.py │ │ ├── torch_distributed_pipe.py │ │ └── torch_within_device_pipe.py │ └── vllm_adapter.py ├── parallel_state.py └── utils.py ├── engine ├── __init__.py ├── arg_utils.py ├── async_llm_engine.py ├── async_timeout.py ├── llm_engine.py ├── metrics.py ├── metrics_types.py ├── multiprocessing │ ├── __init__.py │ ├── client.py │ └── engine.py ├── output_processor │ ├── __init__.py │ ├── interfaces.py │ ├── multi_step.py │ ├── single_step.py │ ├── stop_checker.py │ └── util.py └── protocol.py ├── entrypoints ├── __init__.py ├── api_server.py ├── chat_utils.py ├── launcher.py ├── llm.py ├── logger.py └── openai │ ├── __init__.py │ ├── api_server.py │ ├── cli_args.py │ ├── logits_processors.py │ ├── protocol.py │ ├── run_batch.py │ ├── serving_chat.py │ ├── serving_completion.py │ ├── serving_embedding.py │ ├── serving_engine.py │ ├── serving_tokenization.py │ └── tool_parsers │ ├── __init__.py │ ├── abstract_tool_parser.py │ ├── granite_20b_fc_tool_parser.py │ ├── hermes_tool_parser.py │ ├── internlm2_tool_parser.py │ ├── jamba_tool_parser.py │ ├── llama_tool_parser.py │ ├── mistral_tool_parser.py │ └── utils.py ├── envs.py ├── executor ├── __init__.py ├── cpu_executor.py ├── distributed_gpu_executor.py ├── executor_base.py ├── gpu_executor.py ├── hpu_executor.py ├── msgspec_utils.py ├── multiproc_gpu_executor.py ├── multiproc_worker_utils.py ├── multiproc_xpu_executor.py ├── neuron_executor.py ├── openvino_executor.py ├── ray_gpu_executor.py ├── ray_hpu_executor.py ├── ray_tpu_executor.py ├── ray_utils.py ├── ray_xpu_executor.py ├── tpu_executor.py └── xpu_executor.py ├── forward_context.py ├── inputs ├── __init__.py ├── data.py ├── parse.py ├── preprocess.py └── registry.py ├── logger.py ├── logging ├── __init__.py └── formatter.py ├── logits_process.py ├── lora ├── __init__.py ├── fully_sharded_layers.py ├── layers.py ├── lora.py ├── models.py ├── ops │ ├── __init__.py │ ├── bgmv_expand.py │ ├── bgmv_expand_slice.py │ ├── bgmv_shrink.py │ ├── sgmv_expand.py │ ├── sgmv_expand_slice.py │ ├── sgmv_shrink.py │ └── utils.py ├── punica.py ├── request.py ├── utils.py └── worker_manager.py ├── model_executor ├── __init__.py ├── custom_op.py ├── guided_decoding │ ├── __init__.py │ ├── guided_fields.py │ ├── lm_format_enforcer_decoding.py │ ├── outlines_decoding.py │ └── outlines_logits_processors.py ├── layers │ ├── __init__.py │ ├── activation.py │ ├── fused_moe │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=14336,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=1792,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=3584,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_L40S.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=7168,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ └── README │ │ ├── fused_marlin_moe.py │ │ ├── fused_moe.py │ │ ├── layer.py │ │ └── moe_pallas.py │ ├── layernorm.py │ ├── linear.py │ ├── logits_processor.py │ ├── mamba │ │ ├── __init__.py │ │ ├── mamba_mixer.py │ │ └── ops │ │ │ ├── __init__.py │ │ │ ├── causal_conv1d.py │ │ │ └── mamba_ssm.py │ ├── pooler.py │ ├── quantization │ │ ├── __init__.py │ │ ├── aqlm.py │ │ ├── awq.py │ │ ├── awq_marlin.py │ │ ├── awq_triton.py │ │ ├── base_config.py │ │ ├── bitsandbytes.py │ │ ├── compressed_tensors │ │ │ ├── __init__.py │ │ │ ├── compressed_tensors.py │ │ │ ├── compressed_tensors_moe.py │ │ │ ├── schemes │ │ │ │ ├── __init__.py │ │ │ │ ├── compressed_tensors_scheme.py │ │ │ │ ├── compressed_tensors_w4a16_24.py │ │ │ │ ├── compressed_tensors_w8a16_fp8.py │ │ │ │ ├── compressed_tensors_w8a8_fp8.py │ │ │ │ ├── compressed_tensors_w8a8_int8.py │ │ │ │ └── compressed_tensors_wNa16.py │ │ │ └── utils.py │ │ ├── deepspeedfp.py │ │ ├── experts_int8.py │ │ ├── fbgemm_fp8.py │ │ ├── fp8.py │ │ ├── gguf.py │ │ ├── gptq.py │ │ ├── gptq_marlin.py │ │ ├── gptq_marlin_24.py │ │ ├── ipex_quant.py │ │ ├── kernels │ │ │ ├── MPLinearKernel.py │ │ │ ├── __init__.py │ │ │ ├── exllama.py │ │ │ ├── machete.py │ │ │ └── marlin.py │ │ ├── kv_cache.py │ │ ├── marlin.py │ │ ├── modelopt.py │ │ ├── neuron_quant.py │ │ ├── qqq.py │ │ ├── schema.py │ │ ├── tpu_int8.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── layer_utils.py │ │ │ ├── machete_utils.py │ │ │ ├── marlin_utils.py │ │ │ ├── marlin_utils_fp8.py │ │ │ ├── marlin_utils_test.py │ │ │ ├── marlin_utils_test_24.py │ │ │ ├── marlin_utils_test_qqq.py │ │ │ ├── quant_utils.py │ │ │ └── w8a8_utils.py │ ├── rejection_sampler.py │ ├── resampler.py │ ├── rotary_embedding.py │ ├── sampler.py │ ├── spec_decode_base_sampler.py │ ├── typical_acceptance_sampler.py │ └── vocab_parallel_embedding.py ├── model_loader │ ├── __init__.py │ ├── loader.py │ ├── neuron.py │ ├── openvino.py │ ├── tensorizer.py │ ├── utils.py │ └── weight_utils.py ├── models │ ├── __init__.py │ ├── arctic.py │ ├── baichuan.py │ ├── bart.py │ ├── bert.py │ ├── blip.py │ ├── blip2.py │ ├── bloom.py │ ├── chameleon.py │ ├── chatglm.py │ ├── clip.py │ ├── commandr.py │ ├── dbrx.py │ ├── decilm.py │ ├── deepseek.py │ ├── deepseek_v2.py │ ├── eagle.py │ ├── exaone.py │ ├── falcon.py │ ├── florence2.py │ ├── fuyu.py │ ├── gemma.py │ ├── gemma2.py │ ├── glm4_vision_encoder.py │ ├── gpt2.py │ ├── gpt_bigcode.py │ ├── gpt_j.py │ ├── gpt_neox.py │ ├── granite.py │ ├── granitemoe.py │ ├── h2ovl.py │ ├── idefics2_vision_model.py │ ├── idefics3.py │ ├── interfaces.py │ ├── interfaces_base.py │ ├── intern_vit.py │ ├── internlm2.py │ ├── internlm2_ve.py │ ├── internvl.py │ ├── jais.py │ ├── jamba.py │ ├── llama.py │ ├── llava.py │ ├── llava_next.py │ ├── llava_next_video.py │ ├── llava_onevision.py │ ├── mamba.py │ ├── mamba_cache.py │ ├── medusa.py │ ├── minicpm.py │ ├── minicpm3.py │ ├── minicpmv.py │ ├── mixtral.py │ ├── mixtral_quant.py │ ├── mllama.py │ ├── mlp_speculator.py │ ├── module_mapping.py │ ├── molmo.py │ ├── mpt.py │ ├── nemotron.py │ ├── nvlm_d.py │ ├── olmo.py │ ├── olmoe.py │ ├── opt.py │ ├── orion.py │ ├── paligemma.py │ ├── persimmon.py │ ├── phi.py │ ├── phi3.py │ ├── phi3_small.py │ ├── phi3v.py │ ├── phimoe.py │ ├── pixtral.py │ ├── qwen.py │ ├── qwen2.py │ ├── qwen2_audio.py │ ├── qwen2_cls.py │ ├── qwen2_moe.py │ ├── qwen2_rm.py │ ├── qwen2_vl.py │ ├── registry.py │ ├── siglip.py │ ├── solar.py │ ├── stablelm.py │ ├── starcoder2.py │ ├── ultravox.py │ ├── utils.py │ └── xverse.py ├── parameter.py ├── pooling_metadata.py ├── sampling_metadata.py └── utils.py ├── multimodal ├── __init__.py ├── audio.py ├── base.py ├── image.py ├── registry.py ├── utils.py └── video.py ├── outputs.py ├── platforms ├── __init__.py ├── cpu.py ├── cuda.py ├── hpu.py ├── interface.py ├── neuron.py ├── openvino.py ├── rocm.py ├── tpu.py └── xpu.py ├── plugins └── __init__.py ├── pooling_params.py ├── profiler ├── __init__.py ├── layerwise_profile.py └── utils.py ├── prompt_adapter ├── __init__.py ├── layers.py ├── models.py ├── request.py ├── utils.py └── worker_manager.py ├── py.typed ├── sampling_params.py ├── scalar_type.py ├── scripts.py ├── sequence.py ├── spec_decode ├── __init__.py ├── batch_expansion.py ├── draft_model_runner.py ├── interfaces.py ├── medusa_worker.py ├── metrics.py ├── mlp_speculator_worker.py ├── mqa_scorer.py ├── multi_step_worker.py ├── ngram_worker.py ├── proposer_worker_base.py ├── smaller_tp_proposer_worker.py ├── spec_decode_worker.py ├── target_model_runner.py ├── top1_proposer.py └── util.py ├── tracing.py ├── transformers_utils ├── __init__.py ├── config.py ├── configs │ ├── __init__.py │ ├── arctic.py │ ├── chatglm.py │ ├── dbrx.py │ ├── eagle.py │ ├── exaone.py │ ├── falcon.py │ ├── h2ovl.py │ ├── internvl.py │ ├── jais.py │ ├── medusa.py │ ├── mllama.py │ ├── mlp_speculator.py │ ├── mpt.py │ ├── nemotron.py │ ├── nvlm_d.py │ ├── solar.py │ └── ultravox.py ├── detokenizer.py ├── detokenizer_utils.py ├── processor.py ├── tokenizer.py ├── tokenizer_group │ ├── __init__.py │ ├── base_tokenizer_group.py │ ├── ray_tokenizer_group.py │ └── tokenizer_group.py ├── tokenizers │ ├── __init__.py │ └── mistral.py └── utils.py ├── triton_utils ├── __init__.py ├── custom_cache_manager.py ├── importing.py └── libentry.py ├── usage ├── __init__.py └── usage_lib.py ├── utils.py ├── v1 ├── attention │ ├── __init__.py │ └── backends │ │ ├── __init__.py │ │ └── flash_attn.py ├── core │ ├── __init__.py │ ├── kv_cache_manager.py │ └── scheduler.py ├── engine │ ├── __init__.py │ └── llm_engine.py ├── executor │ ├── __init__.py │ └── gpu_executor.py ├── outputs.py ├── request.py ├── sample │ ├── __init__.py │ ├── metadata.py │ └── sampler.py ├── tokenizer │ ├── __init__.py │ └── detokenizer.py └── worker │ ├── __init__.py │ ├── gpu_model_runner.py │ └── gpu_worker.py ├── version.py ├── vllm_flash_attn └── .gitkeep └── worker ├── __init__.py ├── cache_engine.py ├── cpu_enc_dec_model_runner.py ├── cpu_model_runner.py ├── cpu_worker.py ├── embedding_model_runner.py ├── enc_dec_model_runner.py ├── hpu_model_runner.py ├── hpu_worker.py ├── model_runner.py ├── model_runner_base.py ├── multi_step_model_runner.py ├── multi_step_tpu_worker.py ├── multi_step_worker.py ├── neuron_model_runner.py ├── neuron_worker.py ├── openvino_model_runner.py ├── openvino_worker.py ├── tpu_model_runner.py ├── tpu_worker.py ├── utils.py ├── worker.py ├── worker_base.py ├── xpu_model_runner.py └── xpu_worker.py /.buildkite/check-wheel-size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/check-wheel-size.py -------------------------------------------------------------------------------- /.buildkite/lm-eval-harness/run-tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/lm-eval-harness/run-tests.sh -------------------------------------------------------------------------------- /.buildkite/nightly-benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/nightly-benchmarks/README.md -------------------------------------------------------------------------------- /.buildkite/release-pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/release-pipeline.yaml -------------------------------------------------------------------------------- /.buildkite/run-amd-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-amd-test.sh -------------------------------------------------------------------------------- /.buildkite/run-benchmarks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-benchmarks.sh -------------------------------------------------------------------------------- /.buildkite/run-cpu-test-ppc64le.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-cpu-test-ppc64le.sh -------------------------------------------------------------------------------- /.buildkite/run-cpu-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-cpu-test.sh -------------------------------------------------------------------------------- /.buildkite/run-multi-node-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-multi-node-test.sh -------------------------------------------------------------------------------- /.buildkite/run-neuron-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-neuron-test.sh -------------------------------------------------------------------------------- /.buildkite/run-openvino-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-openvino-test.sh -------------------------------------------------------------------------------- /.buildkite/run-tpu-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-tpu-test.sh -------------------------------------------------------------------------------- /.buildkite/run-xpu-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/run-xpu-test.sh -------------------------------------------------------------------------------- /.buildkite/test-pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.buildkite/test-pipeline.yaml -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.clang-format -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/FUNDING.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/300-usage.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/ISSUE_TEMPLATE/300-usage.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/400-bug report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/ISSUE_TEMPLATE/400-bug report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/600-new model.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/ISSUE_TEMPLATE/600-new model.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/750-RFC.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/ISSUE_TEMPLATE/750-RFC.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/mergify.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/mergify.yml -------------------------------------------------------------------------------- /.github/workflows/actionlint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/actionlint.yml -------------------------------------------------------------------------------- /.github/workflows/add_label_automerge.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/add_label_automerge.yml -------------------------------------------------------------------------------- /.github/workflows/clang-format.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/clang-format.yml -------------------------------------------------------------------------------- /.github/workflows/matchers/actionlint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/matchers/actionlint.json -------------------------------------------------------------------------------- /.github/workflows/matchers/mypy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/matchers/mypy.json -------------------------------------------------------------------------------- /.github/workflows/matchers/ruff.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/matchers/ruff.json -------------------------------------------------------------------------------- /.github/workflows/mypy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/mypy.yaml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.github/workflows/reminder_comment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/reminder_comment.yml -------------------------------------------------------------------------------- /.github/workflows/ruff.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/ruff.yml -------------------------------------------------------------------------------- /.github/workflows/scripts/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/scripts/build.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/cuda-install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/scripts/cuda-install.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/scripts/env.sh -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/stale.yml -------------------------------------------------------------------------------- /.github/workflows/yapf.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.github/workflows/yapf.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.gitignore -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /.yapfignore: -------------------------------------------------------------------------------- 1 | collect_env.py 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /DCO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/DCO -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile -------------------------------------------------------------------------------- /Dockerfile.cpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.cpu -------------------------------------------------------------------------------- /Dockerfile.hpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.hpu -------------------------------------------------------------------------------- /Dockerfile.neuron: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.neuron -------------------------------------------------------------------------------- /Dockerfile.openvino: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.openvino -------------------------------------------------------------------------------- /Dockerfile.ppc64le: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.ppc64le -------------------------------------------------------------------------------- /Dockerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.rocm -------------------------------------------------------------------------------- /Dockerfile.tpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.tpu -------------------------------------------------------------------------------- /Dockerfile.xpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/Dockerfile.xpu -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/SECURITY.md -------------------------------------------------------------------------------- /adrenaline/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/Makefile -------------------------------------------------------------------------------- /adrenaline/assets/PD_disaggregationn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/assets/PD_disaggregationn.png -------------------------------------------------------------------------------- /adrenaline/assets/adrenaline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/assets/adrenaline.png -------------------------------------------------------------------------------- /adrenaline/attention/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/attention/layer.py -------------------------------------------------------------------------------- /adrenaline/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/config.py -------------------------------------------------------------------------------- /adrenaline/csrc/ipc_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/csrc/ipc_utils.cpp -------------------------------------------------------------------------------- /adrenaline/csrc/torch_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/csrc/torch_bindings.cpp -------------------------------------------------------------------------------- /adrenaline/custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/custom_ops.py -------------------------------------------------------------------------------- /adrenaline/include/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/include/cuda_utils.h -------------------------------------------------------------------------------- /adrenaline/include/ipc_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/include/ipc_utils.h -------------------------------------------------------------------------------- /adrenaline/model_loader/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/model_loader/model_loader.py -------------------------------------------------------------------------------- /adrenaline/model_loader/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/model_loader/models/llama.py -------------------------------------------------------------------------------- /adrenaline/model_runner/attn_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/model_runner/attn_runner.py -------------------------------------------------------------------------------- /adrenaline/model_runner/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/model_runner/model_runner.py -------------------------------------------------------------------------------- /adrenaline/profiler/delta_profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/profiler/delta_profiler.py -------------------------------------------------------------------------------- /adrenaline/proxy/load_estimator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/proxy/load_estimator.py -------------------------------------------------------------------------------- /adrenaline/proxy/request_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/proxy/request_dispatcher.py -------------------------------------------------------------------------------- /adrenaline/proxy/request_tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/proxy/request_tracer.py -------------------------------------------------------------------------------- /adrenaline/proxy/storage_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/proxy/storage_manager.py -------------------------------------------------------------------------------- /adrenaline/proxy/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/proxy/utils.py -------------------------------------------------------------------------------- /adrenaline/requirements.txt: -------------------------------------------------------------------------------- 1 | quart 2 | -------------------------------------------------------------------------------- /adrenaline/resource_manager/mps_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/resource_manager/mps_context.py -------------------------------------------------------------------------------- /adrenaline/scripts/profile_gemm_time.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/scripts/profile_gemm_time.sh -------------------------------------------------------------------------------- /adrenaline/scripts/start_mps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/scripts/start_mps.sh -------------------------------------------------------------------------------- /adrenaline/scripts/stop_mps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/scripts/stop_mps.sh -------------------------------------------------------------------------------- /adrenaline/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/setup.py -------------------------------------------------------------------------------- /adrenaline/utils/csv_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/utils/csv_utils.py -------------------------------------------------------------------------------- /adrenaline/utils/input_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/utils/input_factory.py -------------------------------------------------------------------------------- /adrenaline/utils/profile_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/utils/profile_utils.py -------------------------------------------------------------------------------- /adrenaline/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/adrenaline/utils/utils.py -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/backend_request_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/backend_request_func.py -------------------------------------------------------------------------------- /benchmarks/bench_dataset_serving.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/bench_dataset_serving.sh -------------------------------------------------------------------------------- /benchmarks/bench_random_serving.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/bench_random_serving.sh -------------------------------------------------------------------------------- /benchmarks/benchmark_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/benchmark_latency.py -------------------------------------------------------------------------------- /benchmarks/benchmark_prefix_caching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/benchmark_prefix_caching.py -------------------------------------------------------------------------------- /benchmarks/benchmark_prioritization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/benchmark_prioritization.py -------------------------------------------------------------------------------- /benchmarks/benchmark_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/benchmark_serving.py -------------------------------------------------------------------------------- /benchmarks/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/benchmark_throughput.py -------------------------------------------------------------------------------- /benchmarks/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/dataset_utils.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_aqlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_aqlm.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_layernorm.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_machete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_machete.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_marlin.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_moe.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_quant.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_rope.py -------------------------------------------------------------------------------- /benchmarks/kernels/benchmark_shapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/benchmark_shapes.py -------------------------------------------------------------------------------- /benchmarks/kernels/graph_machete_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/graph_machete_bench.py -------------------------------------------------------------------------------- /benchmarks/kernels/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas -------------------------------------------------------------------------------- /benchmarks/kernels/weight_shapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/kernels/weight_shapes.py -------------------------------------------------------------------------------- /benchmarks/launch_tgi_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/launch_tgi_server.sh -------------------------------------------------------------------------------- /benchmarks/overheads/benchmark_hashing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/overheads/benchmark_hashing.py -------------------------------------------------------------------------------- /benchmarks/sonnet.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/benchmarks/sonnet.txt -------------------------------------------------------------------------------- /cmake/cpu_extension.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/cmake/cpu_extension.cmake -------------------------------------------------------------------------------- /cmake/hipify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/cmake/hipify.py -------------------------------------------------------------------------------- /cmake/utils.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/cmake/utils.cmake -------------------------------------------------------------------------------- /collect_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/collect_env.py -------------------------------------------------------------------------------- /csrc/activation_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/activation_kernels.cu -------------------------------------------------------------------------------- /csrc/attention/attention_dtypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/attention_dtypes.h -------------------------------------------------------------------------------- /csrc/attention/attention_generic.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/attention_generic.cuh -------------------------------------------------------------------------------- /csrc/attention/attention_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/attention_kernels.cu -------------------------------------------------------------------------------- /csrc/attention/attention_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/attention_utils.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_bfloat16.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/dtype_bfloat16.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_float16.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/dtype_float16.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_float32.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/dtype_float32.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_fp8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/attention/dtype_fp8.cuh -------------------------------------------------------------------------------- /csrc/cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cache.h -------------------------------------------------------------------------------- /csrc/cache_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cache_kernels.cu -------------------------------------------------------------------------------- /csrc/core/exception.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define VLLM_IMPLIES(p, q) (!(p) || (q)) 4 | -------------------------------------------------------------------------------- /csrc/core/registration.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/core/registration.h -------------------------------------------------------------------------------- /csrc/core/scalar_type.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/core/scalar_type.hpp -------------------------------------------------------------------------------- /csrc/cpu/activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/activation.cpp -------------------------------------------------------------------------------- /csrc/cpu/attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/attention.cpp -------------------------------------------------------------------------------- /csrc/cpu/cache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/cache.cpp -------------------------------------------------------------------------------- /csrc/cpu/cpu_types.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/cpu_types.hpp -------------------------------------------------------------------------------- /csrc/cpu/cpu_types_vsx.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/cpu_types_vsx.hpp -------------------------------------------------------------------------------- /csrc/cpu/cpu_types_x86.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/cpu_types_x86.hpp -------------------------------------------------------------------------------- /csrc/cpu/dnnl_helper.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/dnnl_helper.hpp -------------------------------------------------------------------------------- /csrc/cpu/layernorm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/layernorm.cpp -------------------------------------------------------------------------------- /csrc/cpu/pos_encoding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/pos_encoding.cpp -------------------------------------------------------------------------------- /csrc/cpu/quant.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/quant.cpp -------------------------------------------------------------------------------- /csrc/cpu/torch_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/torch_bindings.cpp -------------------------------------------------------------------------------- /csrc/cpu/utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cpu/utils.cpp -------------------------------------------------------------------------------- /csrc/cuda_compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cuda_compat.h -------------------------------------------------------------------------------- /csrc/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cuda_utils.h -------------------------------------------------------------------------------- /csrc/cuda_utils_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cuda_utils_kernels.cu -------------------------------------------------------------------------------- /csrc/custom_all_reduce.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/custom_all_reduce.cu -------------------------------------------------------------------------------- /csrc/custom_all_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/custom_all_reduce.cuh -------------------------------------------------------------------------------- /csrc/custom_all_reduce_test.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/custom_all_reduce_test.cu -------------------------------------------------------------------------------- /csrc/cutlass_extensions/cute_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cutlass_extensions/cute_utils.cuh -------------------------------------------------------------------------------- /csrc/cutlass_extensions/torch_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/cutlass_extensions/torch_utils.hpp -------------------------------------------------------------------------------- /csrc/dispatch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/dispatch_utils.h -------------------------------------------------------------------------------- /csrc/layernorm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/layernorm_kernels.cu -------------------------------------------------------------------------------- /csrc/mamba/causal_conv1d/causal_conv1d.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/mamba/causal_conv1d/causal_conv1d.cu -------------------------------------------------------------------------------- /csrc/mamba/causal_conv1d/causal_conv1d.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/mamba/causal_conv1d/causal_conv1d.h -------------------------------------------------------------------------------- /csrc/mamba/causal_conv1d/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/mamba/causal_conv1d/static_switch.h -------------------------------------------------------------------------------- /csrc/mamba/mamba_ssm/selective_scan.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/mamba/mamba_ssm/selective_scan.h -------------------------------------------------------------------------------- /csrc/mamba/mamba_ssm/selective_scan_fwd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/mamba/mamba_ssm/selective_scan_fwd.cu -------------------------------------------------------------------------------- /csrc/mamba/mamba_ssm/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/mamba/mamba_ssm/static_switch.h -------------------------------------------------------------------------------- /csrc/moe/marlin_moe_ops.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/moe/marlin_moe_ops.cu -------------------------------------------------------------------------------- /csrc/moe/moe_align_sum_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/moe/moe_align_sum_kernels.cu -------------------------------------------------------------------------------- /csrc/moe/moe_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/moe/moe_ops.h -------------------------------------------------------------------------------- /csrc/moe/topk_softmax_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/moe/topk_softmax_kernels.cu -------------------------------------------------------------------------------- /csrc/moe/torch_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/moe/torch_bindings.cpp -------------------------------------------------------------------------------- /csrc/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/ops.h -------------------------------------------------------------------------------- /csrc/permute_cols.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/permute_cols.cu -------------------------------------------------------------------------------- /csrc/pos_encoding_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/pos_encoding_kernels.cu -------------------------------------------------------------------------------- /csrc/prepare_inputs/advance_step.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/prepare_inputs/advance_step.cu -------------------------------------------------------------------------------- /csrc/prepare_inputs/advance_step.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/prepare_inputs/advance_step.cuh -------------------------------------------------------------------------------- /csrc/quantization/aqlm/gemm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/aqlm/gemm_kernels.cu -------------------------------------------------------------------------------- /csrc/quantization/awq/dequantize.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/awq/dequantize.cuh -------------------------------------------------------------------------------- /csrc/quantization/awq/gemm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/awq/gemm_kernels.cu -------------------------------------------------------------------------------- /csrc/quantization/cutlass_w8a8/common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/cutlass_w8a8/common.hpp -------------------------------------------------------------------------------- /csrc/quantization/fp8/amd/hip_float8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/fp8/amd/hip_float8.h -------------------------------------------------------------------------------- /csrc/quantization/fp8/amd/quant_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/fp8/amd/quant_utils.cuh -------------------------------------------------------------------------------- /csrc/quantization/fp8/common.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/fp8/common.cu -------------------------------------------------------------------------------- /csrc/quantization/fp8/fp8_marlin.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/fp8/fp8_marlin.cu -------------------------------------------------------------------------------- /csrc/quantization/gguf/dequantize.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gguf/dequantize.cuh -------------------------------------------------------------------------------- /csrc/quantization/gguf/ggml-common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gguf/ggml-common.h -------------------------------------------------------------------------------- /csrc/quantization/gguf/gguf_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gguf/gguf_kernel.cu -------------------------------------------------------------------------------- /csrc/quantization/gguf/mmq.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gguf/mmq.cuh -------------------------------------------------------------------------------- /csrc/quantization/gguf/mmvq.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gguf/mmvq.cuh -------------------------------------------------------------------------------- /csrc/quantization/gguf/vecdotq.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gguf/vecdotq.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/compat.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/matrix_view.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/matrix_view.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/q_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/q_gemm.cu -------------------------------------------------------------------------------- /csrc/quantization/gptq/qdq_2.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/qdq_2.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/qdq_3.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/qdq_3.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/qdq_4.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/qdq_4.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/qdq_8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/qdq_8.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq/qdq_util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq/qdq_util.cuh -------------------------------------------------------------------------------- /csrc/quantization/gptq_marlin/marlin.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/gptq_marlin/marlin.cuh -------------------------------------------------------------------------------- /csrc/quantization/machete/Readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/machete/Readme.md -------------------------------------------------------------------------------- /csrc/quantization/machete/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/machete/generate.py -------------------------------------------------------------------------------- /csrc/quantization/marlin/dense/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/marlin/dense/LICENSE -------------------------------------------------------------------------------- /csrc/quantization/marlin/sparse/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/quantization/marlin/sparse/LICENSE -------------------------------------------------------------------------------- /csrc/rocm/attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/rocm/attention.cu -------------------------------------------------------------------------------- /csrc/rocm/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/rocm/ops.h -------------------------------------------------------------------------------- /csrc/rocm/torch_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/rocm/torch_bindings.cpp -------------------------------------------------------------------------------- /csrc/torch_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/csrc/torch_bindings.cpp -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/source/_static/custom.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/_static/custom.js -------------------------------------------------------------------------------- /docs/source/assets/kernel/k_vecs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/k_vecs.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/key.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/logits_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/logits_vec.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/q_vecs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/q_vecs.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/query.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/v_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/v_vec.png -------------------------------------------------------------------------------- /docs/source/assets/kernel/value.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/assets/kernel/value.png -------------------------------------------------------------------------------- /docs/source/community/meetups.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/community/meetups.rst -------------------------------------------------------------------------------- /docs/source/community/sponsors.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/community/sponsors.md -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/dev/engine/engine_index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/dev/engine/engine_index.rst -------------------------------------------------------------------------------- /docs/source/dev/engine/llm_engine.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/dev/engine/llm_engine.rst -------------------------------------------------------------------------------- /docs/source/dev/pooling_params.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/dev/pooling_params.rst -------------------------------------------------------------------------------- /docs/source/dev/sampling_params.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/dev/sampling_params.rst -------------------------------------------------------------------------------- /docs/source/generate_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/generate_examples.py -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/models/adding_model.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/adding_model.rst -------------------------------------------------------------------------------- /docs/source/models/engine_args.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/engine_args.rst -------------------------------------------------------------------------------- /docs/source/models/lora.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/lora.rst -------------------------------------------------------------------------------- /docs/source/models/performance.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/performance.rst -------------------------------------------------------------------------------- /docs/source/models/spec_decode.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/spec_decode.rst -------------------------------------------------------------------------------- /docs/source/models/supported_models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/supported_models.rst -------------------------------------------------------------------------------- /docs/source/models/vlm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/models/vlm.rst -------------------------------------------------------------------------------- /docs/source/quantization/auto_awq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/quantization/auto_awq.rst -------------------------------------------------------------------------------- /docs/source/quantization/bnb.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/quantization/bnb.rst -------------------------------------------------------------------------------- /docs/source/quantization/fp8.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/quantization/fp8.rst -------------------------------------------------------------------------------- /docs/source/quantization/gguf.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/quantization/gguf.rst -------------------------------------------------------------------------------- /docs/source/quantization/int8.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/quantization/int8.rst -------------------------------------------------------------------------------- /docs/source/serving/env_vars.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/env_vars.rst -------------------------------------------------------------------------------- /docs/source/serving/faq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/faq.rst -------------------------------------------------------------------------------- /docs/source/serving/integrations.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/integrations.rst -------------------------------------------------------------------------------- /docs/source/serving/metrics.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/metrics.rst -------------------------------------------------------------------------------- /docs/source/serving/run_on_sky.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/run_on_sky.rst -------------------------------------------------------------------------------- /docs/source/serving/tensorizer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/tensorizer.rst -------------------------------------------------------------------------------- /docs/source/serving/usage_stats.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/docs/source/serving/usage_stats.md -------------------------------------------------------------------------------- /evaluation/.gitignore: -------------------------------------------------------------------------------- 1 | datasets 2 | results 3 | plots/**/*.pdf 4 | -------------------------------------------------------------------------------- /evaluation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/README.md -------------------------------------------------------------------------------- /evaluation/assets/mooncake_13b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/assets/mooncake_13b.png -------------------------------------------------------------------------------- /evaluation/assets/mooncake_8b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/assets/mooncake_8b.png -------------------------------------------------------------------------------- /evaluation/plots/utils/plot_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/plots/utils/plot_utils.py -------------------------------------------------------------------------------- /evaluation/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | pytest 3 | nvtx 4 | pandas 5 | datasets 6 | -------------------------------------------------------------------------------- /evaluation/run/0_prepare_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/run/0_prepare_dataset.sh -------------------------------------------------------------------------------- /evaluation/run/python/prepare_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/run/python/prepare_dataset.py -------------------------------------------------------------------------------- /evaluation/utils/client_utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/utils/client_utils.sh -------------------------------------------------------------------------------- /evaluation/utils/evaluation_utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/utils/evaluation_utils.sh -------------------------------------------------------------------------------- /evaluation/utils/output_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/utils/output_parser.py -------------------------------------------------------------------------------- /evaluation/utils/peak_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/utils/peak_throughput.py -------------------------------------------------------------------------------- /evaluation/utils/stable_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/evaluation/utils/stable_throughput.py -------------------------------------------------------------------------------- /examples/adrenaline/download_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/adrenaline/download_model.py -------------------------------------------------------------------------------- /examples/adrenaline/start_clients.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/adrenaline/start_clients.sh -------------------------------------------------------------------------------- /examples/adrenaline/stop_demo_servers.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/adrenaline/stop_demo_servers.sh -------------------------------------------------------------------------------- /examples/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/api_client.py -------------------------------------------------------------------------------- /examples/aqlm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/aqlm_example.py -------------------------------------------------------------------------------- /examples/cpu_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/cpu_offload.py -------------------------------------------------------------------------------- /examples/distributed_kv/send_request.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/distributed_kv/send_request.sh -------------------------------------------------------------------------------- /examples/distributed_utils/utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/distributed_utils/utils.sh -------------------------------------------------------------------------------- /examples/florence2_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/florence2_inference.py -------------------------------------------------------------------------------- /examples/fp8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/fp8/README.md -------------------------------------------------------------------------------- /examples/fp8/extract_scales.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/fp8/extract_scales.py -------------------------------------------------------------------------------- /examples/fp8/quantizer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/fp8/quantizer/README.md -------------------------------------------------------------------------------- /examples/fp8/quantizer/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/fp8/quantizer/quantize.py -------------------------------------------------------------------------------- /examples/gguf_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/gguf_inference.py -------------------------------------------------------------------------------- /examples/gradio_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/gradio_webserver.py -------------------------------------------------------------------------------- /examples/llm_engine_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/llm_engine_example.py -------------------------------------------------------------------------------- /examples/logging_configuration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/logging_configuration.md -------------------------------------------------------------------------------- /examples/multilora_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/multilora_inference.py -------------------------------------------------------------------------------- /examples/offline_chat_with_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_chat_with_tools.py -------------------------------------------------------------------------------- /examples/offline_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference.py -------------------------------------------------------------------------------- /examples/offline_inference_arctic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_arctic.py -------------------------------------------------------------------------------- /examples/offline_inference_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_chat.py -------------------------------------------------------------------------------- /examples/offline_inference_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_embedding.py -------------------------------------------------------------------------------- /examples/offline_inference_neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_neuron.py -------------------------------------------------------------------------------- /examples/offline_inference_openai.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_openai.md -------------------------------------------------------------------------------- /examples/offline_inference_pixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_pixtral.py -------------------------------------------------------------------------------- /examples/offline_inference_tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_inference_tpu.py -------------------------------------------------------------------------------- /examples/offline_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/offline_profile.py -------------------------------------------------------------------------------- /examples/openai_completion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/openai_completion_client.py -------------------------------------------------------------------------------- /examples/openai_embedding_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/openai_embedding_client.py -------------------------------------------------------------------------------- /examples/openai_example_batch.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/openai_example_batch.jsonl -------------------------------------------------------------------------------- /examples/production_monitoring/Otel.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/production_monitoring/Otel.md -------------------------------------------------------------------------------- /examples/production_monitoring/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/production_monitoring/README.md -------------------------------------------------------------------------------- /examples/run_cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/run_cluster.sh -------------------------------------------------------------------------------- /examples/save_sharded_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/save_sharded_state.py -------------------------------------------------------------------------------- /examples/template_alpaca.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_alpaca.jinja -------------------------------------------------------------------------------- /examples/template_baichuan.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_baichuan.jinja -------------------------------------------------------------------------------- /examples/template_blip2.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_blip2.jinja -------------------------------------------------------------------------------- /examples/template_chatglm.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_chatglm.jinja -------------------------------------------------------------------------------- /examples/template_chatglm2.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_chatglm2.jinja -------------------------------------------------------------------------------- /examples/template_chatml.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_chatml.jinja -------------------------------------------------------------------------------- /examples/template_falcon.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_falcon.jinja -------------------------------------------------------------------------------- /examples/template_falcon_180b.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_falcon_180b.jinja -------------------------------------------------------------------------------- /examples/template_inkbot.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_inkbot.jinja -------------------------------------------------------------------------------- /examples/template_llava.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_llava.jinja -------------------------------------------------------------------------------- /examples/template_vlm2vec.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/template_vlm2vec.jinja -------------------------------------------------------------------------------- /examples/tensorize_vllm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/tensorize_vllm_model.py -------------------------------------------------------------------------------- /examples/tool_chat_template_hermes.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/examples/tool_chat_template_hermes.jinja -------------------------------------------------------------------------------- /find_cuda_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/find_cuda_init.py -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/format.sh -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/pyproject.toml -------------------------------------------------------------------------------- /python_only_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/python_only_dev.py -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-build.txt -------------------------------------------------------------------------------- /requirements-common.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-common.txt -------------------------------------------------------------------------------- /requirements-cpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-cpu.txt -------------------------------------------------------------------------------- /requirements-cuda.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-cuda.txt -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-hpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-hpu.txt -------------------------------------------------------------------------------- /requirements-lint.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-lint.txt -------------------------------------------------------------------------------- /requirements-neuron.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-neuron.txt -------------------------------------------------------------------------------- /requirements-openvino.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-openvino.txt -------------------------------------------------------------------------------- /requirements-rocm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-rocm.txt -------------------------------------------------------------------------------- /requirements-test.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-test.in -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-test.txt -------------------------------------------------------------------------------- /requirements-tpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-tpu.txt -------------------------------------------------------------------------------- /requirements-xpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/requirements-xpu.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/async_engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/async_engine/test_api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/async_engine/test_api_server.py -------------------------------------------------------------------------------- /tests/basic_correctness/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/compile/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/compile/piecewise/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/compile/piecewise/test_simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/compile/piecewise/test_simple.py -------------------------------------------------------------------------------- /tests/compile/test_basic_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/compile/test_basic_correctness.py -------------------------------------------------------------------------------- /tests/compile/test_full_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/compile/test_full_graph.py -------------------------------------------------------------------------------- /tests/compile/test_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/compile/test_wrapper.py -------------------------------------------------------------------------------- /tests/compile/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/compile/utils.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/block/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/block/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/conftest.py -------------------------------------------------------------------------------- /tests/core/block/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/core/block/e2e/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/e2e/conftest.py -------------------------------------------------------------------------------- /tests/core/block/e2e/test_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/e2e/test_correctness.py -------------------------------------------------------------------------------- /tests/core/block/test_block_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/test_block_manager.py -------------------------------------------------------------------------------- /tests/core/block/test_block_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/test_block_table.py -------------------------------------------------------------------------------- /tests/core/block/test_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/test_common.py -------------------------------------------------------------------------------- /tests/core/block/test_naive_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/block/test_naive_block.py -------------------------------------------------------------------------------- /tests/core/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/test_scheduler.py -------------------------------------------------------------------------------- /tests/core/test_serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/test_serialization.py -------------------------------------------------------------------------------- /tests/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/core/utils.py -------------------------------------------------------------------------------- /tests/data/test_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/data/test_config.yaml -------------------------------------------------------------------------------- /tests/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/distributed/test_comm_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/distributed/test_comm_ops.py -------------------------------------------------------------------------------- /tests/distributed/test_pp_cudagraph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/distributed/test_pp_cudagraph.py -------------------------------------------------------------------------------- /tests/distributed/test_pynccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/distributed/test_pynccl.py -------------------------------------------------------------------------------- /tests/distributed/test_same_node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/distributed/test_same_node.py -------------------------------------------------------------------------------- /tests/distributed/test_shm_broadcast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/distributed/test_shm_broadcast.py -------------------------------------------------------------------------------- /tests/distributed/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/distributed/test_utils.py -------------------------------------------------------------------------------- /tests/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/engine/output_processor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/engine/test_arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_arg_utils.py -------------------------------------------------------------------------------- /tests/engine/test_custom_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_custom_executor.py -------------------------------------------------------------------------------- /tests/engine/test_detokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_detokenization.py -------------------------------------------------------------------------------- /tests/engine/test_multiproc_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_multiproc_workers.py -------------------------------------------------------------------------------- /tests/engine/test_short_mm_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_short_mm_context.py -------------------------------------------------------------------------------- /tests/engine/test_skip_tokenizer_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_skip_tokenizer_init.py -------------------------------------------------------------------------------- /tests/engine/test_stop_reason.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_stop_reason.py -------------------------------------------------------------------------------- /tests/engine/test_stop_strings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/engine/test_stop_strings.py -------------------------------------------------------------------------------- /tests/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/entrypoints/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/conftest.py -------------------------------------------------------------------------------- /tests/entrypoints/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/entrypoints/llm/test_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/llm/test_chat.py -------------------------------------------------------------------------------- /tests/entrypoints/llm/test_encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/llm/test_encode.py -------------------------------------------------------------------------------- /tests/entrypoints/llm/test_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/llm/test_generate.py -------------------------------------------------------------------------------- /tests/entrypoints/llm/test_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/llm/test_init.py -------------------------------------------------------------------------------- /tests/entrypoints/offline_mode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/openai/test_audio.py -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/openai/test_basic.py -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/openai/test_chat.py -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/openai/test_metrics.py -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/openai/test_models.py -------------------------------------------------------------------------------- /tests/entrypoints/openai/test_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/openai/test_vision.py -------------------------------------------------------------------------------- /tests/entrypoints/test_chat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/entrypoints/test_chat_utils.py -------------------------------------------------------------------------------- /tests/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/kernels/allclose_default.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/allclose_default.py -------------------------------------------------------------------------------- /tests/kernels/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/conftest.py -------------------------------------------------------------------------------- /tests/kernels/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/quant_utils.py -------------------------------------------------------------------------------- /tests/kernels/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_activation.py -------------------------------------------------------------------------------- /tests/kernels/test_aqlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_aqlm.py -------------------------------------------------------------------------------- /tests/kernels/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_attention.py -------------------------------------------------------------------------------- /tests/kernels/test_attention_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_attention_selector.py -------------------------------------------------------------------------------- /tests/kernels/test_awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_awq.py -------------------------------------------------------------------------------- /tests/kernels/test_awq_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_awq_marlin.py -------------------------------------------------------------------------------- /tests/kernels/test_awq_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_awq_triton.py -------------------------------------------------------------------------------- /tests/kernels/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_cache.py -------------------------------------------------------------------------------- /tests/kernels/test_causal_conv1d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_causal_conv1d.py -------------------------------------------------------------------------------- /tests/kernels/test_cutlass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_cutlass.py -------------------------------------------------------------------------------- /tests/kernels/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_flash_attn.py -------------------------------------------------------------------------------- /tests/kernels/test_flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_flashinfer.py -------------------------------------------------------------------------------- /tests/kernels/test_fp8_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_fp8_quant.py -------------------------------------------------------------------------------- /tests/kernels/test_ggml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_ggml.py -------------------------------------------------------------------------------- /tests/kernels/test_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_gguf.py -------------------------------------------------------------------------------- /tests/kernels/test_gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_gptq.py -------------------------------------------------------------------------------- /tests/kernels/test_int8_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_int8_quant.py -------------------------------------------------------------------------------- /tests/kernels/test_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_layernorm.py -------------------------------------------------------------------------------- /tests/kernels/test_machete_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_machete_gemm.py -------------------------------------------------------------------------------- /tests/kernels/test_mamba_ssm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_mamba_ssm.py -------------------------------------------------------------------------------- /tests/kernels/test_marlin_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_marlin_gemm.py -------------------------------------------------------------------------------- /tests/kernels/test_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_moe.py -------------------------------------------------------------------------------- /tests/kernels/test_permute_cols.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_permute_cols.py -------------------------------------------------------------------------------- /tests/kernels/test_pos_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_pos_encoding.py -------------------------------------------------------------------------------- /tests/kernels/test_prefix_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_prefix_prefill.py -------------------------------------------------------------------------------- /tests/kernels/test_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_rotary_embedding.py -------------------------------------------------------------------------------- /tests/kernels/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/test_utils.py -------------------------------------------------------------------------------- /tests/kernels/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kernels/utils.py -------------------------------------------------------------------------------- /tests/kv_transfer/disagg_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kv_transfer/disagg_test.py -------------------------------------------------------------------------------- /tests/kv_transfer/module_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kv_transfer/module_test.py -------------------------------------------------------------------------------- /tests/kv_transfer/test_lookup_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kv_transfer/test_lookup_buffer.py -------------------------------------------------------------------------------- /tests/kv_transfer/test_send_recv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/kv_transfer/test_send_recv.py -------------------------------------------------------------------------------- /tests/lora/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/lora/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/conftest.py -------------------------------------------------------------------------------- /tests/lora/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/lora/test_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_baichuan.py -------------------------------------------------------------------------------- /tests/lora/test_chatglm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_chatglm3.py -------------------------------------------------------------------------------- /tests/lora/test_gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_gemma.py -------------------------------------------------------------------------------- /tests/lora/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_layers.py -------------------------------------------------------------------------------- /tests/lora/test_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_llama.py -------------------------------------------------------------------------------- /tests/lora/test_long_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_long_context.py -------------------------------------------------------------------------------- /tests/lora/test_lora_checkpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_lora_checkpoints.py -------------------------------------------------------------------------------- /tests/lora/test_lora_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_lora_huggingface.py -------------------------------------------------------------------------------- /tests/lora/test_lora_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_lora_manager.py -------------------------------------------------------------------------------- /tests/lora/test_minicpmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_minicpmv.py -------------------------------------------------------------------------------- /tests/lora/test_minicpmv_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_minicpmv_tp.py -------------------------------------------------------------------------------- /tests/lora/test_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_mixtral.py -------------------------------------------------------------------------------- /tests/lora/test_phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_phi.py -------------------------------------------------------------------------------- /tests/lora/test_punica_sizes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_punica_sizes.py -------------------------------------------------------------------------------- /tests/lora/test_punica_variation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_punica_variation.py -------------------------------------------------------------------------------- /tests/lora/test_quant_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_quant_model.py -------------------------------------------------------------------------------- /tests/lora/test_tokenizer_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_tokenizer_group.py -------------------------------------------------------------------------------- /tests/lora/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_utils.py -------------------------------------------------------------------------------- /tests/lora/test_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/test_worker.py -------------------------------------------------------------------------------- /tests/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/lora/utils.py -------------------------------------------------------------------------------- /tests/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/metrics/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/metrics/test_metrics.py -------------------------------------------------------------------------------- /tests/model_executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/model_executor/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/model_executor/conftest.py -------------------------------------------------------------------------------- /tests/model_executor/weight_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/model_executor/weight_utils.py -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/decoder_only/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/decoder_only/audio_language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/decoder_only/language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/decoder_only/vision_language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/decoder_only/vision_language/mm_processor_kwargs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/decoder_only/vision_language/vlm_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/embedding/language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/embedding/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/models/embedding/utils.py -------------------------------------------------------------------------------- /tests/models/embedding/vision_language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/encoder_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/encoder_decoder/language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/encoder_decoder/vision_language/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/fixtures/pixtral_chat.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/models/fixtures/pixtral_chat.json -------------------------------------------------------------------------------- /tests/models/test_oot_registration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/models/test_oot_registration.py -------------------------------------------------------------------------------- /tests/models/test_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/models/test_registry.py -------------------------------------------------------------------------------- /tests/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/models/utils.py -------------------------------------------------------------------------------- /tests/mq_llm_engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/mq_llm_engine/test_abort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/mq_llm_engine/test_abort.py -------------------------------------------------------------------------------- /tests/mq_llm_engine/test_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/mq_llm_engine/test_load.py -------------------------------------------------------------------------------- /tests/mq_llm_engine/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/mq_llm_engine/utils.py -------------------------------------------------------------------------------- /tests/multi_step/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/multi_step/test_correctness_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/multi_step/test_correctness_llm.py -------------------------------------------------------------------------------- /tests/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/multimodal/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/multimodal/test_base.py -------------------------------------------------------------------------------- /tests/multimodal/test_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/multimodal/test_mapper.py -------------------------------------------------------------------------------- /tests/multimodal/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/multimodal/test_utils.py -------------------------------------------------------------------------------- /tests/prefix_caching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/prompt_adapter/test_bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/prompt_adapter/test_bloom.py -------------------------------------------------------------------------------- /tests/prompt_adapter/test_pa_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/prompt_adapter/test_pa_lora.py -------------------------------------------------------------------------------- /tests/prompts/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/prompts/example.txt -------------------------------------------------------------------------------- /tests/prompts/summary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/prompts/summary.txt -------------------------------------------------------------------------------- /tests/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/quantization/test_bitsandbytes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_bitsandbytes.py -------------------------------------------------------------------------------- /tests/quantization/test_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_configs.py -------------------------------------------------------------------------------- /tests/quantization/test_cpu_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_cpu_offload.py -------------------------------------------------------------------------------- /tests/quantization/test_experts_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_experts_int8.py -------------------------------------------------------------------------------- /tests/quantization/test_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_fp8.py -------------------------------------------------------------------------------- /tests/quantization/test_ipex_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_ipex_quant.py -------------------------------------------------------------------------------- /tests/quantization/test_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/test_lm_head.py -------------------------------------------------------------------------------- /tests/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/quantization/utils.py -------------------------------------------------------------------------------- /tests/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/samplers/test_beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_beam_search.py -------------------------------------------------------------------------------- /tests/samplers/test_ignore_eos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_ignore_eos.py -------------------------------------------------------------------------------- /tests/samplers/test_logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_logits_processor.py -------------------------------------------------------------------------------- /tests/samplers/test_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_logprobs.py -------------------------------------------------------------------------------- /tests/samplers/test_no_bad_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_no_bad_words.py -------------------------------------------------------------------------------- /tests/samplers/test_ranks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_ranks.py -------------------------------------------------------------------------------- /tests/samplers/test_rejection_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_rejection_sampler.py -------------------------------------------------------------------------------- /tests/samplers/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_sampler.py -------------------------------------------------------------------------------- /tests/samplers/test_seeded_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/samplers/test_seeded_generate.py -------------------------------------------------------------------------------- /tests/spec_decode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/spec_decode/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/spec_decode/e2e/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/e2e/conftest.py -------------------------------------------------------------------------------- /tests/spec_decode/e2e/test_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/e2e/test_logprobs.py -------------------------------------------------------------------------------- /tests/spec_decode/e2e/test_seed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/e2e/test_seed.py -------------------------------------------------------------------------------- /tests/spec_decode/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/test_metrics.py -------------------------------------------------------------------------------- /tests/spec_decode/test_ngram_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/test_ngram_worker.py -------------------------------------------------------------------------------- /tests/spec_decode/test_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/test_scorer.py -------------------------------------------------------------------------------- /tests/spec_decode/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/test_utils.py -------------------------------------------------------------------------------- /tests/spec_decode/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/spec_decode/utils.py -------------------------------------------------------------------------------- /tests/tensorizer_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tensorizer_loader/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tensorizer_loader/conftest.py -------------------------------------------------------------------------------- /tests/test_cache_block_hashing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_cache_block_hashing.py -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_config.py -------------------------------------------------------------------------------- /tests/test_embedded_commit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_embedded_commit.py -------------------------------------------------------------------------------- /tests/test_inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_inputs.py -------------------------------------------------------------------------------- /tests/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_logger.py -------------------------------------------------------------------------------- /tests/test_logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_logits_processor.py -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_regression.py -------------------------------------------------------------------------------- /tests/test_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_sampling_params.py -------------------------------------------------------------------------------- /tests/test_scalartype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_scalartype.py -------------------------------------------------------------------------------- /tests/test_sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_sequence.py -------------------------------------------------------------------------------- /tests/test_sharded_state_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_sharded_state_loader.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tests/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tokenization/test_detokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tokenization/test_detokenize.py -------------------------------------------------------------------------------- /tests/tokenization/test_get_eos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tokenization/test_get_eos.py -------------------------------------------------------------------------------- /tests/tokenization/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tokenization/test_tokenizer.py -------------------------------------------------------------------------------- /tests/tool_use/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tool_use/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tool_use/conftest.py -------------------------------------------------------------------------------- /tests/tool_use/test_chat_completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tool_use/test_chat_completions.py -------------------------------------------------------------------------------- /tests/tool_use/test_jamba_tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tool_use/test_jamba_tool_parser.py -------------------------------------------------------------------------------- /tests/tool_use/test_tool_calls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tool_use/test_tool_calls.py -------------------------------------------------------------------------------- /tests/tool_use/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tool_use/utils.py -------------------------------------------------------------------------------- /tests/tpu/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tpu/test_compilation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tpu/test_compilation.py -------------------------------------------------------------------------------- /tests/tpu/test_custom_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tpu/test_custom_dispatcher.py -------------------------------------------------------------------------------- /tests/tracing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tracing/test_tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/tracing/test_tracing.py -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/utils.py -------------------------------------------------------------------------------- /tests/weight_loading/models-large.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/weight_loading/models-large.txt -------------------------------------------------------------------------------- /tests/weight_loading/models.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/weight_loading/models.txt -------------------------------------------------------------------------------- /tests/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/worker/test_model_input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/worker/test_model_input.py -------------------------------------------------------------------------------- /tests/worker/test_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/worker/test_model_runner.py -------------------------------------------------------------------------------- /tests/worker/test_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/worker/test_profile.py -------------------------------------------------------------------------------- /tests/worker/test_swap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tests/worker/test_swap.py -------------------------------------------------------------------------------- /tools/actionlint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tools/actionlint.sh -------------------------------------------------------------------------------- /tools/check_repo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tools/check_repo.sh -------------------------------------------------------------------------------- /tools/mypy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tools/mypy.sh -------------------------------------------------------------------------------- /tools/profiler/print_layerwise_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tools/profiler/print_layerwise_table.py -------------------------------------------------------------------------------- /tools/report_build_time_ninja.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/tools/report_build_time_ninja.py -------------------------------------------------------------------------------- /use_existing_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/use_existing_torch.py -------------------------------------------------------------------------------- /vllm-README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm-README.md -------------------------------------------------------------------------------- /vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/__init__.py -------------------------------------------------------------------------------- /vllm/_custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/_custom_ops.py -------------------------------------------------------------------------------- /vllm/_ipex_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/_ipex_ops.py -------------------------------------------------------------------------------- /vllm/adapter_commons/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/adapter_commons/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/adapter_commons/layers.py -------------------------------------------------------------------------------- /vllm/adapter_commons/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/adapter_commons/models.py -------------------------------------------------------------------------------- /vllm/adapter_commons/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/adapter_commons/request.py -------------------------------------------------------------------------------- /vllm/adapter_commons/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/adapter_commons/utils.py -------------------------------------------------------------------------------- /vllm/adapter_commons/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/adapter_commons/worker_manager.py -------------------------------------------------------------------------------- /vllm/assets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/assets/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/assets/audio.py -------------------------------------------------------------------------------- /vllm/assets/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/assets/base.py -------------------------------------------------------------------------------- /vllm/assets/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/assets/image.py -------------------------------------------------------------------------------- /vllm/assets/video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/assets/video.py -------------------------------------------------------------------------------- /vllm/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/__init__.py -------------------------------------------------------------------------------- /vllm/attention/backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/backends/abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/abstract.py -------------------------------------------------------------------------------- /vllm/attention/backends/flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/flash_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/flashinfer.py -------------------------------------------------------------------------------- /vllm/attention/backends/hpu_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/hpu_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/ipex_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/ipex_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/openvino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/openvino.py -------------------------------------------------------------------------------- /vllm/attention/backends/pallas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/pallas.py -------------------------------------------------------------------------------- /vllm/attention/backends/torch_sdpa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/torch_sdpa.py -------------------------------------------------------------------------------- /vllm/attention/backends/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/utils.py -------------------------------------------------------------------------------- /vllm/attention/backends/xformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/backends/xformers.py -------------------------------------------------------------------------------- /vllm/attention/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/layer.py -------------------------------------------------------------------------------- /vllm/attention/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/ops/hpu_paged_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/ops/hpu_paged_attn.py -------------------------------------------------------------------------------- /vllm/attention/ops/ipex_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/ops/ipex_attn.py -------------------------------------------------------------------------------- /vllm/attention/ops/paged_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/ops/paged_attn.py -------------------------------------------------------------------------------- /vllm/attention/ops/prefix_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/ops/prefix_prefill.py -------------------------------------------------------------------------------- /vllm/attention/selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/attention/selector.py -------------------------------------------------------------------------------- /vllm/beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/beam_search.py -------------------------------------------------------------------------------- /vllm/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/block.py -------------------------------------------------------------------------------- /vllm/compilation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/compilation/backends.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/backends.py -------------------------------------------------------------------------------- /vllm/compilation/compile_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/compile_context.py -------------------------------------------------------------------------------- /vllm/compilation/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/config.py -------------------------------------------------------------------------------- /vllm/compilation/counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/counter.py -------------------------------------------------------------------------------- /vllm/compilation/decorators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/decorators.py -------------------------------------------------------------------------------- /vllm/compilation/levels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/levels.py -------------------------------------------------------------------------------- /vllm/compilation/wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/compilation/wrapper.py -------------------------------------------------------------------------------- /vllm/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/config.py -------------------------------------------------------------------------------- /vllm/connections.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/connections.py -------------------------------------------------------------------------------- /vllm/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/block/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/block/block_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block/block_table.py -------------------------------------------------------------------------------- /vllm/core/block/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block/common.py -------------------------------------------------------------------------------- /vllm/core/block/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block/interfaces.py -------------------------------------------------------------------------------- /vllm/core/block/naive_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block/naive_block.py -------------------------------------------------------------------------------- /vllm/core/block/prefix_caching_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block/prefix_caching_block.py -------------------------------------------------------------------------------- /vllm/core/block/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block/utils.py -------------------------------------------------------------------------------- /vllm/core/block_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/block_manager.py -------------------------------------------------------------------------------- /vllm/core/evictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/evictor.py -------------------------------------------------------------------------------- /vllm/core/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/interfaces.py -------------------------------------------------------------------------------- /vllm/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/core/scheduler.py -------------------------------------------------------------------------------- /vllm/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/distributed/__init__.py -------------------------------------------------------------------------------- /vllm/distributed/communication_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/distributed/communication_op.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/distributed/kv_transfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/distributed/kv_transfer/kv_pipe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/distributed/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/distributed/parallel_state.py -------------------------------------------------------------------------------- /vllm/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/distributed/utils.py -------------------------------------------------------------------------------- /vllm/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/arg_utils.py -------------------------------------------------------------------------------- /vllm/engine/async_llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/async_llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/async_timeout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/async_timeout.py -------------------------------------------------------------------------------- /vllm/engine/llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/metrics.py -------------------------------------------------------------------------------- /vllm/engine/metrics_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/metrics_types.py -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/multiprocessing/__init__.py -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/multiprocessing/client.py -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/multiprocessing/engine.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/output_processor/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/output_processor/util.py -------------------------------------------------------------------------------- /vllm/engine/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/engine/protocol.py -------------------------------------------------------------------------------- /vllm/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/chat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/chat_utils.py -------------------------------------------------------------------------------- /vllm/entrypoints/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/launcher.py -------------------------------------------------------------------------------- /vllm/entrypoints/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/llm.py -------------------------------------------------------------------------------- /vllm/entrypoints/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/logger.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/openai/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/openai/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/cli_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/openai/cli_args.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/openai/protocol.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/run_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/openai/run_batch.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/entrypoints/openai/serving_chat.py -------------------------------------------------------------------------------- /vllm/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/envs.py -------------------------------------------------------------------------------- /vllm/executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/executor/cpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/cpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/executor_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/executor_base.py -------------------------------------------------------------------------------- /vllm/executor/gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/hpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/hpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/msgspec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/msgspec_utils.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/multiproc_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_worker_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/multiproc_worker_utils.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_xpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/multiproc_xpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/neuron_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/neuron_executor.py -------------------------------------------------------------------------------- /vllm/executor/openvino_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/openvino_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/ray_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_hpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/ray_hpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_tpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/ray_tpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/ray_utils.py -------------------------------------------------------------------------------- /vllm/executor/ray_xpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/ray_xpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/tpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/tpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/xpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/executor/xpu_executor.py -------------------------------------------------------------------------------- /vllm/forward_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/forward_context.py -------------------------------------------------------------------------------- /vllm/inputs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/inputs/__init__.py -------------------------------------------------------------------------------- /vllm/inputs/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/inputs/data.py -------------------------------------------------------------------------------- /vllm/inputs/parse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/inputs/parse.py -------------------------------------------------------------------------------- /vllm/inputs/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/inputs/preprocess.py -------------------------------------------------------------------------------- /vllm/inputs/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/inputs/registry.py -------------------------------------------------------------------------------- /vllm/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/logger.py -------------------------------------------------------------------------------- /vllm/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/logging/__init__.py -------------------------------------------------------------------------------- /vllm/logging/formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/logging/formatter.py -------------------------------------------------------------------------------- /vllm/logits_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/logits_process.py -------------------------------------------------------------------------------- /vllm/lora/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/lora/fully_sharded_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/fully_sharded_layers.py -------------------------------------------------------------------------------- /vllm/lora/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/layers.py -------------------------------------------------------------------------------- /vllm/lora/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/lora.py -------------------------------------------------------------------------------- /vllm/lora/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/models.py -------------------------------------------------------------------------------- /vllm/lora/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/lora/ops/bgmv_expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/bgmv_expand.py -------------------------------------------------------------------------------- /vllm/lora/ops/bgmv_expand_slice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/bgmv_expand_slice.py -------------------------------------------------------------------------------- /vllm/lora/ops/bgmv_shrink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/bgmv_shrink.py -------------------------------------------------------------------------------- /vllm/lora/ops/sgmv_expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/sgmv_expand.py -------------------------------------------------------------------------------- /vllm/lora/ops/sgmv_expand_slice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/sgmv_expand_slice.py -------------------------------------------------------------------------------- /vllm/lora/ops/sgmv_shrink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/sgmv_shrink.py -------------------------------------------------------------------------------- /vllm/lora/ops/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/ops/utils.py -------------------------------------------------------------------------------- /vllm/lora/punica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/punica.py -------------------------------------------------------------------------------- /vllm/lora/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/request.py -------------------------------------------------------------------------------- /vllm/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/utils.py -------------------------------------------------------------------------------- /vllm/lora/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/lora/worker_manager.py -------------------------------------------------------------------------------- /vllm/model_executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/custom_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/custom_op.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/layers/activation.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/layers/layernorm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/layers/linear.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/layers/pooler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/resampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/layers/resampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/layers/sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/models/arctic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/arctic.py -------------------------------------------------------------------------------- /vllm/model_executor/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/baichuan.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/bart.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/bert.py -------------------------------------------------------------------------------- /vllm/model_executor/models/blip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/blip.py -------------------------------------------------------------------------------- /vllm/model_executor/models/blip2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/blip2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/bloom.py -------------------------------------------------------------------------------- /vllm/model_executor/models/chameleon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/chameleon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/chatglm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/clip.py -------------------------------------------------------------------------------- /vllm/model_executor/models/commandr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/commandr.py -------------------------------------------------------------------------------- /vllm/model_executor/models/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/dbrx.py -------------------------------------------------------------------------------- /vllm/model_executor/models/decilm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/decilm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/deepseek.py -------------------------------------------------------------------------------- /vllm/model_executor/models/eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/eagle.py -------------------------------------------------------------------------------- /vllm/model_executor/models/exaone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/exaone.py -------------------------------------------------------------------------------- /vllm/model_executor/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/falcon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/florence2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/florence2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/fuyu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/fuyu.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/gemma.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/gemma2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/gpt2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/gpt_j.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/gpt_neox.py -------------------------------------------------------------------------------- /vllm/model_executor/models/granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/granite.py -------------------------------------------------------------------------------- /vllm/model_executor/models/granitemoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/granitemoe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/h2ovl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/h2ovl.py -------------------------------------------------------------------------------- /vllm/model_executor/models/idefics3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/idefics3.py -------------------------------------------------------------------------------- /vllm/model_executor/models/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/interfaces.py -------------------------------------------------------------------------------- /vllm/model_executor/models/intern_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/intern_vit.py -------------------------------------------------------------------------------- /vllm/model_executor/models/internlm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/internlm2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/internvl.py -------------------------------------------------------------------------------- /vllm/model_executor/models/jais.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/jais.py -------------------------------------------------------------------------------- /vllm/model_executor/models/jamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/jamba.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/llama.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/llava.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/llava_next.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/mamba.py -------------------------------------------------------------------------------- /vllm/model_executor/models/medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/medusa.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/minicpm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/minicpm3.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/minicpmv.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/mixtral.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/mllama.py -------------------------------------------------------------------------------- /vllm/model_executor/models/molmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/molmo.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/mpt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/nemotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/nemotron.py -------------------------------------------------------------------------------- /vllm/model_executor/models/nvlm_d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/nvlm_d.py -------------------------------------------------------------------------------- /vllm/model_executor/models/olmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/olmo.py -------------------------------------------------------------------------------- /vllm/model_executor/models/olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/olmoe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/opt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/orion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/orion.py -------------------------------------------------------------------------------- /vllm/model_executor/models/paligemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/paligemma.py -------------------------------------------------------------------------------- /vllm/model_executor/models/persimmon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/persimmon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/phi.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/phi3.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/phi3_small.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/phi3v.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phimoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/phimoe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/pixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/pixtral.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/qwen.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/qwen2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_cls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/qwen2_cls.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/qwen2_moe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/qwen2_rm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/qwen2_vl.py -------------------------------------------------------------------------------- /vllm/model_executor/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/registry.py -------------------------------------------------------------------------------- /vllm/model_executor/models/siglip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/siglip.py -------------------------------------------------------------------------------- /vllm/model_executor/models/solar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/solar.py -------------------------------------------------------------------------------- /vllm/model_executor/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/stablelm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/starcoder2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/starcoder2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/ultravox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/ultravox.py -------------------------------------------------------------------------------- /vllm/model_executor/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/utils.py -------------------------------------------------------------------------------- /vllm/model_executor/models/xverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/models/xverse.py -------------------------------------------------------------------------------- /vllm/model_executor/parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/parameter.py -------------------------------------------------------------------------------- /vllm/model_executor/pooling_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/pooling_metadata.py -------------------------------------------------------------------------------- /vllm/model_executor/sampling_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/sampling_metadata.py -------------------------------------------------------------------------------- /vllm/model_executor/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/model_executor/utils.py -------------------------------------------------------------------------------- /vllm/multimodal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/__init__.py -------------------------------------------------------------------------------- /vllm/multimodal/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/audio.py -------------------------------------------------------------------------------- /vllm/multimodal/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/base.py -------------------------------------------------------------------------------- /vllm/multimodal/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/image.py -------------------------------------------------------------------------------- /vllm/multimodal/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/registry.py -------------------------------------------------------------------------------- /vllm/multimodal/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/utils.py -------------------------------------------------------------------------------- /vllm/multimodal/video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/multimodal/video.py -------------------------------------------------------------------------------- /vllm/outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/outputs.py -------------------------------------------------------------------------------- /vllm/platforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/__init__.py -------------------------------------------------------------------------------- /vllm/platforms/cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/cpu.py -------------------------------------------------------------------------------- /vllm/platforms/cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/cuda.py -------------------------------------------------------------------------------- /vllm/platforms/hpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/hpu.py -------------------------------------------------------------------------------- /vllm/platforms/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/interface.py -------------------------------------------------------------------------------- /vllm/platforms/neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/neuron.py -------------------------------------------------------------------------------- /vllm/platforms/openvino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/openvino.py -------------------------------------------------------------------------------- /vllm/platforms/rocm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/rocm.py -------------------------------------------------------------------------------- /vllm/platforms/tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/tpu.py -------------------------------------------------------------------------------- /vllm/platforms/xpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/platforms/xpu.py -------------------------------------------------------------------------------- /vllm/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/plugins/__init__.py -------------------------------------------------------------------------------- /vllm/pooling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/pooling_params.py -------------------------------------------------------------------------------- /vllm/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/profiler/__init__.py -------------------------------------------------------------------------------- /vllm/profiler/layerwise_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/profiler/layerwise_profile.py -------------------------------------------------------------------------------- /vllm/profiler/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/profiler/utils.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/prompt_adapter/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/prompt_adapter/layers.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/prompt_adapter/models.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/prompt_adapter/request.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/prompt_adapter/utils.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/prompt_adapter/worker_manager.py -------------------------------------------------------------------------------- /vllm/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. 2 | # The vllm package uses inline types. 3 | -------------------------------------------------------------------------------- /vllm/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/sampling_params.py -------------------------------------------------------------------------------- /vllm/scalar_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/scalar_type.py -------------------------------------------------------------------------------- /vllm/scripts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/scripts.py -------------------------------------------------------------------------------- /vllm/sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/sequence.py -------------------------------------------------------------------------------- /vllm/spec_decode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/spec_decode/batch_expansion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/batch_expansion.py -------------------------------------------------------------------------------- /vllm/spec_decode/draft_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/draft_model_runner.py -------------------------------------------------------------------------------- /vllm/spec_decode/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/interfaces.py -------------------------------------------------------------------------------- /vllm/spec_decode/medusa_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/medusa_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/metrics.py -------------------------------------------------------------------------------- /vllm/spec_decode/mqa_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/mqa_scorer.py -------------------------------------------------------------------------------- /vllm/spec_decode/multi_step_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/multi_step_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/ngram_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/ngram_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/proposer_worker_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/proposer_worker_base.py -------------------------------------------------------------------------------- /vllm/spec_decode/spec_decode_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/spec_decode_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/target_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/target_model_runner.py -------------------------------------------------------------------------------- /vllm/spec_decode/top1_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/top1_proposer.py -------------------------------------------------------------------------------- /vllm/spec_decode/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/spec_decode/util.py -------------------------------------------------------------------------------- /vllm/tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/tracing.py -------------------------------------------------------------------------------- /vllm/transformers_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/config.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/configs/dbrx.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/configs/eagle.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/h2ovl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/configs/h2ovl.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/jais.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/configs/jais.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/configs/mpt.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/solar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/configs/solar.py -------------------------------------------------------------------------------- /vllm/transformers_utils/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/detokenizer.py -------------------------------------------------------------------------------- /vllm/transformers_utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/processor.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/tokenizer.py -------------------------------------------------------------------------------- /vllm/transformers_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/transformers_utils/utils.py -------------------------------------------------------------------------------- /vllm/triton_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/triton_utils/__init__.py -------------------------------------------------------------------------------- /vllm/triton_utils/importing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/triton_utils/importing.py -------------------------------------------------------------------------------- /vllm/triton_utils/libentry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/triton_utils/libentry.py -------------------------------------------------------------------------------- /vllm/usage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/usage/usage_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/usage/usage_lib.py -------------------------------------------------------------------------------- /vllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/utils.py -------------------------------------------------------------------------------- /vllm/v1/attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/attention/backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/attention/backends/flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/attention/backends/flash_attn.py -------------------------------------------------------------------------------- /vllm/v1/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/core/kv_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/core/kv_cache_manager.py -------------------------------------------------------------------------------- /vllm/v1/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/core/scheduler.py -------------------------------------------------------------------------------- /vllm/v1/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/engine/llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/engine/llm_engine.py -------------------------------------------------------------------------------- /vllm/v1/executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/executor/gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/executor/gpu_executor.py -------------------------------------------------------------------------------- /vllm/v1/outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/outputs.py -------------------------------------------------------------------------------- /vllm/v1/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/request.py -------------------------------------------------------------------------------- /vllm/v1/sample/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/sample/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/sample/metadata.py -------------------------------------------------------------------------------- /vllm/v1/sample/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/sample/sampler.py -------------------------------------------------------------------------------- /vllm/v1/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/tokenizer/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/tokenizer/detokenizer.py -------------------------------------------------------------------------------- /vllm/v1/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/v1/worker/gpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/worker/gpu_model_runner.py -------------------------------------------------------------------------------- /vllm/v1/worker/gpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/v1/worker/gpu_worker.py -------------------------------------------------------------------------------- /vllm/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/version.py -------------------------------------------------------------------------------- /vllm/vllm_flash_attn/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/worker/cache_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/cache_engine.py -------------------------------------------------------------------------------- /vllm/worker/cpu_enc_dec_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/cpu_enc_dec_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/cpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/cpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/cpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/cpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/embedding_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/embedding_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/enc_dec_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/enc_dec_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/hpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/hpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/hpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/hpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/model_runner.py -------------------------------------------------------------------------------- /vllm/worker/model_runner_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/model_runner_base.py -------------------------------------------------------------------------------- /vllm/worker/multi_step_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/multi_step_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/multi_step_tpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/multi_step_tpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/multi_step_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/multi_step_worker.py -------------------------------------------------------------------------------- /vllm/worker/neuron_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/neuron_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/neuron_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/neuron_worker.py -------------------------------------------------------------------------------- /vllm/worker/openvino_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/openvino_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/openvino_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/openvino_worker.py -------------------------------------------------------------------------------- /vllm/worker/tpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/tpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/tpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/tpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/utils.py -------------------------------------------------------------------------------- /vllm/worker/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/worker.py -------------------------------------------------------------------------------- /vllm/worker/worker_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/worker_base.py -------------------------------------------------------------------------------- /vllm/worker/xpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/xpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/xpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ASISys/Adrenaline/HEAD/vllm/worker/xpu_worker.py --------------------------------------------------------------------------------