├── .gitignore ├── README.MD ├── download_mmlu.sh ├── examples ├── .gitignore ├── __pycache__ │ ├── prompt_utils.cpython-310.pyc │ └── prompt_utils.cpython-311.pyc ├── api_client.py ├── aqlm_example.py ├── benchAcc │ ├── evalppl.py │ ├── output │ │ └── ppl_batchsize512_fp16_Llama-2-7b.csv1 │ ├── runfloat.sh │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── __init__.cpython-311.pyc │ │ ├── eval_tasks │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── _utils │ │ │ ├── __init__.py │ │ │ ├── classification_utils.py │ │ │ └── generation_utils.py │ │ ├── language_modeling_task.py │ │ ├── sequence_classification_task.py │ │ └── text_summarization_task.py │ │ ├── modeling │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── _base.cpython-310.pyc │ │ │ ├── _base.cpython-311.pyc │ │ │ ├── _const.cpython-310.pyc │ │ │ ├── _const.cpython-311.pyc │ │ │ ├── _utils.cpython-310.pyc │ │ │ └── _utils.cpython-311.pyc │ │ ├── _base.py │ │ ├── _const.py │ │ ├── _utils.py │ │ ├── auto.py │ │ ├── baichuan.py │ │ ├── bloom.py │ │ ├── codegen.py │ │ ├── gpt2.py │ │ ├── gpt_bigcode.py │ │ ├── gpt_neox.py │ │ ├── gptj.py │ │ ├── internlm.py │ │ ├── llama.py │ │ ├── moss.py │ │ ├── opt.py │ │ ├── qwen.py │ │ └── rw.py │ │ ├── nn_modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── _fused_base.cpython-310.pyc │ │ │ └── _fused_base.cpython-311.pyc │ │ ├── _fused_base.py │ │ ├── fused_gptj_attn.py │ │ ├── fused_llama_attn.py │ │ ├── fused_llama_mlp.py │ │ ├── qlinear │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── __init__.cpython-311.pyc │ │ │ ├── qlinear_cuda.py │ │ │ ├── qlinear_cuda_old.py │ │ │ ├── qlinear_exllama.py │ │ │ ├── qlinear_exllamav2.py │ │ │ ├── qlinear_qigen.py │ │ │ └── qlinear_triton.py │ │ └── triton_utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── mixin.cpython-310.pyc │ │ │ └── mixin.cpython-311.pyc │ │ │ ├── custom_autotune.py │ │ │ ├── kernels.py │ │ │ └── mixin.py │ │ ├── quantization │ │ ├── ACKNOWLEDGEMENT.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── gptq.cpython-310.pyc │ │ │ ├── gptq.cpython-311.pyc │ │ │ ├── quantizer.cpython-310.pyc │ │ │ └── quantizer.cpython-311.pyc │ │ ├── gptq.py │ │ └── quantizer.py │ │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── data_utils.cpython-310.pyc │ │ ├── data_utils.cpython-311.pyc │ │ ├── exllama_utils.cpython-310.pyc │ │ ├── exllama_utils.cpython-311.pyc │ │ ├── import_utils.cpython-310.pyc │ │ ├── import_utils.cpython-311.pyc │ │ ├── peft_utils.cpython-310.pyc │ │ ├── peft_utils.cpython-311.pyc │ │ ├── perplexity_utils.cpython-310.pyc │ │ └── perplexity_utils.cpython-311.pyc │ │ ├── data_utils.py │ │ ├── exllama_utils.py │ │ ├── import_utils.py │ │ ├── peft_utils.py │ │ └── perplexity_utils.py ├── download_mmlu.sh ├── fp8 │ ├── README.md │ ├── extract_scales.py │ └── quantizer │ │ ├── README.md │ │ └── quantize.py ├── gradio_openai_chatbot_webserver.py ├── gradio_webserver.py ├── gradio_webui.py ├── input.pt ├── lenovo.jpg ├── lenovo.py ├── llava_example.py ├── llm_engine_example.py ├── logging_configuration.md ├── mmlu.py ├── multilora_inference.py ├── offline_inference.py ├── offline_inference_distributed.py ├── offline_inference_neuron.py ├── offline_inference_with_prefix.py ├── openai_chat_completion_client.py ├── openai_completion_client.py ├── production_monitoring │ ├── README.md │ ├── docker-compose.yaml │ ├── grafana.json │ └── prometheus.yaml ├── prompt_utils.py ├── server.py ├── tensorize_vllm_model.py ├── test.py ├── test4bit.py ├── test4bitchatglm.py ├── test8bit.py ├── test8bitLongSeqLlama3.py ├── test8bitchatglm.py ├── test8bitqwen2.py └── testawq.py ├── figures ├── awq32.gif ├── awq512.gif ├── mixq32.gif ├── mixq512.gif ├── output.png └── textmixq.jpg ├── gradio_openai_chatbot_webserver.py ├── gradio_webserver.py ├── mmlu.py ├── out.txt ├── out2.txt ├── test4bit.py ├── test4bitchatglm.py ├── test8bit.py ├── test8bitLongSeqLlama3.py ├── test8bitchatglm.py ├── test8bitqwen2.py ├── testawq.py ├── testmmlu.sh └── vllm ├── .gitignore ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc ├── __init__.cpython-311.pyc ├── __init__.cpython-312.pyc ├── _core_ext.cpython-310.pyc ├── _core_ext.cpython-311.pyc ├── _core_ext.cpython-312.pyc ├── _custom_ops.cpython-310.pyc ├── _custom_ops.cpython-311.pyc ├── _custom_ops.cpython-312.pyc ├── _ipex_ops.cpython-311.pyc ├── _version.cpython-310.pyc ├── _version.cpython-311.pyc ├── block.cpython-310.pyc ├── block.cpython-311.pyc ├── config.cpython-310.pyc ├── config.cpython-311.pyc ├── config.cpython-312.pyc ├── connections.cpython-310.pyc ├── connections.cpython-311.pyc ├── envs.cpython-310.pyc ├── envs.cpython-311.pyc ├── envs.cpython-312.pyc ├── logger.cpython-310.pyc ├── logger.cpython-311.pyc ├── logger.cpython-312.pyc ├── outputs.cpython-310.pyc ├── outputs.cpython-311.pyc ├── pooling_params.cpython-310.pyc ├── pooling_params.cpython-311.pyc ├── pooling_params.cpython-312.pyc ├── sampling_params.cpython-310.pyc ├── sampling_params.cpython-311.pyc ├── sampling_params.cpython-312.pyc ├── scalar_type.cpython-310.pyc ├── scalar_type.cpython-311.pyc ├── scalar_type.cpython-312.pyc ├── scripts.cpython-311.pyc ├── sequence.cpython-310.pyc ├── sequence.cpython-311.pyc ├── sequence.cpython-312.pyc ├── tracing.cpython-310.pyc ├── tracing.cpython-311.pyc ├── utils.cpython-310.pyc ├── utils.cpython-311.pyc ├── utils.cpython-312.pyc ├── version.cpython-310.pyc └── version.cpython-311.pyc ├── _core_ext.py ├── _custom_ops.py ├── _ipex_ops.py ├── _version.py ├── adapter_commons ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── layers.cpython-310.pyc │ ├── layers.cpython-311.pyc │ ├── models.cpython-310.pyc │ ├── models.cpython-311.pyc │ ├── request.cpython-310.pyc │ ├── request.cpython-311.pyc │ ├── request.cpython-312.pyc │ ├── utils.cpython-310.pyc │ ├── utils.cpython-311.pyc │ ├── worker_manager.cpython-310.pyc │ └── worker_manager.cpython-311.pyc ├── layers.py ├── models.py ├── request.py ├── utils.py └── worker_manager.py ├── assets ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── audio.cpython-311.pyc │ ├── base.cpython-311.pyc │ ├── image.cpython-311.pyc │ └── video.cpython-311.pyc ├── audio.py ├── base.py ├── image.py └── video.py ├── attention ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── layer.cpython-310.pyc │ ├── layer.cpython-311.pyc │ ├── selector.cpython-310.pyc │ └── selector.cpython-311.pyc ├── backends │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── abstract.cpython-310.pyc │ │ ├── abstract.cpython-311.pyc │ │ ├── blocksparse_attn.cpython-311.pyc │ │ ├── flash_attn.cpython-311.pyc │ │ ├── flashinfer.cpython-311.pyc │ │ ├── ipex_attn.cpython-311.pyc │ │ ├── openvino.cpython-311.pyc │ │ ├── pallas.cpython-311.pyc │ │ ├── rocm_flash_attn.cpython-311.pyc │ │ ├── torch_sdpa.cpython-311.pyc │ │ ├── utils.cpython-310.pyc │ │ ├── utils.cpython-311.pyc │ │ ├── xformers.cpython-310.pyc │ │ └── xformers.cpython-311.pyc │ ├── abstract.py │ ├── blocksparse_attn.py │ ├── flash_attn.py │ ├── flashinfer.py │ ├── ipex_attn.py │ ├── openvino.py │ ├── pallas.py │ ├── rocm_flash_attn.py │ ├── torch_sdpa.py │ ├── utils.py │ └── xformers.py ├── layer.py ├── ops │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── ipex_attn.cpython-311.pyc │ │ ├── paged_attn.cpython-310.pyc │ │ ├── paged_attn.cpython-311.pyc │ │ ├── prefix_prefill.cpython-310.pyc │ │ ├── prefix_prefill.cpython-311.pyc │ │ └── triton_flash_attention.cpython-311.pyc │ ├── blocksparse_attention │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── blocksparse_attention_kernel.cpython-311.pyc │ │ │ ├── interface.cpython-311.pyc │ │ │ └── utils.cpython-311.pyc │ │ ├── blocksparse_attention_kernel.py │ │ ├── interface.py │ │ └── utils.py │ ├── ipex_attn.py │ ├── paged_attn.py │ ├── prefix_prefill.py │ └── triton_flash_attention.py └── selector.py ├── block.py ├── compilation ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── backends.cpython-311.pyc │ └── wrapper.cpython-311.pyc ├── backends.py └── wrapper.py ├── config.py ├── connections.py ├── core ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── block_manager_v1.cpython-310.pyc │ ├── block_manager_v1.cpython-311.pyc │ ├── block_manager_v2.cpython-311.pyc │ ├── embedding_model_block_manager.cpython-311.pyc │ ├── evictor_v1.cpython-310.pyc │ ├── evictor_v1.cpython-311.pyc │ ├── evictor_v2.cpython-311.pyc │ ├── interfaces.cpython-310.pyc │ ├── interfaces.cpython-311.pyc │ ├── scheduler.cpython-310.pyc │ └── scheduler.cpython-311.pyc ├── block │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── block_table.cpython-311.pyc │ │ ├── common.cpython-310.pyc │ │ ├── common.cpython-311.pyc │ │ ├── cpu_gpu_block_allocator.cpython-311.pyc │ │ ├── interfaces.cpython-310.pyc │ │ ├── interfaces.cpython-311.pyc │ │ ├── naive_block.cpython-311.pyc │ │ ├── prefix_caching_block.cpython-311.pyc │ │ ├── utils.cpython-310.pyc │ │ └── utils.cpython-311.pyc │ ├── block_table.py │ ├── common.py │ ├── cpu_gpu_block_allocator.py │ ├── interfaces.py │ ├── naive_block.py │ ├── prefix_caching_block.py │ └── utils.py ├── block_manager_v1.py ├── block_manager_v2.py ├── embedding_model_block_manager.py ├── evictor_v1.py ├── evictor_v2.py ├── interfaces.py └── scheduler.py ├── distributed ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── communication_op.cpython-310.pyc │ ├── communication_op.cpython-311.pyc │ ├── communication_op.cpython-312.pyc │ ├── parallel_state.cpython-310.pyc │ ├── parallel_state.cpython-311.pyc │ ├── parallel_state.cpython-312.pyc │ ├── utils.cpython-310.pyc │ ├── utils.cpython-311.pyc │ └── utils.cpython-312.pyc ├── communication_op.py ├── device_communicators │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── cuda_wrapper.cpython-310.pyc │ │ ├── cuda_wrapper.cpython-311.pyc │ │ ├── custom_all_reduce.cpython-310.pyc │ │ ├── custom_all_reduce.cpython-311.pyc │ │ ├── custom_all_reduce_utils.cpython-310.pyc │ │ ├── custom_all_reduce_utils.cpython-311.pyc │ │ ├── pynccl.cpython-310.pyc │ │ ├── pynccl.cpython-311.pyc │ │ ├── pynccl_wrapper.cpython-310.pyc │ │ ├── pynccl_wrapper.cpython-311.pyc │ │ ├── shm_broadcast.cpython-310.pyc │ │ ├── shm_broadcast.cpython-311.pyc │ │ ├── tpu_communicator.cpython-310.pyc │ │ └── tpu_communicator.cpython-311.pyc │ ├── cuda_wrapper.py │ ├── custom_all_reduce.py │ ├── custom_all_reduce_utils.py │ ├── pynccl.py │ ├── pynccl_wrapper.py │ ├── shm_broadcast.py │ └── tpu_communicator.py ├── parallel_state.py └── utils.py ├── engine ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── arg_utils.cpython-310.pyc │ ├── arg_utils.cpython-311.pyc │ ├── arg_utils.cpython-312.pyc │ ├── async_llm_engine.cpython-310.pyc │ ├── async_llm_engine.cpython-311.pyc │ ├── async_timeout.cpython-310.pyc │ ├── async_timeout.cpython-311.pyc │ ├── llm_engine.cpython-310.pyc │ ├── llm_engine.cpython-311.pyc │ ├── metrics.cpython-311.pyc │ ├── metrics_types.cpython-310.pyc │ ├── metrics_types.cpython-311.pyc │ └── protocol.cpython-311.pyc ├── arg_utils.py ├── async_llm_engine.py ├── async_timeout.py ├── llm_engine.py ├── metrics.py ├── metrics_types.py ├── multiprocessing │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── client.cpython-311.pyc │ │ └── engine.cpython-311.pyc │ ├── client.py │ └── engine.py ├── output_processor │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── interfaces.cpython-310.pyc │ │ ├── interfaces.cpython-311.pyc │ │ ├── multi_step.cpython-311.pyc │ │ ├── single_step.cpython-310.pyc │ │ ├── single_step.cpython-311.pyc │ │ ├── stop_checker.cpython-310.pyc │ │ ├── stop_checker.cpython-311.pyc │ │ ├── util.cpython-310.pyc │ │ └── util.cpython-311.pyc │ ├── interfaces.py │ ├── multi_step.py │ ├── single_step.py │ ├── stop_checker.py │ └── util.py └── protocol.py ├── entrypoints ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── api_server.cpython-311.pyc │ ├── chat_utils.cpython-310.pyc │ ├── chat_utils.cpython-311.pyc │ ├── launcher.cpython-311.pyc │ ├── llm.cpython-310.pyc │ ├── llm.cpython-311.pyc │ └── logger.cpython-311.pyc ├── api_server.py ├── chat_utils.py ├── launcher.py ├── llm.py ├── logger.py └── openai │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── api_server.cpython-311.pyc │ ├── cli_args.cpython-311.pyc │ ├── logits_processors.cpython-310.pyc │ ├── logits_processors.cpython-311.pyc │ ├── protocol.cpython-310.pyc │ ├── protocol.cpython-311.pyc │ ├── run_batch.cpython-311.pyc │ ├── serving_chat.cpython-311.pyc │ ├── serving_completion.cpython-311.pyc │ ├── serving_embedding.cpython-311.pyc │ ├── serving_engine.cpython-311.pyc │ └── serving_tokenization.cpython-311.pyc │ ├── api_server.py │ ├── cli_args.py │ ├── logits_processors.py │ ├── protocol.py │ ├── run_batch.py │ ├── serving_chat.py │ ├── serving_completion.py │ ├── serving_embedding.py │ ├── serving_engine.py │ ├── serving_tokenization.py │ └── tool_parsers │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── abstract_tool_parser.cpython-311.pyc │ ├── hermes_tool_parser.cpython-311.pyc │ ├── mistral_tool_parser.cpython-311.pyc │ └── utils.cpython-311.pyc │ ├── abstract_tool_parser.py │ ├── hermes_tool_parser.py │ ├── mistral_tool_parser.py │ └── utils.py ├── envs.py ├── examples └── .gitignore ├── executor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── cpu_executor.cpython-311.pyc │ ├── distributed_gpu_executor.cpython-311.pyc │ ├── executor_base.cpython-310.pyc │ ├── executor_base.cpython-311.pyc │ ├── gpu_executor.cpython-310.pyc │ ├── gpu_executor.cpython-311.pyc │ ├── msgspec_utils.cpython-310.pyc │ ├── msgspec_utils.cpython-311.pyc │ ├── multiproc_gpu_executor.cpython-311.pyc │ ├── multiproc_worker_utils.cpython-311.pyc │ ├── multiproc_xpu_executor.cpython-311.pyc │ ├── neuron_executor.cpython-311.pyc │ ├── openvino_executor.cpython-311.pyc │ ├── ray_gpu_executor.cpython-311.pyc │ ├── ray_tpu_executor.cpython-311.pyc │ ├── ray_utils.cpython-310.pyc │ ├── ray_utils.cpython-311.pyc │ ├── ray_xpu_executor.cpython-311.pyc │ ├── tpu_executor.cpython-311.pyc │ └── xpu_executor.cpython-311.pyc ├── cpu_executor.py ├── distributed_gpu_executor.py ├── executor_base.py ├── gpu_executor.py ├── msgspec_utils.py ├── multiproc_gpu_executor.py ├── multiproc_worker_utils.py ├── multiproc_xpu_executor.py ├── neuron_executor.py ├── openvino_executor.py ├── ray_gpu_executor.py ├── ray_tpu_executor.py ├── ray_utils.py ├── ray_xpu_executor.py ├── tpu_executor.py └── xpu_executor.py ├── inputs ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── data.cpython-310.pyc │ ├── data.cpython-311.pyc │ ├── data.cpython-312.pyc │ ├── parse.cpython-310.pyc │ ├── parse.cpython-311.pyc │ ├── parse.cpython-312.pyc │ ├── preprocess.cpython-310.pyc │ ├── preprocess.cpython-311.pyc │ ├── registry.cpython-310.pyc │ ├── registry.cpython-311.pyc │ └── registry.cpython-312.pyc ├── data.py ├── parse.py ├── preprocess.py └── registry.py ├── logger.py ├── logging ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── formatter.cpython-310.pyc │ ├── formatter.cpython-311.pyc │ └── formatter.cpython-312.pyc └── formatter.py ├── lora ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── fully_sharded_layers.cpython-310.pyc │ ├── fully_sharded_layers.cpython-311.pyc │ ├── layers.cpython-310.pyc │ ├── layers.cpython-311.pyc │ ├── lora.cpython-310.pyc │ ├── lora.cpython-311.pyc │ ├── models.cpython-310.pyc │ ├── models.cpython-311.pyc │ ├── punica.cpython-310.pyc │ ├── punica.cpython-311.pyc │ ├── request.cpython-310.pyc │ ├── request.cpython-311.pyc │ ├── request.cpython-312.pyc │ ├── utils.cpython-310.pyc │ ├── utils.cpython-311.pyc │ ├── worker_manager.cpython-310.pyc │ └── worker_manager.cpython-311.pyc ├── fully_sharded_layers.py ├── layers.py ├── lora.py ├── models.py ├── ops │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── bgmv_expand.cpython-310.pyc │ │ ├── bgmv_expand.cpython-311.pyc │ │ ├── bgmv_expand_slice.cpython-310.pyc │ │ ├── bgmv_expand_slice.cpython-311.pyc │ │ ├── bgmv_shrink.cpython-310.pyc │ │ ├── bgmv_shrink.cpython-311.pyc │ │ ├── sgmv_expand.cpython-310.pyc │ │ ├── sgmv_expand.cpython-311.pyc │ │ ├── sgmv_expand_slice.cpython-310.pyc │ │ ├── sgmv_expand_slice.cpython-311.pyc │ │ ├── sgmv_shrink.cpython-310.pyc │ │ ├── sgmv_shrink.cpython-311.pyc │ │ ├── utils.cpython-310.pyc │ │ └── utils.cpython-311.pyc │ ├── bgmv_expand.py │ ├── bgmv_expand_slice.py │ ├── bgmv_shrink.py │ ├── sgmv_expand.py │ ├── sgmv_expand_slice.py │ ├── sgmv_shrink.py │ └── utils.py ├── punica.py ├── request.py ├── utils.py └── worker_manager.py ├── model_executor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── custom_op.cpython-310.pyc │ ├── custom_op.cpython-311.pyc │ ├── custom_op.cpython-312.pyc │ ├── parameter.cpython-310.pyc │ ├── parameter.cpython-311.pyc │ ├── parameter.cpython-312.pyc │ ├── pooling_metadata.cpython-310.pyc │ ├── pooling_metadata.cpython-311.pyc │ ├── sampling_metadata.cpython-310.pyc │ ├── sampling_metadata.cpython-311.pyc │ ├── sampling_metadata.cpython-312.pyc │ ├── utils.cpython-310.pyc │ ├── utils.cpython-311.pyc │ └── utils.cpython-312.pyc ├── custom_op.py ├── guided_decoding │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── guided_fields.cpython-310.pyc │ │ ├── guided_fields.cpython-311.pyc │ │ ├── lm_format_enforcer_decoding.cpython-311.pyc │ │ ├── outlines_decoding.cpython-311.pyc │ │ └── outlines_logits_processors.cpython-311.pyc │ ├── guided_fields.py │ ├── lm_format_enforcer_decoding.py │ ├── outlines_decoding.py │ └── outlines_logits_processors.py ├── layers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── __init__.cpython-312.pyc │ │ ├── activation.cpython-310.pyc │ │ ├── activation.cpython-311.pyc │ │ ├── layernorm.cpython-310.pyc │ │ ├── layernorm.cpython-311.pyc │ │ ├── linear.cpython-310.pyc │ │ ├── linear.cpython-311.pyc │ │ ├── linear.cpython-312.pyc │ │ ├── logits_processor.cpython-310.pyc │ │ ├── logits_processor.cpython-311.pyc │ │ ├── pooler.cpython-311.pyc │ │ ├── rejection_sampler.cpython-311.pyc │ │ ├── resampler.cpython-311.pyc │ │ ├── rotary_embedding.cpython-310.pyc │ │ ├── rotary_embedding.cpython-311.pyc │ │ ├── sampler.cpython-310.pyc │ │ ├── sampler.cpython-311.pyc │ │ ├── spec_decode_base_sampler.cpython-310.pyc │ │ ├── spec_decode_base_sampler.cpython-311.pyc │ │ ├── spec_decode_base_sampler.cpython-312.pyc │ │ ├── typical_acceptance_sampler.cpython-311.pyc │ │ ├── vocab_parallel_embedding.cpython-310.pyc │ │ ├── vocab_parallel_embedding.cpython-311.pyc │ │ └── vocab_parallel_embedding.cpython-312.pyc │ ├── activation.py │ ├── fused_moe │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── __init__.cpython-312.pyc │ │ │ ├── fused_marlin_moe.cpython-310.pyc │ │ │ ├── fused_marlin_moe.cpython-311.pyc │ │ │ ├── fused_marlin_moe.cpython-312.pyc │ │ │ ├── fused_moe.cpython-310.pyc │ │ │ ├── fused_moe.cpython-311.pyc │ │ │ ├── fused_moe.cpython-312.pyc │ │ │ ├── layer.cpython-310.pyc │ │ │ ├── layer.cpython-311.pyc │ │ │ ├── layer.cpython-312.pyc │ │ │ └── moe_pallas.cpython-311.pyc │ │ ├── configs │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=14336,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=1792,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=3584,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=7168,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ └── E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ ├── fused_marlin_moe.py │ │ ├── fused_moe.py │ │ ├── layer.py │ │ └── moe_pallas.py │ ├── layernorm.py │ ├── linear.py │ ├── logits_processor.py │ ├── mamba │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-311.pyc │ │ └── ops │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── causal_conv1d.cpython-311.pyc │ │ │ └── mamba_ssm.cpython-311.pyc │ │ │ ├── causal_conv1d.py │ │ │ └── mamba_ssm.py │ ├── pooler.py │ ├── quantization │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── __init__.cpython-312.pyc │ │ │ ├── aqlm.cpython-310.pyc │ │ │ ├── aqlm.cpython-311.pyc │ │ │ ├── aqlm.cpython-312.pyc │ │ │ ├── awq.cpython-310.pyc │ │ │ ├── awq.cpython-311.pyc │ │ │ ├── awq.cpython-312.pyc │ │ │ ├── awq_marlin.cpython-310.pyc │ │ │ ├── awq_marlin.cpython-311.pyc │ │ │ ├── awq_marlin.cpython-312.pyc │ │ │ ├── awq_triton.cpython-311.pyc │ │ │ ├── base_config.cpython-310.pyc │ │ │ ├── base_config.cpython-311.pyc │ │ │ ├── base_config.cpython-312.pyc │ │ │ ├── bitsandbytes.cpython-310.pyc │ │ │ ├── bitsandbytes.cpython-311.pyc │ │ │ ├── bitsandbytes.cpython-312.pyc │ │ │ ├── deepspeedfp.cpython-310.pyc │ │ │ ├── deepspeedfp.cpython-311.pyc │ │ │ ├── deepspeedfp.cpython-312.pyc │ │ │ ├── experts_int8.cpython-310.pyc │ │ │ ├── experts_int8.cpython-311.pyc │ │ │ ├── experts_int8.cpython-312.pyc │ │ │ ├── fbgemm_fp8.cpython-310.pyc │ │ │ ├── fbgemm_fp8.cpython-311.pyc │ │ │ ├── fbgemm_fp8.cpython-312.pyc │ │ │ ├── fp8.cpython-310.pyc │ │ │ ├── fp8.cpython-311.pyc │ │ │ ├── fp8.cpython-312.pyc │ │ │ ├── gguf.cpython-310.pyc │ │ │ ├── gguf.cpython-311.pyc │ │ │ ├── gguf.cpython-312.pyc │ │ │ ├── gptq.cpython-310.pyc │ │ │ ├── gptq.cpython-311.pyc │ │ │ ├── gptq.cpython-312.pyc │ │ │ ├── gptq_marlin.cpython-311.pyc │ │ │ ├── gptq_marlin.cpython-312.pyc │ │ │ ├── gptq_marlin_24.cpython-310.pyc │ │ │ ├── gptq_marlin_24.cpython-311.pyc │ │ │ ├── gptq_marlin_24.cpython-312.pyc │ │ │ ├── kv_cache.cpython-310.pyc │ │ │ ├── kv_cache.cpython-311.pyc │ │ │ ├── kv_cache.cpython-312.pyc │ │ │ ├── marlin.cpython-310.pyc │ │ │ ├── marlin.cpython-311.pyc │ │ │ ├── marlin.cpython-312.pyc │ │ │ ├── mixq.cpython-310.pyc │ │ │ ├── mixq.cpython-311.pyc │ │ │ ├── mixq.cpython-312.pyc │ │ │ ├── mixq4bit.cpython-310.pyc │ │ │ ├── mixq4bit.cpython-311.pyc │ │ │ ├── modelopt.cpython-310.pyc │ │ │ ├── modelopt.cpython-311.pyc │ │ │ ├── modelopt.cpython-312.pyc │ │ │ ├── neuron_quant.cpython-310.pyc │ │ │ ├── neuron_quant.cpython-311.pyc │ │ │ ├── neuron_quant.cpython-312.pyc │ │ │ ├── qqq.cpython-310.pyc │ │ │ ├── qqq.cpython-311.pyc │ │ │ ├── qqq.cpython-312.pyc │ │ │ ├── schema.cpython-310.pyc │ │ │ ├── schema.cpython-311.pyc │ │ │ ├── tpu_int8.cpython-310.pyc │ │ │ ├── tpu_int8.cpython-311.pyc │ │ │ └── tpu_int8.cpython-312.pyc │ │ ├── aqlm.py │ │ ├── awq.py │ │ ├── awq_marlin.py │ │ ├── awq_triton.py │ │ ├── base_config.py │ │ ├── bitsandbytes.py │ │ ├── compressed_tensors │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ ├── compressed_tensors.cpython-310.pyc │ │ │ │ ├── compressed_tensors.cpython-311.pyc │ │ │ │ ├── compressed_tensors.cpython-312.pyc │ │ │ │ ├── compressed_tensors_moe.cpython-310.pyc │ │ │ │ ├── compressed_tensors_moe.cpython-311.pyc │ │ │ │ ├── compressed_tensors_moe.cpython-312.pyc │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ ├── utils.cpython-311.pyc │ │ │ │ └── utils.cpython-312.pyc │ │ │ ├── compressed_tensors.py │ │ │ ├── compressed_tensors_moe.py │ │ │ ├── schemes │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_scheme.cpython-310.pyc │ │ │ │ │ ├── compressed_tensors_scheme.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_scheme.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w4a16_24.cpython-310.pyc │ │ │ │ │ ├── compressed_tensors_w4a16_24.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w4a16_24.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w8a16_fp8.cpython-310.pyc │ │ │ │ │ ├── compressed_tensors_w8a16_fp8.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w8a16_fp8.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_fp8.cpython-310.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_fp8.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_fp8.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_int8.cpython-310.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_int8.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_int8.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_wNa16.cpython-310.pyc │ │ │ │ │ ├── compressed_tensors_wNa16.cpython-311.pyc │ │ │ │ │ └── compressed_tensors_wNa16.cpython-312.pyc │ │ │ │ ├── compressed_tensors_scheme.py │ │ │ │ ├── compressed_tensors_w4a16_24.py │ │ │ │ ├── compressed_tensors_w8a16_fp8.py │ │ │ │ ├── compressed_tensors_w8a8_fp8.py │ │ │ │ ├── compressed_tensors_w8a8_int8.py │ │ │ │ └── compressed_tensors_wNa16.py │ │ │ └── utils.py │ │ ├── deepspeedfp.py │ │ ├── experts_int8.py │ │ ├── fbgemm_fp8.py │ │ ├── fp8.py │ │ ├── gguf.py │ │ ├── gptq.py │ │ ├── gptq_marlin.py │ │ ├── gptq_marlin_24.py │ │ ├── kernels │ │ │ ├── MPLinearKernel.py │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── MPLinearKernel.cpython-310.pyc │ │ │ │ ├── MPLinearKernel.cpython-311.pyc │ │ │ │ ├── MPLinearKernel.cpython-312.pyc │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ ├── machete.cpython-310.pyc │ │ │ │ ├── machete.cpython-311.pyc │ │ │ │ ├── machete.cpython-312.pyc │ │ │ │ ├── marlin.cpython-310.pyc │ │ │ │ ├── marlin.cpython-311.pyc │ │ │ │ └── marlin.cpython-312.pyc │ │ │ ├── machete.py │ │ │ └── marlin.py │ │ ├── kv_cache.py │ │ ├── marlin.py │ │ ├── mixq.py │ │ ├── mixq4bit.py │ │ ├── modelopt.py │ │ ├── neuron_quant.py │ │ ├── qqq.py │ │ ├── schema.py │ │ ├── tpu_int8.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── __init__.cpython-312.pyc │ │ │ ├── layer_utils.cpython-310.pyc │ │ │ ├── layer_utils.cpython-311.pyc │ │ │ ├── layer_utils.cpython-312.pyc │ │ │ ├── machete_utils.cpython-310.pyc │ │ │ ├── machete_utils.cpython-311.pyc │ │ │ ├── machete_utils.cpython-312.pyc │ │ │ ├── marlin_utils.cpython-310.pyc │ │ │ ├── marlin_utils.cpython-311.pyc │ │ │ ├── marlin_utils.cpython-312.pyc │ │ │ ├── marlin_utils_fp8.cpython-310.pyc │ │ │ ├── marlin_utils_fp8.cpython-311.pyc │ │ │ ├── marlin_utils_fp8.cpython-312.pyc │ │ │ ├── marlin_utils_test.cpython-311.pyc │ │ │ ├── marlin_utils_test_24.cpython-311.pyc │ │ │ ├── marlin_utils_test_qqq.cpython-311.pyc │ │ │ ├── quant_utils.cpython-310.pyc │ │ │ ├── quant_utils.cpython-311.pyc │ │ │ ├── quant_utils.cpython-312.pyc │ │ │ ├── w8a8_utils.cpython-310.pyc │ │ │ ├── w8a8_utils.cpython-311.pyc │ │ │ └── w8a8_utils.cpython-312.pyc │ │ │ ├── layer_utils.py │ │ │ ├── machete_utils.py │ │ │ ├── marlin_utils.py │ │ │ ├── marlin_utils_fp8.py │ │ │ ├── marlin_utils_test.py │ │ │ ├── marlin_utils_test_24.py │ │ │ ├── marlin_utils_test_qqq.py │ │ │ ├── quant_utils.py │ │ │ └── w8a8_utils.py │ ├── rejection_sampler.py │ ├── resampler.py │ ├── rotary_embedding.py │ ├── sampler.py │ ├── spec_decode_base_sampler.py │ ├── typical_acceptance_sampler.py │ └── vocab_parallel_embedding.py ├── model_loader │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── loader.cpython-310.pyc │ │ ├── loader.cpython-311.pyc │ │ ├── neuron.cpython-311.pyc │ │ ├── openvino.cpython-311.pyc │ │ ├── tensorizer.cpython-310.pyc │ │ ├── tensorizer.cpython-311.pyc │ │ ├── utils.cpython-310.pyc │ │ ├── utils.cpython-311.pyc │ │ ├── weight_utils.cpython-310.pyc │ │ └── weight_utils.cpython-311.pyc │ ├── loader.py │ ├── neuron.py │ ├── openvino.py │ ├── tensorizer.py │ ├── utils.py │ └── weight_utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── arctic.cpython-311.pyc │ │ ├── baichuan.cpython-311.pyc │ │ ├── bart.cpython-311.pyc │ │ ├── blip.cpython-311.pyc │ │ ├── blip2.cpython-311.pyc │ │ ├── bloom.cpython-311.pyc │ │ ├── chameleon.cpython-311.pyc │ │ ├── chatglm.cpython-311.pyc │ │ ├── clip.cpython-311.pyc │ │ ├── commandr.cpython-311.pyc │ │ ├── dbrx.cpython-311.pyc │ │ ├── decilm.cpython-311.pyc │ │ ├── deepseek.cpython-311.pyc │ │ ├── deepseek_v2.cpython-311.pyc │ │ ├── eagle.cpython-311.pyc │ │ ├── exaone.cpython-311.pyc │ │ ├── falcon.cpython-311.pyc │ │ ├── fuyu.cpython-311.pyc │ │ ├── gemma.cpython-311.pyc │ │ ├── gemma2.cpython-311.pyc │ │ ├── gpt2.cpython-311.pyc │ │ ├── gpt_bigcode.cpython-311.pyc │ │ ├── gpt_j.cpython-311.pyc │ │ ├── gpt_neox.cpython-311.pyc │ │ ├── granite.cpython-311.pyc │ │ ├── idefics2_vision_model.cpython-311.pyc │ │ ├── interfaces.cpython-310.pyc │ │ ├── interfaces.cpython-311.pyc │ │ ├── intern_vit.cpython-311.pyc │ │ ├── internlm2.cpython-311.pyc │ │ ├── internvl.cpython-311.pyc │ │ ├── jais.cpython-311.pyc │ │ ├── jamba.cpython-311.pyc │ │ ├── llama.cpython-310.pyc │ │ ├── llama.cpython-311.pyc │ │ ├── llama_embedding.cpython-311.pyc │ │ ├── llava.cpython-311.pyc │ │ ├── llava_next.cpython-311.pyc │ │ ├── llava_next_video.cpython-311.pyc │ │ ├── llava_onevision.cpython-311.pyc │ │ ├── medusa.cpython-311.pyc │ │ ├── minicpm.cpython-311.pyc │ │ ├── minicpm3.cpython-311.pyc │ │ ├── minicpmv.cpython-311.pyc │ │ ├── mixtral.cpython-311.pyc │ │ ├── mixtral_quant.cpython-311.pyc │ │ ├── mllama.cpython-311.pyc │ │ ├── mlp_speculator.cpython-311.pyc │ │ ├── mpt.cpython-311.pyc │ │ ├── na_vit.cpython-311.pyc │ │ ├── nemotron.cpython-311.pyc │ │ ├── olmo.cpython-311.pyc │ │ ├── olmoe.cpython-311.pyc │ │ ├── opt.cpython-311.pyc │ │ ├── orion.cpython-311.pyc │ │ ├── paligemma.cpython-311.pyc │ │ ├── persimmon.cpython-311.pyc │ │ ├── phi.cpython-311.pyc │ │ ├── phi3.cpython-311.pyc │ │ ├── phi3_small.cpython-311.pyc │ │ ├── phi3v.cpython-311.pyc │ │ ├── phimoe.cpython-311.pyc │ │ ├── pixtral.cpython-311.pyc │ │ ├── qwen.cpython-311.pyc │ │ ├── qwen2.cpython-310.pyc │ │ ├── qwen2.cpython-311.pyc │ │ ├── qwen2_moe.cpython-311.pyc │ │ ├── qwen2_vl.cpython-311.pyc │ │ ├── siglip.cpython-311.pyc │ │ ├── solar.cpython-311.pyc │ │ ├── stablelm.cpython-311.pyc │ │ ├── starcoder2.cpython-311.pyc │ │ ├── ultravox.cpython-311.pyc │ │ ├── utils.cpython-310.pyc │ │ ├── utils.cpython-311.pyc │ │ └── xverse.cpython-311.pyc │ ├── arctic.py │ ├── baichuan.py │ ├── bart.py │ ├── blip.py │ ├── blip2.py │ ├── bloom.py │ ├── chameleon.py │ ├── chatglm.py │ ├── clip.py │ ├── commandr.py │ ├── dbrx.py │ ├── decilm.py │ ├── deepseek.py │ ├── deepseek_v2.py │ ├── eagle.py │ ├── exaone.py │ ├── falcon.py │ ├── fuyu.py │ ├── gemma.py │ ├── gemma2.py │ ├── gpt2.py │ ├── gpt_bigcode.py │ ├── gpt_j.py │ ├── gpt_neox.py │ ├── granite.py │ ├── idefics2_vision_model.py │ ├── interfaces.py │ ├── intern_vit.py │ ├── internlm2.py │ ├── internvl.py │ ├── jais.py │ ├── jamba.py │ ├── llama.py │ ├── llama_embedding.py │ ├── llava.py │ ├── llava_next.py │ ├── llava_next_video.py │ ├── llava_onevision.py │ ├── medusa.py │ ├── minicpm.py │ ├── minicpm3.py │ ├── minicpmv.py │ ├── mixtral.py │ ├── mixtral_quant.py │ ├── mllama.py │ ├── mlp_speculator.py │ ├── mpt.py │ ├── na_vit.py │ ├── nemotron.py │ ├── olmo.py │ ├── olmoe.py │ ├── opt.py │ ├── orion.py │ ├── paligemma.py │ ├── persimmon.py │ ├── phi.py │ ├── phi3.py │ ├── phi3_small.py │ ├── phi3v.py │ ├── phimoe.py │ ├── pixtral.py │ ├── qwen.py │ ├── qwen2.py │ ├── qwen2_moe.py │ ├── qwen2_vl.py │ ├── siglip.py │ ├── solar.py │ ├── stablelm.py │ ├── starcoder2.py │ ├── ultravox.py │ ├── utils.py │ └── xverse.py ├── parameter.py ├── pooling_metadata.py ├── sampling_metadata.py └── utils.py ├── multimodal ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── audio.cpython-310.pyc │ ├── audio.cpython-311.pyc │ ├── base.cpython-310.pyc │ ├── base.cpython-311.pyc │ ├── image.cpython-310.pyc │ ├── image.cpython-311.pyc │ ├── registry.cpython-310.pyc │ ├── registry.cpython-311.pyc │ ├── utils.cpython-310.pyc │ ├── utils.cpython-311.pyc │ ├── video.cpython-310.pyc │ └── video.cpython-311.pyc ├── audio.py ├── base.py ├── image.py ├── registry.py ├── utils.py └── video.py ├── outputs.py ├── platforms ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── cpu.cpython-311.pyc │ ├── cuda.cpython-310.pyc │ ├── cuda.cpython-311.pyc │ ├── interface.cpython-310.pyc │ ├── interface.cpython-311.pyc │ ├── interface.cpython-312.pyc │ ├── rocm.cpython-311.pyc │ └── tpu.cpython-311.pyc ├── cpu.py ├── cuda.py ├── interface.py ├── rocm.py └── tpu.py ├── plugins ├── __init__.py └── __pycache__ │ ├── __init__.cpython-310.pyc │ └── __init__.cpython-311.pyc ├── pooling_params.py ├── production_monitoring ├── README.md ├── docker-compose.yaml ├── grafana.json └── prometheus.yaml ├── prompt_adapter ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── layers.cpython-310.pyc │ ├── layers.cpython-311.pyc │ ├── models.cpython-310.pyc │ ├── models.cpython-311.pyc │ ├── request.cpython-310.pyc │ ├── request.cpython-311.pyc │ ├── request.cpython-312.pyc │ ├── utils.cpython-310.pyc │ ├── utils.cpython-311.pyc │ ├── worker_manager.cpython-310.pyc │ └── worker_manager.cpython-311.pyc ├── layers.py ├── models.py ├── request.py ├── utils.py └── worker_manager.py ├── py.typed ├── sampling_params.py ├── scalar_type.py ├── scripts.py ├── sequence.py ├── spec_decode ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── batch_expansion.cpython-311.pyc │ ├── draft_model_runner.cpython-311.pyc │ ├── interfaces.cpython-311.pyc │ ├── medusa_worker.cpython-311.pyc │ ├── metrics.cpython-310.pyc │ ├── metrics.cpython-311.pyc │ ├── metrics.cpython-312.pyc │ ├── mlp_speculator_worker.cpython-311.pyc │ ├── multi_step_worker.cpython-311.pyc │ ├── ngram_worker.cpython-311.pyc │ ├── proposer_worker_base.cpython-311.pyc │ ├── smaller_tp_proposer_worker.cpython-311.pyc │ ├── spec_decode_worker.cpython-311.pyc │ ├── target_model_runner.cpython-311.pyc │ ├── top1_proposer.cpython-311.pyc │ └── util.cpython-311.pyc ├── batch_expansion.py ├── draft_model_runner.py ├── interfaces.py ├── medusa_worker.py ├── metrics.py ├── mlp_speculator_worker.py ├── multi_step_worker.py ├── ngram_worker.py ├── proposer_worker_base.py ├── smaller_tp_proposer_worker.py ├── spec_decode_worker.py ├── target_model_runner.py ├── top1_proposer.py └── util.py ├── tracing.py ├── transformers_utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── config.cpython-310.pyc │ ├── config.cpython-311.pyc │ ├── detokenizer.cpython-310.pyc │ ├── detokenizer.cpython-311.pyc │ ├── processor.cpython-310.pyc │ ├── processor.cpython-311.pyc │ ├── tokenizer.cpython-310.pyc │ ├── tokenizer.cpython-311.pyc │ ├── utils.cpython-310.pyc │ └── utils.cpython-311.pyc ├── config.py ├── configs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── arctic.cpython-311.pyc │ │ ├── chatglm.cpython-310.pyc │ │ ├── chatglm.cpython-311.pyc │ │ ├── dbrx.cpython-310.pyc │ │ ├── dbrx.cpython-311.pyc │ │ ├── eagle.cpython-310.pyc │ │ ├── eagle.cpython-311.pyc │ │ ├── exaone.cpython-310.pyc │ │ ├── exaone.cpython-311.pyc │ │ ├── falcon.cpython-310.pyc │ │ ├── falcon.cpython-311.pyc │ │ ├── granite.cpython-310.pyc │ │ ├── granite.cpython-311.pyc │ │ ├── internvl.cpython-310.pyc │ │ ├── internvl.cpython-311.pyc │ │ ├── jais.cpython-310.pyc │ │ ├── jais.cpython-311.pyc │ │ ├── medusa.cpython-310.pyc │ │ ├── medusa.cpython-311.pyc │ │ ├── mllama.cpython-311.pyc │ │ ├── mlp_speculator.cpython-310.pyc │ │ ├── mlp_speculator.cpython-311.pyc │ │ ├── mpt.cpython-310.pyc │ │ ├── mpt.cpython-311.pyc │ │ ├── nemotron.cpython-310.pyc │ │ ├── nemotron.cpython-311.pyc │ │ ├── solar.cpython-310.pyc │ │ ├── solar.cpython-311.pyc │ │ ├── ultravox.cpython-310.pyc │ │ └── ultravox.cpython-311.pyc │ ├── arctic.py │ ├── chatglm.py │ ├── dbrx.py │ ├── eagle.py │ ├── exaone.py │ ├── falcon.py │ ├── granite.py │ ├── internvl.py │ ├── jais.py │ ├── medusa.py │ ├── mllama.py │ ├── mlp_speculator.py │ ├── mpt.py │ ├── nemotron.py │ ├── solar.py │ └── ultravox.py ├── detokenizer.py ├── processor.py ├── tokenizer.py ├── tokenizer_group │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── base_tokenizer_group.cpython-310.pyc │ │ ├── base_tokenizer_group.cpython-311.pyc │ │ ├── ray_tokenizer_group.cpython-310.pyc │ │ ├── ray_tokenizer_group.cpython-311.pyc │ │ ├── tokenizer_group.cpython-310.pyc │ │ └── tokenizer_group.cpython-311.pyc │ ├── base_tokenizer_group.py │ ├── ray_tokenizer_group.py │ └── tokenizer_group.py ├── tokenizers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── baichuan.cpython-310.pyc │ │ ├── baichuan.cpython-311.pyc │ │ ├── mistral.cpython-310.pyc │ │ └── mistral.cpython-311.pyc │ ├── baichuan.py │ └── mistral.py └── utils.py ├── triton_utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── custom_cache_manager.cpython-310.pyc │ ├── custom_cache_manager.cpython-311.pyc │ ├── custom_cache_manager.cpython-312.pyc │ ├── importing.cpython-310.pyc │ ├── importing.cpython-311.pyc │ ├── importing.cpython-312.pyc │ ├── libentry.cpython-310.pyc │ ├── libentry.cpython-311.pyc │ └── libentry.cpython-312.pyc ├── custom_cache_manager.py ├── importing.py └── libentry.py ├── usage ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── usage_lib.cpython-310.pyc │ └── usage_lib.cpython-311.pyc └── usage_lib.py ├── utils.py ├── version.py └── worker ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc ├── __init__.cpython-311.pyc ├── cache_engine.cpython-310.pyc ├── cache_engine.cpython-311.pyc ├── cpu_model_runner.cpython-311.pyc ├── cpu_worker.cpython-311.pyc ├── embedding_model_runner.cpython-310.pyc ├── embedding_model_runner.cpython-311.pyc ├── enc_dec_model_runner.cpython-310.pyc ├── enc_dec_model_runner.cpython-311.pyc ├── model_runner.cpython-310.pyc ├── model_runner.cpython-311.pyc ├── model_runner_base.cpython-310.pyc ├── model_runner_base.cpython-311.pyc ├── multi_step_model_runner.cpython-311.pyc ├── multi_step_tpu_worker.cpython-311.pyc ├── multi_step_worker.cpython-311.pyc ├── neuron_model_runner.cpython-311.pyc ├── neuron_worker.cpython-311.pyc ├── openvino_model_runner.cpython-311.pyc ├── openvino_worker.cpython-311.pyc ├── tpu_model_runner.cpython-311.pyc ├── tpu_worker.cpython-311.pyc ├── utils.cpython-310.pyc ├── utils.cpython-311.pyc ├── worker.cpython-310.pyc ├── worker.cpython-311.pyc ├── worker_base.cpython-310.pyc ├── worker_base.cpython-311.pyc ├── xpu_model_runner.cpython-311.pyc └── xpu_worker.cpython-311.pyc ├── cache_engine.py ├── cpu_model_runner.py ├── cpu_worker.py ├── embedding_model_runner.py ├── enc_dec_model_runner.py ├── model_runner.py ├── model_runner_base.py ├── multi_step_model_runner.py ├── multi_step_tpu_worker.py ├── multi_step_worker.py ├── neuron_model_runner.py ├── neuron_worker.py ├── openvino_model_runner.py ├── openvino_worker.py ├── tpu_model_runner.py ├── tpu_worker.py ├── utils.py ├── worker.py ├── worker_base.py ├── xpu_model_runner.py └── xpu_worker.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/.gitignore -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/README.MD -------------------------------------------------------------------------------- /download_mmlu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/download_mmlu.sh -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/.gitignore -------------------------------------------------------------------------------- /examples/__pycache__/prompt_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/__pycache__/prompt_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/__pycache__/prompt_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/__pycache__/prompt_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/api_client.py -------------------------------------------------------------------------------- /examples/aqlm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/aqlm_example.py -------------------------------------------------------------------------------- /examples/benchAcc/evalppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/evalppl.py -------------------------------------------------------------------------------- /examples/benchAcc/runfloat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/runfloat.sh -------------------------------------------------------------------------------- /examples/benchAcc/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/eval_tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/eval_tasks/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/eval_tasks/_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/eval_tasks/_base.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/eval_tasks/_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/_base.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/_const.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/_const.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/_utils.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/auto.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/baichuan.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/bloom.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/codegen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/codegen.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/gpt2.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/gpt_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/gpt_bigcode.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/gpt_neox.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/gptj.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/internlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/internlm.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/llama.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/moss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/moss.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/opt.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/qwen.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/rw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/modeling/rw.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/_fused_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/_fused_base.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/fused_gptj_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/fused_gptj_attn.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/fused_llama_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/fused_llama_attn.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/fused_llama_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/fused_llama_mlp.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/qlinear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/qlinear/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/qlinear/qlinear_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/qlinear/qlinear_cuda.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/triton_utils/kernels.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/nn_modules/triton_utils/mixin.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/ACKNOWLEDGEMENT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/quantization/ACKNOWLEDGEMENT.md -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/quantization/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/quantization/gptq.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/quantization/quantizer.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/utils/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/utils/data_utils.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/exllama_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/utils/exllama_utils.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/utils/import_utils.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/peft_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/utils/peft_utils.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/perplexity_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/benchAcc/utils/utils/perplexity_utils.py -------------------------------------------------------------------------------- /examples/download_mmlu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/download_mmlu.sh -------------------------------------------------------------------------------- /examples/fp8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/fp8/README.md -------------------------------------------------------------------------------- /examples/fp8/extract_scales.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/fp8/extract_scales.py -------------------------------------------------------------------------------- /examples/fp8/quantizer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/fp8/quantizer/README.md -------------------------------------------------------------------------------- /examples/fp8/quantizer/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/fp8/quantizer/quantize.py -------------------------------------------------------------------------------- /examples/gradio_openai_chatbot_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/gradio_openai_chatbot_webserver.py -------------------------------------------------------------------------------- /examples/gradio_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/gradio_webserver.py -------------------------------------------------------------------------------- /examples/gradio_webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/gradio_webui.py -------------------------------------------------------------------------------- /examples/input.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/input.pt -------------------------------------------------------------------------------- /examples/lenovo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/lenovo.jpg -------------------------------------------------------------------------------- /examples/lenovo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/lenovo.py -------------------------------------------------------------------------------- /examples/llava_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/llava_example.py -------------------------------------------------------------------------------- /examples/llm_engine_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/llm_engine_example.py -------------------------------------------------------------------------------- /examples/logging_configuration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/logging_configuration.md -------------------------------------------------------------------------------- /examples/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/mmlu.py -------------------------------------------------------------------------------- /examples/multilora_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/multilora_inference.py -------------------------------------------------------------------------------- /examples/offline_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/offline_inference.py -------------------------------------------------------------------------------- /examples/offline_inference_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/offline_inference_distributed.py -------------------------------------------------------------------------------- /examples/offline_inference_neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/offline_inference_neuron.py -------------------------------------------------------------------------------- /examples/offline_inference_with_prefix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/offline_inference_with_prefix.py -------------------------------------------------------------------------------- /examples/openai_chat_completion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/openai_chat_completion_client.py -------------------------------------------------------------------------------- /examples/openai_completion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/openai_completion_client.py -------------------------------------------------------------------------------- /examples/production_monitoring/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/production_monitoring/README.md -------------------------------------------------------------------------------- /examples/production_monitoring/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/production_monitoring/docker-compose.yaml -------------------------------------------------------------------------------- /examples/production_monitoring/grafana.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/production_monitoring/grafana.json -------------------------------------------------------------------------------- /examples/production_monitoring/prometheus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/production_monitoring/prometheus.yaml -------------------------------------------------------------------------------- /examples/prompt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/prompt_utils.py -------------------------------------------------------------------------------- /examples/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/server.py -------------------------------------------------------------------------------- /examples/tensorize_vllm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/tensorize_vllm_model.py -------------------------------------------------------------------------------- /examples/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test.py -------------------------------------------------------------------------------- /examples/test4bit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test4bit.py -------------------------------------------------------------------------------- /examples/test4bitchatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test4bitchatglm.py -------------------------------------------------------------------------------- /examples/test8bit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test8bit.py -------------------------------------------------------------------------------- /examples/test8bitLongSeqLlama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test8bitLongSeqLlama3.py -------------------------------------------------------------------------------- /examples/test8bitchatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test8bitchatglm.py -------------------------------------------------------------------------------- /examples/test8bitqwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/test8bitqwen2.py -------------------------------------------------------------------------------- /examples/testawq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/examples/testawq.py -------------------------------------------------------------------------------- /figures/awq32.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/figures/awq32.gif -------------------------------------------------------------------------------- /figures/awq512.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/figures/awq512.gif -------------------------------------------------------------------------------- /figures/mixq32.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/figures/mixq32.gif -------------------------------------------------------------------------------- /figures/mixq512.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/figures/mixq512.gif -------------------------------------------------------------------------------- /figures/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/figures/output.png -------------------------------------------------------------------------------- /figures/textmixq.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/figures/textmixq.jpg -------------------------------------------------------------------------------- /gradio_openai_chatbot_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/gradio_openai_chatbot_webserver.py -------------------------------------------------------------------------------- /gradio_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/gradio_webserver.py -------------------------------------------------------------------------------- /mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/mmlu.py -------------------------------------------------------------------------------- /out.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/out.txt -------------------------------------------------------------------------------- /out2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/out2.txt -------------------------------------------------------------------------------- /test4bit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/test4bit.py -------------------------------------------------------------------------------- /test4bitchatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/test4bitchatglm.py -------------------------------------------------------------------------------- /test8bit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/test8bit.py -------------------------------------------------------------------------------- /test8bitLongSeqLlama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/test8bitLongSeqLlama3.py -------------------------------------------------------------------------------- /test8bitchatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/test8bitchatglm.py -------------------------------------------------------------------------------- /test8bitqwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/test8bitqwen2.py -------------------------------------------------------------------------------- /testawq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/testawq.py -------------------------------------------------------------------------------- /testmmlu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/testmmlu.sh -------------------------------------------------------------------------------- /vllm/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/.gitignore -------------------------------------------------------------------------------- /vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__init__.py -------------------------------------------------------------------------------- /vllm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_core_ext.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_core_ext.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_core_ext.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_core_ext.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_core_ext.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_core_ext.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_custom_ops.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_custom_ops.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_custom_ops.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_custom_ops.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_custom_ops.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_custom_ops.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_ipex_ops.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_ipex_ops.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_version.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_version.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_version.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/_version.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/block.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/block.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/block.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/block.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/config.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/config.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/config.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/connections.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/connections.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/connections.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/connections.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/envs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/envs.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/envs.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/envs.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/envs.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/envs.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/logger.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/logger.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/logger.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/logger.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/logger.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/logger.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/outputs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/outputs.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/outputs.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/outputs.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/pooling_params.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/pooling_params.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/pooling_params.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/pooling_params.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/pooling_params.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/pooling_params.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sampling_params.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/sampling_params.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sampling_params.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/sampling_params.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sampling_params.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/sampling_params.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scalar_type.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/scalar_type.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scalar_type.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/scalar_type.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scalar_type.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/scalar_type.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scripts.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/scripts.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sequence.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/sequence.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sequence.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/sequence.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sequence.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/sequence.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/tracing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/tracing.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/tracing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/tracing.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/version.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/version.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/version.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/__pycache__/version.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/_core_ext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/_core_ext.py -------------------------------------------------------------------------------- /vllm/_custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/_custom_ops.py -------------------------------------------------------------------------------- /vllm/_ipex_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/_ipex_ops.py -------------------------------------------------------------------------------- /vllm/_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/_version.py -------------------------------------------------------------------------------- /vllm/adapter_commons/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/layers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/layers.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/models.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/models.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/request.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/request.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/request.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/request.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/request.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/layers.py -------------------------------------------------------------------------------- /vllm/adapter_commons/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/models.py -------------------------------------------------------------------------------- /vllm/adapter_commons/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/request.py -------------------------------------------------------------------------------- /vllm/adapter_commons/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/utils.py -------------------------------------------------------------------------------- /vllm/adapter_commons/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/adapter_commons/worker_manager.py -------------------------------------------------------------------------------- /vllm/assets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/assets/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/audio.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/__pycache__/audio.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/__pycache__/base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/image.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/__pycache__/image.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/video.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/__pycache__/video.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/audio.py -------------------------------------------------------------------------------- /vllm/assets/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/base.py -------------------------------------------------------------------------------- /vllm/assets/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/image.py -------------------------------------------------------------------------------- /vllm/assets/video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/assets/video.py -------------------------------------------------------------------------------- /vllm/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__init__.py -------------------------------------------------------------------------------- /vllm/attention/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/layer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__pycache__/layer.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/layer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__pycache__/layer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/selector.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__pycache__/selector.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/selector.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/__pycache__/selector.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/pallas.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/__pycache__/pallas.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/abstract.py -------------------------------------------------------------------------------- /vllm/attention/backends/blocksparse_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/blocksparse_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/flash_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/flashinfer.py -------------------------------------------------------------------------------- /vllm/attention/backends/ipex_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/ipex_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/openvino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/openvino.py -------------------------------------------------------------------------------- /vllm/attention/backends/pallas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/pallas.py -------------------------------------------------------------------------------- /vllm/attention/backends/rocm_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/rocm_flash_attn.py -------------------------------------------------------------------------------- /vllm/attention/backends/torch_sdpa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/torch_sdpa.py -------------------------------------------------------------------------------- /vllm/attention/backends/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/utils.py -------------------------------------------------------------------------------- /vllm/attention/backends/xformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/backends/xformers.py -------------------------------------------------------------------------------- /vllm/attention/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/layer.py -------------------------------------------------------------------------------- /vllm/attention/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/ipex_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/__pycache__/ipex_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/paged_attn.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/__pycache__/paged_attn.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/paged_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/__pycache__/paged_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/blocksparse_attention/interface.py -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/blocksparse_attention/utils.py -------------------------------------------------------------------------------- /vllm/attention/ops/ipex_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/ipex_attn.py -------------------------------------------------------------------------------- /vllm/attention/ops/paged_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/paged_attn.py -------------------------------------------------------------------------------- /vllm/attention/ops/prefix_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/prefix_prefill.py -------------------------------------------------------------------------------- /vllm/attention/ops/triton_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/ops/triton_flash_attention.py -------------------------------------------------------------------------------- /vllm/attention/selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/attention/selector.py -------------------------------------------------------------------------------- /vllm/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/block.py -------------------------------------------------------------------------------- /vllm/compilation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/compilation/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/compilation/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/compilation/__pycache__/backends.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/compilation/__pycache__/backends.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/compilation/__pycache__/wrapper.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/compilation/__pycache__/wrapper.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/compilation/backends.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/compilation/backends.py -------------------------------------------------------------------------------- /vllm/compilation/wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/compilation/wrapper.py -------------------------------------------------------------------------------- /vllm/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/config.py -------------------------------------------------------------------------------- /vllm/connections.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/connections.py -------------------------------------------------------------------------------- /vllm/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/block_manager_v1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/block_manager_v1.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/block_manager_v1.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/block_manager_v1.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/block_manager_v2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/block_manager_v2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/evictor_v1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/evictor_v1.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/evictor_v1.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/evictor_v1.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/evictor_v2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/evictor_v2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/interfaces.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/interfaces.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/scheduler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/scheduler.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/scheduler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/__pycache__/scheduler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/block_table.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/block_table.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/common.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/common.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/common.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/common.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/interfaces.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/interfaces.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/naive_block.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/naive_block.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/block_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/block_table.py -------------------------------------------------------------------------------- /vllm/core/block/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/common.py -------------------------------------------------------------------------------- /vllm/core/block/cpu_gpu_block_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/cpu_gpu_block_allocator.py -------------------------------------------------------------------------------- /vllm/core/block/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/interfaces.py -------------------------------------------------------------------------------- /vllm/core/block/naive_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/naive_block.py -------------------------------------------------------------------------------- /vllm/core/block/prefix_caching_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/prefix_caching_block.py -------------------------------------------------------------------------------- /vllm/core/block/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block/utils.py -------------------------------------------------------------------------------- /vllm/core/block_manager_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block_manager_v1.py -------------------------------------------------------------------------------- /vllm/core/block_manager_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/block_manager_v2.py -------------------------------------------------------------------------------- /vllm/core/embedding_model_block_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/embedding_model_block_manager.py -------------------------------------------------------------------------------- /vllm/core/evictor_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/evictor_v1.py -------------------------------------------------------------------------------- /vllm/core/evictor_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/evictor_v2.py -------------------------------------------------------------------------------- /vllm/core/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/interfaces.py -------------------------------------------------------------------------------- /vllm/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/core/scheduler.py -------------------------------------------------------------------------------- /vllm/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__init__.py -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/distributed/communication_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/communication_op.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/cuda_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/device_communicators/cuda_wrapper.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/custom_all_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/device_communicators/custom_all_reduce.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/pynccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/device_communicators/pynccl.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/pynccl_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/device_communicators/pynccl_wrapper.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/shm_broadcast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/device_communicators/shm_broadcast.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/tpu_communicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/device_communicators/tpu_communicator.py -------------------------------------------------------------------------------- /vllm/distributed/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/parallel_state.py -------------------------------------------------------------------------------- /vllm/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/distributed/utils.py -------------------------------------------------------------------------------- /vllm/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/arg_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/arg_utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/arg_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/arg_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/arg_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/arg_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/async_llm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/async_llm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/async_llm_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/async_llm_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/async_timeout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/async_timeout.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/async_timeout.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/async_timeout.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/llm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/llm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/llm_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/llm_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/metrics.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/metrics.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/metrics_types.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/metrics_types.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/metrics_types.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/metrics_types.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/protocol.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/__pycache__/protocol.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/arg_utils.py -------------------------------------------------------------------------------- /vllm/engine/async_llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/async_llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/async_timeout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/async_timeout.py -------------------------------------------------------------------------------- /vllm/engine/llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/metrics.py -------------------------------------------------------------------------------- /vllm/engine/metrics_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/metrics_types.py -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/multiprocessing/__init__.py -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/multiprocessing/client.py -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/multiprocessing/engine.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/output_processor/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/output_processor/interfaces.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/multi_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/output_processor/multi_step.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/single_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/output_processor/single_step.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/stop_checker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/output_processor/stop_checker.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/output_processor/util.py -------------------------------------------------------------------------------- /vllm/engine/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/engine/protocol.py -------------------------------------------------------------------------------- /vllm/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/api_server.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/api_server.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/chat_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/chat_utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/chat_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/chat_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/launcher.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/launcher.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/llm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/llm.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/llm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/llm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/logger.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/__pycache__/logger.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/chat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/chat_utils.py -------------------------------------------------------------------------------- /vllm/entrypoints/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/launcher.py -------------------------------------------------------------------------------- /vllm/entrypoints/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/llm.py -------------------------------------------------------------------------------- /vllm/entrypoints/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/logger.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/openai/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/cli_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/cli_args.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/logits_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/logits_processors.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/protocol.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/run_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/run_batch.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/serving_chat.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/serving_completion.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/serving_embedding.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/serving_engine.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/serving_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/serving_tokenization.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/tool_parsers/__init__.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/entrypoints/openai/tool_parsers/utils.py -------------------------------------------------------------------------------- /vllm/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/envs.py -------------------------------------------------------------------------------- /vllm/examples/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/examples/.gitignore -------------------------------------------------------------------------------- /vllm/executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/executor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/cpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/cpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/executor_base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/executor_base.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/executor_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/executor_base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/gpu_executor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/gpu_executor.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/gpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/gpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/msgspec_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/msgspec_utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/msgspec_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/msgspec_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/neuron_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/neuron_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_gpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/ray_gpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_tpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/ray_tpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/ray_utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/ray_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_xpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/ray_xpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/tpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/tpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/xpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/__pycache__/xpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/cpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/cpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/distributed_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/distributed_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/executor_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/executor_base.py -------------------------------------------------------------------------------- /vllm/executor/gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/msgspec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/msgspec_utils.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/multiproc_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_worker_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/multiproc_worker_utils.py -------------------------------------------------------------------------------- /vllm/executor/multiproc_xpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/multiproc_xpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/neuron_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/neuron_executor.py -------------------------------------------------------------------------------- /vllm/executor/openvino_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/openvino_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/ray_gpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_tpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/ray_tpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/ray_utils.py -------------------------------------------------------------------------------- /vllm/executor/ray_xpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/ray_xpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/tpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/tpu_executor.py -------------------------------------------------------------------------------- /vllm/executor/xpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/executor/xpu_executor.py -------------------------------------------------------------------------------- /vllm/inputs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__init__.py -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/data.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/data.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/data.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/data.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/data.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/parse.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/parse.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/parse.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/parse.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/parse.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/parse.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/preprocess.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/preprocess.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/preprocess.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/preprocess.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/registry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/registry.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/registry.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/__pycache__/registry.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/data.py -------------------------------------------------------------------------------- /vllm/inputs/parse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/parse.py -------------------------------------------------------------------------------- /vllm/inputs/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/preprocess.py -------------------------------------------------------------------------------- /vllm/inputs/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/inputs/registry.py -------------------------------------------------------------------------------- /vllm/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logger.py -------------------------------------------------------------------------------- /vllm/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__init__.py -------------------------------------------------------------------------------- /vllm/logging/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/formatter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__pycache__/formatter.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/formatter.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__pycache__/formatter.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/formatter.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/__pycache__/formatter.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/logging/formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/logging/formatter.py -------------------------------------------------------------------------------- /vllm/lora/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/lora/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/fully_sharded_layers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/fully_sharded_layers.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/fully_sharded_layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/fully_sharded_layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/layers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/layers.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/lora.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/lora.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/lora.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/lora.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/models.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/models.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/punica.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/punica.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/punica.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/punica.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/request.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/request.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/request.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/request.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/request.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/worker_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/worker_manager.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/worker_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/__pycache__/worker_manager.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/fully_sharded_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/fully_sharded_layers.py -------------------------------------------------------------------------------- /vllm/lora/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/layers.py -------------------------------------------------------------------------------- /vllm/lora/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/lora.py -------------------------------------------------------------------------------- /vllm/lora/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/models.py -------------------------------------------------------------------------------- /vllm/lora/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_expand.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/bgmv_expand.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_expand.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/bgmv_expand.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_shrink.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/bgmv_shrink.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_shrink.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/bgmv_shrink.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_expand.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/sgmv_expand.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_expand.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/sgmv_expand.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_shrink.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/sgmv_shrink.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_shrink.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/sgmv_shrink.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/bgmv_expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/bgmv_expand.py -------------------------------------------------------------------------------- /vllm/lora/ops/bgmv_expand_slice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/bgmv_expand_slice.py -------------------------------------------------------------------------------- /vllm/lora/ops/bgmv_shrink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/bgmv_shrink.py -------------------------------------------------------------------------------- /vllm/lora/ops/sgmv_expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/sgmv_expand.py -------------------------------------------------------------------------------- /vllm/lora/ops/sgmv_expand_slice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/sgmv_expand_slice.py -------------------------------------------------------------------------------- /vllm/lora/ops/sgmv_shrink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/sgmv_shrink.py -------------------------------------------------------------------------------- /vllm/lora/ops/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/ops/utils.py -------------------------------------------------------------------------------- /vllm/lora/punica.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/punica.py -------------------------------------------------------------------------------- /vllm/lora/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/request.py -------------------------------------------------------------------------------- /vllm/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/utils.py -------------------------------------------------------------------------------- /vllm/lora/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/lora/worker_manager.py -------------------------------------------------------------------------------- /vllm/model_executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/custom_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/custom_op.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/guided_decoding/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/guided_fields.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/guided_decoding/guided_fields.py -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/outlines_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/guided_decoding/outlines_decoding.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/activation.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/fused_moe/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/fused_marlin_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/fused_moe/fused_moe.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/fused_moe/layer.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/moe_pallas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/fused_moe/moe_pallas.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/layernorm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/linear.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/logits_processor.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/causal_conv1d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/mamba/ops/causal_conv1d.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/mamba_ssm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/mamba/ops/mamba_ssm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/pooler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/aqlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/aqlm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/awq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/awq_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/awq_marlin.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/awq_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/awq_triton.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/base_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/base_config.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/bitsandbytes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/bitsandbytes.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/deepspeedfp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/deepspeedfp.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/experts_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/experts_int8.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/fbgemm_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/fbgemm_fp8.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/fp8.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/gguf.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/gptq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/gptq_marlin.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/kv_cache.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/marlin.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/mixq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/mixq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/mixq4bit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/mixq4bit.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/modelopt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/modelopt.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/neuron_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/neuron_quant.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/qqq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/qqq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/schema.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/tpu_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/quantization/tpu_int8.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/rejection_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/rejection_sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/resampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/resampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/rotary_embedding.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/spec_decode_base_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/spec_decode_base_sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/typical_acceptance_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/typical_acceptance_sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/vocab_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/layers/vocab_parallel_embedding.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/loader.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/neuron.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/openvino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/openvino.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/tensorizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/tensorizer.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/utils.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/weight_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/model_loader/weight_utils.py -------------------------------------------------------------------------------- /vllm/model_executor/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/models/arctic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/arctic.py -------------------------------------------------------------------------------- /vllm/model_executor/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/baichuan.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/bart.py -------------------------------------------------------------------------------- /vllm/model_executor/models/blip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/blip.py -------------------------------------------------------------------------------- /vllm/model_executor/models/blip2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/blip2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/bloom.py -------------------------------------------------------------------------------- /vllm/model_executor/models/chameleon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/chameleon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/chatglm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/clip.py -------------------------------------------------------------------------------- /vllm/model_executor/models/commandr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/commandr.py -------------------------------------------------------------------------------- /vllm/model_executor/models/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/dbrx.py -------------------------------------------------------------------------------- /vllm/model_executor/models/decilm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/decilm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/deepseek.py -------------------------------------------------------------------------------- /vllm/model_executor/models/deepseek_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/deepseek_v2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/eagle.py -------------------------------------------------------------------------------- /vllm/model_executor/models/exaone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/exaone.py -------------------------------------------------------------------------------- /vllm/model_executor/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/falcon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/fuyu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/fuyu.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/gemma.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/gemma2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/gpt2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/gpt_bigcode.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/gpt_j.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/gpt_neox.py -------------------------------------------------------------------------------- /vllm/model_executor/models/granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/granite.py -------------------------------------------------------------------------------- /vllm/model_executor/models/idefics2_vision_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/idefics2_vision_model.py -------------------------------------------------------------------------------- /vllm/model_executor/models/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/interfaces.py -------------------------------------------------------------------------------- /vllm/model_executor/models/intern_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/intern_vit.py -------------------------------------------------------------------------------- /vllm/model_executor/models/internlm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/internlm2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/internvl.py -------------------------------------------------------------------------------- /vllm/model_executor/models/jais.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/jais.py -------------------------------------------------------------------------------- /vllm/model_executor/models/jamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/jamba.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/llama.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llama_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/llama_embedding.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/llava.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/llava_next.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava_next_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/llava_next_video.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llava_onevision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/llava_onevision.py -------------------------------------------------------------------------------- /vllm/model_executor/models/medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/medusa.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/minicpm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/minicpm3.py -------------------------------------------------------------------------------- /vllm/model_executor/models/minicpmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/minicpmv.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/mixtral.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mixtral_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/mixtral_quant.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/mllama.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mlp_speculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/mlp_speculator.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/mpt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/na_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/na_vit.py -------------------------------------------------------------------------------- /vllm/model_executor/models/nemotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/nemotron.py -------------------------------------------------------------------------------- /vllm/model_executor/models/olmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/olmo.py -------------------------------------------------------------------------------- /vllm/model_executor/models/olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/olmoe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/opt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/orion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/orion.py -------------------------------------------------------------------------------- /vllm/model_executor/models/paligemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/paligemma.py -------------------------------------------------------------------------------- /vllm/model_executor/models/persimmon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/persimmon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/phi.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/phi3.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/phi3_small.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/phi3v.py -------------------------------------------------------------------------------- /vllm/model_executor/models/phimoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/phimoe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/pixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/pixtral.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/qwen.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/qwen2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/qwen2_moe.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/qwen2_vl.py -------------------------------------------------------------------------------- /vllm/model_executor/models/siglip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/siglip.py -------------------------------------------------------------------------------- /vllm/model_executor/models/solar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/solar.py -------------------------------------------------------------------------------- /vllm/model_executor/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/stablelm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/starcoder2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/starcoder2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/ultravox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/ultravox.py -------------------------------------------------------------------------------- /vllm/model_executor/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/utils.py -------------------------------------------------------------------------------- /vllm/model_executor/models/xverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/models/xverse.py -------------------------------------------------------------------------------- /vllm/model_executor/parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/parameter.py -------------------------------------------------------------------------------- /vllm/model_executor/pooling_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/pooling_metadata.py -------------------------------------------------------------------------------- /vllm/model_executor/sampling_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/sampling_metadata.py -------------------------------------------------------------------------------- /vllm/model_executor/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/model_executor/utils.py -------------------------------------------------------------------------------- /vllm/multimodal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__init__.py -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/audio.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/audio.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/audio.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/audio.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/image.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/image.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/image.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/image.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/registry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/registry.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/video.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/video.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/video.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/__pycache__/video.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/audio.py -------------------------------------------------------------------------------- /vllm/multimodal/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/base.py -------------------------------------------------------------------------------- /vllm/multimodal/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/image.py -------------------------------------------------------------------------------- /vllm/multimodal/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/registry.py -------------------------------------------------------------------------------- /vllm/multimodal/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/utils.py -------------------------------------------------------------------------------- /vllm/multimodal/video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/multimodal/video.py -------------------------------------------------------------------------------- /vllm/outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/outputs.py -------------------------------------------------------------------------------- /vllm/platforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__init__.py -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/cpu.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/cpu.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/cuda.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/cuda.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/cuda.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/cuda.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/interface.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/interface.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/interface.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/interface.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/interface.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/interface.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/rocm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/rocm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/tpu.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/__pycache__/tpu.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/cpu.py -------------------------------------------------------------------------------- /vllm/platforms/cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/cuda.py -------------------------------------------------------------------------------- /vllm/platforms/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/interface.py -------------------------------------------------------------------------------- /vllm/platforms/rocm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/rocm.py -------------------------------------------------------------------------------- /vllm/platforms/tpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/platforms/tpu.py -------------------------------------------------------------------------------- /vllm/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/plugins/__init__.py -------------------------------------------------------------------------------- /vllm/plugins/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/plugins/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/plugins/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/plugins/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/pooling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/pooling_params.py -------------------------------------------------------------------------------- /vllm/production_monitoring/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/production_monitoring/README.md -------------------------------------------------------------------------------- /vllm/production_monitoring/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/production_monitoring/docker-compose.yaml -------------------------------------------------------------------------------- /vllm/production_monitoring/grafana.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/production_monitoring/grafana.json -------------------------------------------------------------------------------- /vllm/production_monitoring/prometheus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/production_monitoring/prometheus.yaml -------------------------------------------------------------------------------- /vllm/prompt_adapter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/layers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/layers.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/models.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/models.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/request.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/request.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/request.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/request.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/request.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/layers.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/models.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/request.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/utils.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/worker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/prompt_adapter/worker_manager.py -------------------------------------------------------------------------------- /vllm/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. 2 | # The vllm package uses inline types. 3 | -------------------------------------------------------------------------------- /vllm/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/sampling_params.py -------------------------------------------------------------------------------- /vllm/scalar_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/scalar_type.py -------------------------------------------------------------------------------- /vllm/scripts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/scripts.py -------------------------------------------------------------------------------- /vllm/sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/sequence.py -------------------------------------------------------------------------------- /vllm/spec_decode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/metrics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/metrics.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/metrics.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/metrics.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/metrics.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/metrics.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/util.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/__pycache__/util.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/batch_expansion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/batch_expansion.py -------------------------------------------------------------------------------- /vllm/spec_decode/draft_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/draft_model_runner.py -------------------------------------------------------------------------------- /vllm/spec_decode/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/interfaces.py -------------------------------------------------------------------------------- /vllm/spec_decode/medusa_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/medusa_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/metrics.py -------------------------------------------------------------------------------- /vllm/spec_decode/mlp_speculator_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/mlp_speculator_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/multi_step_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/multi_step_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/ngram_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/ngram_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/proposer_worker_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/proposer_worker_base.py -------------------------------------------------------------------------------- /vllm/spec_decode/smaller_tp_proposer_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/smaller_tp_proposer_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/spec_decode_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/spec_decode_worker.py -------------------------------------------------------------------------------- /vllm/spec_decode/target_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/target_model_runner.py -------------------------------------------------------------------------------- /vllm/spec_decode/top1_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/top1_proposer.py -------------------------------------------------------------------------------- /vllm/spec_decode/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/spec_decode/util.py -------------------------------------------------------------------------------- /vllm/tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/tracing.py -------------------------------------------------------------------------------- /vllm/transformers_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/transformers_utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/config.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/arctic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/arctic.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/chatglm.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/dbrx.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/eagle.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/exaone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/exaone.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/falcon.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/granite.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/internvl.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/jais.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/jais.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/medusa.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/mllama.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mlp_speculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/mlp_speculator.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/mpt.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/nemotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/nemotron.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/solar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/solar.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/ultravox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/configs/ultravox.py -------------------------------------------------------------------------------- /vllm/transformers_utils/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/detokenizer.py -------------------------------------------------------------------------------- /vllm/transformers_utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/processor.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/tokenizer.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/tokenizer_group/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/tokenizers/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/tokenizers/baichuan.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/tokenizers/mistral.py -------------------------------------------------------------------------------- /vllm/transformers_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/transformers_utils/utils.py -------------------------------------------------------------------------------- /vllm/triton_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__init__.py -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/importing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/importing.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/importing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/importing.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/importing.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/importing.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/libentry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/libentry.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/libentry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/libentry.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/libentry.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/__pycache__/libentry.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/custom_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/custom_cache_manager.py -------------------------------------------------------------------------------- /vllm/triton_utils/importing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/importing.py -------------------------------------------------------------------------------- /vllm/triton_utils/libentry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/triton_utils/libentry.py -------------------------------------------------------------------------------- /vllm/usage/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/usage/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/usage/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/usage/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/usage/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/usage/__pycache__/usage_lib.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/usage/__pycache__/usage_lib.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/usage/__pycache__/usage_lib.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/usage/__pycache__/usage_lib.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/usage/usage_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/usage/usage_lib.py -------------------------------------------------------------------------------- /vllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/utils.py -------------------------------------------------------------------------------- /vllm/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/version.py -------------------------------------------------------------------------------- /vllm/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/worker/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cache_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/cache_engine.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cache_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/cache_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cpu_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/cpu_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/cpu_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/model_runner.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/model_runner.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/neuron_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/neuron_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/openvino_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/openvino_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/tpu_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/tpu_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/tpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/tpu_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/worker_base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/worker_base.cpython-310.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/worker_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/worker_base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/xpu_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/xpu_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/xpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/__pycache__/xpu_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/cache_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/cache_engine.py -------------------------------------------------------------------------------- /vllm/worker/cpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/cpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/cpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/cpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/embedding_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/embedding_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/enc_dec_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/enc_dec_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/model_runner.py -------------------------------------------------------------------------------- /vllm/worker/model_runner_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/model_runner_base.py -------------------------------------------------------------------------------- /vllm/worker/multi_step_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/multi_step_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/multi_step_tpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/multi_step_tpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/multi_step_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/multi_step_worker.py -------------------------------------------------------------------------------- /vllm/worker/neuron_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/neuron_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/neuron_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/neuron_worker.py -------------------------------------------------------------------------------- /vllm/worker/openvino_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/openvino_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/openvino_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/openvino_worker.py -------------------------------------------------------------------------------- /vllm/worker/tpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/tpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/tpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/tpu_worker.py -------------------------------------------------------------------------------- /vllm/worker/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/utils.py -------------------------------------------------------------------------------- /vllm/worker/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/worker.py -------------------------------------------------------------------------------- /vllm/worker/worker_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/worker_base.py -------------------------------------------------------------------------------- /vllm/worker/xpu_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/xpu_model_runner.py -------------------------------------------------------------------------------- /vllm/worker/xpu_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/HEAD/vllm/worker/xpu_worker.py --------------------------------------------------------------------------------