├── .gitignore ├── README.MD ├── download_mmlu.sh ├── examples ├── .gitignore ├── __pycache__ │ ├── prompt_utils.cpython-310.pyc │ └── prompt_utils.cpython-311.pyc ├── api_client.py ├── aqlm_example.py ├── benchAcc │ ├── evalppl.py │ ├── output │ │ └── ppl_batchsize512_fp16_Llama-2-7b.csv1 │ ├── runfloat.sh │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── __init__.cpython-311.pyc │ │ ├── eval_tasks │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── _utils │ │ │ ├── __init__.py │ │ │ ├── classification_utils.py │ │ │ └── generation_utils.py │ │ ├── language_modeling_task.py │ │ ├── sequence_classification_task.py │ │ └── text_summarization_task.py │ │ ├── modeling │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── _base.cpython-310.pyc │ │ │ ├── _base.cpython-311.pyc │ │ │ ├── _const.cpython-310.pyc │ │ │ ├── _const.cpython-311.pyc │ │ │ ├── _utils.cpython-310.pyc │ │ │ └── _utils.cpython-311.pyc │ │ ├── _base.py │ │ ├── _const.py │ │ ├── _utils.py │ │ ├── auto.py │ │ ├── baichuan.py │ │ ├── bloom.py │ │ ├── codegen.py │ │ ├── gpt2.py │ │ ├── gpt_bigcode.py │ │ ├── gpt_neox.py │ │ ├── gptj.py │ │ ├── internlm.py │ │ ├── llama.py │ │ ├── moss.py │ │ ├── opt.py │ │ ├── qwen.py │ │ └── rw.py │ │ ├── nn_modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── _fused_base.cpython-310.pyc │ │ │ └── _fused_base.cpython-311.pyc │ │ ├── _fused_base.py │ │ ├── fused_gptj_attn.py │ │ ├── fused_llama_attn.py │ │ ├── fused_llama_mlp.py │ │ ├── qlinear │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── __init__.cpython-311.pyc │ │ │ ├── qlinear_cuda.py │ │ │ ├── qlinear_cuda_old.py │ │ │ ├── qlinear_exllama.py │ │ │ ├── qlinear_exllamav2.py │ │ │ ├── qlinear_qigen.py │ │ │ └── qlinear_triton.py │ │ └── triton_utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── mixin.cpython-310.pyc │ │ │ └── mixin.cpython-311.pyc │ │ │ ├── custom_autotune.py │ │ │ ├── kernels.py │ │ │ └── mixin.py │ │ ├── quantization │ │ ├── ACKNOWLEDGEMENT.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── gptq.cpython-310.pyc │ │ │ ├── gptq.cpython-311.pyc │ │ │ ├── quantizer.cpython-310.pyc │ │ │ └── quantizer.cpython-311.pyc │ │ ├── gptq.py │ │ └── quantizer.py │ │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── data_utils.cpython-310.pyc │ │ ├── data_utils.cpython-311.pyc │ │ ├── exllama_utils.cpython-310.pyc │ │ ├── exllama_utils.cpython-311.pyc │ │ ├── import_utils.cpython-310.pyc │ │ ├── import_utils.cpython-311.pyc │ │ ├── peft_utils.cpython-310.pyc │ │ ├── peft_utils.cpython-311.pyc │ │ ├── perplexity_utils.cpython-310.pyc │ │ └── perplexity_utils.cpython-311.pyc │ │ ├── data_utils.py │ │ ├── exllama_utils.py │ │ ├── import_utils.py │ │ ├── peft_utils.py │ │ └── perplexity_utils.py ├── download_mmlu.sh ├── fp8 │ ├── README.md │ ├── extract_scales.py │ └── quantizer │ │ ├── README.md │ │ └── quantize.py ├── gradio_openai_chatbot_webserver.py ├── gradio_webserver.py ├── gradio_webui.py ├── input.pt ├── lenovo.jpg ├── lenovo.py ├── llava_example.py ├── llm_engine_example.py ├── logging_configuration.md ├── mmlu.py ├── multilora_inference.py ├── offline_inference.py ├── offline_inference_distributed.py ├── offline_inference_neuron.py ├── offline_inference_with_prefix.py ├── openai_chat_completion_client.py ├── openai_completion_client.py ├── production_monitoring │ ├── README.md │ ├── docker-compose.yaml │ ├── grafana.json │ └── prometheus.yaml ├── prompt_utils.py ├── server.py ├── tensorize_vllm_model.py ├── test.py ├── test4bit.py ├── test4bitchatglm.py ├── test8bit.py ├── test8bitLongSeqLlama3.py ├── test8bitchatglm.py ├── test8bitqwen2.py └── testawq.py ├── figures ├── awq32.gif ├── awq512.gif ├── mixq32.gif ├── mixq512.gif ├── output.png └── textmixq.jpg ├── gradio_openai_chatbot_webserver.py ├── gradio_webserver.py ├── mmlu.py ├── out.txt ├── out2.txt ├── test4bit.py ├── test4bitchatglm.py ├── test8bit.py ├── test8bitLongSeqLlama3.py ├── test8bitchatglm.py ├── test8bitqwen2.py ├── testawq.py ├── testmmlu.sh └── vllm ├── .gitignore ├── __init__.py ├── __pycache__ ├── __init__.cpython-311.pyc ├── __init__.cpython-312.pyc ├── _core_ext.cpython-311.pyc ├── _core_ext.cpython-312.pyc ├── _custom_ops.cpython-311.pyc ├── _custom_ops.cpython-312.pyc ├── _ipex_ops.cpython-311.pyc ├── _version.cpython-311.pyc ├── block.cpython-311.pyc ├── config.cpython-311.pyc ├── config.cpython-312.pyc ├── connections.cpython-311.pyc ├── envs.cpython-311.pyc ├── envs.cpython-312.pyc ├── logger.cpython-311.pyc ├── logger.cpython-312.pyc ├── outputs.cpython-311.pyc ├── pooling_params.cpython-311.pyc ├── pooling_params.cpython-312.pyc ├── sampling_params.cpython-311.pyc ├── sampling_params.cpython-312.pyc ├── scalar_type.cpython-311.pyc ├── scalar_type.cpython-312.pyc ├── scripts.cpython-311.pyc ├── sequence.cpython-311.pyc ├── sequence.cpython-312.pyc ├── tracing.cpython-311.pyc ├── utils.cpython-311.pyc ├── utils.cpython-312.pyc └── version.cpython-311.pyc ├── _core_ext.py ├── _custom_ops.py ├── _ipex_ops.py ├── _version.py ├── adapter_commons ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── layers.cpython-311.pyc │ ├── models.cpython-311.pyc │ ├── request.cpython-311.pyc │ ├── request.cpython-312.pyc │ ├── utils.cpython-311.pyc │ └── worker_manager.cpython-311.pyc ├── layers.py ├── models.py ├── request.py ├── utils.py └── worker_manager.py ├── assets ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── audio.cpython-311.pyc │ ├── base.cpython-311.pyc │ ├── image.cpython-311.pyc │ └── video.cpython-311.pyc ├── audio.py ├── base.py ├── image.py └── video.py ├── attention ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── layer.cpython-311.pyc │ └── selector.cpython-311.pyc ├── backends │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── abstract.cpython-311.pyc │ │ ├── blocksparse_attn.cpython-311.pyc │ │ ├── flash_attn.cpython-311.pyc │ │ ├── flashinfer.cpython-311.pyc │ │ ├── ipex_attn.cpython-311.pyc │ │ ├── openvino.cpython-311.pyc │ │ ├── pallas.cpython-311.pyc │ │ ├── rocm_flash_attn.cpython-311.pyc │ │ ├── torch_sdpa.cpython-311.pyc │ │ ├── utils.cpython-311.pyc │ │ └── xformers.cpython-311.pyc │ ├── abstract.py │ ├── blocksparse_attn.py │ ├── flash_attn.py │ ├── flashinfer.py │ ├── ipex_attn.py │ ├── openvino.py │ ├── pallas.py │ ├── rocm_flash_attn.py │ ├── torch_sdpa.py │ ├── utils.py │ └── xformers.py ├── layer.py ├── ops │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── ipex_attn.cpython-311.pyc │ │ ├── paged_attn.cpython-311.pyc │ │ ├── prefix_prefill.cpython-311.pyc │ │ └── triton_flash_attention.cpython-311.pyc │ ├── blocksparse_attention │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── blocksparse_attention_kernel.cpython-311.pyc │ │ │ ├── interface.cpython-311.pyc │ │ │ └── utils.cpython-311.pyc │ │ ├── blocksparse_attention_kernel.py │ │ ├── interface.py │ │ └── utils.py │ ├── ipex_attn.py │ ├── paged_attn.py │ ├── prefix_prefill.py │ └── triton_flash_attention.py └── selector.py ├── block.py ├── compilation ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── backends.cpython-311.pyc │ └── wrapper.cpython-311.pyc ├── backends.py └── wrapper.py ├── config.py ├── connections.py ├── core ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── block_manager_v1.cpython-311.pyc │ ├── block_manager_v2.cpython-311.pyc │ ├── embedding_model_block_manager.cpython-311.pyc │ ├── evictor_v1.cpython-311.pyc │ ├── evictor_v2.cpython-311.pyc │ ├── interfaces.cpython-311.pyc │ └── scheduler.cpython-311.pyc ├── block │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── block_table.cpython-311.pyc │ │ ├── common.cpython-311.pyc │ │ ├── cpu_gpu_block_allocator.cpython-311.pyc │ │ ├── interfaces.cpython-311.pyc │ │ ├── naive_block.cpython-311.pyc │ │ ├── prefix_caching_block.cpython-311.pyc │ │ └── utils.cpython-311.pyc │ ├── block_table.py │ ├── common.py │ ├── cpu_gpu_block_allocator.py │ ├── interfaces.py │ ├── naive_block.py │ ├── prefix_caching_block.py │ └── utils.py ├── block_manager_v1.py ├── block_manager_v2.py ├── embedding_model_block_manager.py ├── evictor_v1.py ├── evictor_v2.py ├── interfaces.py └── scheduler.py ├── distributed ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── communication_op.cpython-311.pyc │ ├── communication_op.cpython-312.pyc │ ├── parallel_state.cpython-311.pyc │ ├── parallel_state.cpython-312.pyc │ ├── utils.cpython-311.pyc │ └── utils.cpython-312.pyc ├── communication_op.py ├── device_communicators │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── cuda_wrapper.cpython-311.pyc │ │ ├── custom_all_reduce.cpython-311.pyc │ │ ├── custom_all_reduce_utils.cpython-311.pyc │ │ ├── pynccl.cpython-311.pyc │ │ ├── pynccl_wrapper.cpython-311.pyc │ │ ├── shm_broadcast.cpython-311.pyc │ │ └── tpu_communicator.cpython-311.pyc │ ├── cuda_wrapper.py │ ├── custom_all_reduce.py │ ├── custom_all_reduce_utils.py │ ├── pynccl.py │ ├── pynccl_wrapper.py │ ├── shm_broadcast.py │ └── tpu_communicator.py ├── parallel_state.py └── utils.py ├── engine ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── arg_utils.cpython-311.pyc │ ├── arg_utils.cpython-312.pyc │ ├── async_llm_engine.cpython-311.pyc │ ├── async_timeout.cpython-311.pyc │ ├── llm_engine.cpython-311.pyc │ ├── metrics.cpython-311.pyc │ ├── metrics_types.cpython-311.pyc │ └── protocol.cpython-311.pyc ├── arg_utils.py ├── async_llm_engine.py ├── async_timeout.py ├── llm_engine.py ├── metrics.py ├── metrics_types.py ├── multiprocessing │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── client.cpython-311.pyc │ │ └── engine.cpython-311.pyc │ ├── client.py │ └── engine.py ├── output_processor │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── interfaces.cpython-311.pyc │ │ ├── multi_step.cpython-311.pyc │ │ ├── single_step.cpython-311.pyc │ │ ├── stop_checker.cpython-311.pyc │ │ └── util.cpython-311.pyc │ ├── interfaces.py │ ├── multi_step.py │ ├── single_step.py │ ├── stop_checker.py │ └── util.py └── protocol.py ├── entrypoints ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── api_server.cpython-311.pyc │ ├── chat_utils.cpython-311.pyc │ ├── launcher.cpython-311.pyc │ ├── llm.cpython-311.pyc │ └── logger.cpython-311.pyc ├── api_server.py ├── chat_utils.py ├── launcher.py ├── llm.py ├── logger.py └── openai │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── api_server.cpython-311.pyc │ ├── cli_args.cpython-311.pyc │ ├── logits_processors.cpython-311.pyc │ ├── protocol.cpython-311.pyc │ ├── run_batch.cpython-311.pyc │ ├── serving_chat.cpython-311.pyc │ ├── serving_completion.cpython-311.pyc │ ├── serving_embedding.cpython-311.pyc │ ├── serving_engine.cpython-311.pyc │ └── serving_tokenization.cpython-311.pyc │ ├── api_server.py │ ├── cli_args.py │ ├── logits_processors.py │ ├── protocol.py │ ├── run_batch.py │ ├── serving_chat.py │ ├── serving_completion.py │ ├── serving_embedding.py │ ├── serving_engine.py │ ├── serving_tokenization.py │ └── tool_parsers │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── abstract_tool_parser.cpython-311.pyc │ ├── hermes_tool_parser.cpython-311.pyc │ ├── mistral_tool_parser.cpython-311.pyc │ └── utils.cpython-311.pyc │ ├── abstract_tool_parser.py │ ├── hermes_tool_parser.py │ ├── mistral_tool_parser.py │ └── utils.py ├── envs.py ├── examples └── .gitignore ├── executor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── cpu_executor.cpython-311.pyc │ ├── distributed_gpu_executor.cpython-311.pyc │ ├── executor_base.cpython-311.pyc │ ├── gpu_executor.cpython-311.pyc │ ├── msgspec_utils.cpython-311.pyc │ ├── multiproc_gpu_executor.cpython-311.pyc │ ├── multiproc_worker_utils.cpython-311.pyc │ ├── multiproc_xpu_executor.cpython-311.pyc │ ├── neuron_executor.cpython-311.pyc │ ├── openvino_executor.cpython-311.pyc │ ├── ray_gpu_executor.cpython-311.pyc │ ├── ray_tpu_executor.cpython-311.pyc │ ├── ray_utils.cpython-311.pyc │ ├── ray_xpu_executor.cpython-311.pyc │ ├── tpu_executor.cpython-311.pyc │ └── xpu_executor.cpython-311.pyc ├── cpu_executor.py ├── distributed_gpu_executor.py ├── executor_base.py ├── gpu_executor.py ├── msgspec_utils.py ├── multiproc_gpu_executor.py ├── multiproc_worker_utils.py ├── multiproc_xpu_executor.py ├── neuron_executor.py ├── openvino_executor.py ├── ray_gpu_executor.py ├── ray_tpu_executor.py ├── ray_utils.py ├── ray_xpu_executor.py ├── tpu_executor.py └── xpu_executor.py ├── inputs ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── data.cpython-311.pyc │ ├── data.cpython-312.pyc │ ├── parse.cpython-311.pyc │ ├── parse.cpython-312.pyc │ ├── preprocess.cpython-311.pyc │ ├── registry.cpython-311.pyc │ └── registry.cpython-312.pyc ├── data.py ├── parse.py ├── preprocess.py └── registry.py ├── logger.py ├── logging ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── formatter.cpython-311.pyc │ └── formatter.cpython-312.pyc └── formatter.py ├── lora ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── fully_sharded_layers.cpython-311.pyc │ ├── layers.cpython-311.pyc │ ├── lora.cpython-311.pyc │ ├── models.cpython-311.pyc │ ├── punica.cpython-311.pyc │ ├── request.cpython-311.pyc │ ├── request.cpython-312.pyc │ ├── utils.cpython-311.pyc │ └── worker_manager.cpython-311.pyc ├── fully_sharded_layers.py ├── layers.py ├── lora.py ├── models.py ├── ops │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── bgmv_expand.cpython-311.pyc │ │ ├── bgmv_expand_slice.cpython-311.pyc │ │ ├── bgmv_shrink.cpython-311.pyc │ │ ├── sgmv_expand.cpython-311.pyc │ │ ├── sgmv_expand_slice.cpython-311.pyc │ │ ├── sgmv_shrink.cpython-311.pyc │ │ └── utils.cpython-311.pyc │ ├── bgmv_expand.py │ ├── bgmv_expand_slice.py │ ├── bgmv_shrink.py │ ├── sgmv_expand.py │ ├── sgmv_expand_slice.py │ ├── sgmv_shrink.py │ └── utils.py ├── punica.py ├── request.py ├── utils.py └── worker_manager.py ├── model_executor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── custom_op.cpython-311.pyc │ ├── custom_op.cpython-312.pyc │ ├── parameter.cpython-311.pyc │ ├── parameter.cpython-312.pyc │ ├── pooling_metadata.cpython-311.pyc │ ├── sampling_metadata.cpython-311.pyc │ ├── sampling_metadata.cpython-312.pyc │ ├── utils.cpython-311.pyc │ └── utils.cpython-312.pyc ├── custom_op.py ├── guided_decoding │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── guided_fields.cpython-311.pyc │ │ ├── lm_format_enforcer_decoding.cpython-311.pyc │ │ ├── outlines_decoding.cpython-311.pyc │ │ └── outlines_logits_processors.cpython-311.pyc │ ├── guided_fields.py │ ├── lm_format_enforcer_decoding.py │ ├── outlines_decoding.py │ └── outlines_logits_processors.py ├── layers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── __init__.cpython-312.pyc │ │ ├── activation.cpython-311.pyc │ │ ├── layernorm.cpython-311.pyc │ │ ├── linear.cpython-311.pyc │ │ ├── linear.cpython-312.pyc │ │ ├── logits_processor.cpython-311.pyc │ │ ├── pooler.cpython-311.pyc │ │ ├── rejection_sampler.cpython-311.pyc │ │ ├── resampler.cpython-311.pyc │ │ ├── rotary_embedding.cpython-311.pyc │ │ ├── sampler.cpython-311.pyc │ │ ├── spec_decode_base_sampler.cpython-311.pyc │ │ ├── spec_decode_base_sampler.cpython-312.pyc │ │ ├── typical_acceptance_sampler.cpython-311.pyc │ │ ├── vocab_parallel_embedding.cpython-311.pyc │ │ └── vocab_parallel_embedding.cpython-312.pyc │ ├── activation.py │ ├── fused_moe │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── __init__.cpython-312.pyc │ │ │ ├── fused_marlin_moe.cpython-311.pyc │ │ │ ├── fused_marlin_moe.cpython-312.pyc │ │ │ ├── fused_moe.cpython-311.pyc │ │ │ ├── fused_moe.cpython-312.pyc │ │ │ ├── layer.cpython-311.pyc │ │ │ ├── layer.cpython-312.pyc │ │ │ └── moe_pallas.cpython-311.pyc │ │ ├── configs │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ ├── E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=14336,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=1792,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=3584,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ ├── E=8,N=7168,device_name=AMD_Instinct_MI300X.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ └── E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ ├── fused_marlin_moe.py │ │ ├── fused_moe.py │ │ ├── layer.py │ │ └── moe_pallas.py │ ├── layernorm.py │ ├── linear.py │ ├── logits_processor.py │ ├── mamba │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-311.pyc │ │ └── ops │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── causal_conv1d.cpython-311.pyc │ │ │ └── mamba_ssm.cpython-311.pyc │ │ │ ├── causal_conv1d.py │ │ │ └── mamba_ssm.py │ ├── pooler.py │ ├── quantization │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── __init__.cpython-312.pyc │ │ │ ├── aqlm.cpython-311.pyc │ │ │ ├── aqlm.cpython-312.pyc │ │ │ ├── awq.cpython-311.pyc │ │ │ ├── awq.cpython-312.pyc │ │ │ ├── awq_marlin.cpython-311.pyc │ │ │ ├── awq_marlin.cpython-312.pyc │ │ │ ├── awq_triton.cpython-311.pyc │ │ │ ├── base_config.cpython-311.pyc │ │ │ ├── base_config.cpython-312.pyc │ │ │ ├── bitsandbytes.cpython-311.pyc │ │ │ ├── bitsandbytes.cpython-312.pyc │ │ │ ├── deepspeedfp.cpython-311.pyc │ │ │ ├── deepspeedfp.cpython-312.pyc │ │ │ ├── experts_int8.cpython-311.pyc │ │ │ ├── experts_int8.cpython-312.pyc │ │ │ ├── fbgemm_fp8.cpython-311.pyc │ │ │ ├── fbgemm_fp8.cpython-312.pyc │ │ │ ├── fp8.cpython-311.pyc │ │ │ ├── fp8.cpython-312.pyc │ │ │ ├── gguf.cpython-311.pyc │ │ │ ├── gguf.cpython-312.pyc │ │ │ ├── gptq.cpython-311.pyc │ │ │ ├── gptq.cpython-312.pyc │ │ │ ├── gptq_marlin.cpython-311.pyc │ │ │ ├── gptq_marlin.cpython-312.pyc │ │ │ ├── gptq_marlin_24.cpython-311.pyc │ │ │ ├── gptq_marlin_24.cpython-312.pyc │ │ │ ├── kv_cache.cpython-311.pyc │ │ │ ├── kv_cache.cpython-312.pyc │ │ │ ├── marlin.cpython-311.pyc │ │ │ ├── marlin.cpython-312.pyc │ │ │ ├── mixq.cpython-311.pyc │ │ │ ├── mixq.cpython-312.pyc │ │ │ ├── mixq4bit.cpython-311.pyc │ │ │ ├── modelopt.cpython-311.pyc │ │ │ ├── modelopt.cpython-312.pyc │ │ │ ├── neuron_quant.cpython-311.pyc │ │ │ ├── neuron_quant.cpython-312.pyc │ │ │ ├── qqq.cpython-311.pyc │ │ │ ├── qqq.cpython-312.pyc │ │ │ ├── schema.cpython-311.pyc │ │ │ ├── tpu_int8.cpython-311.pyc │ │ │ └── tpu_int8.cpython-312.pyc │ │ ├── aqlm.py │ │ ├── awq.py │ │ ├── awq_marlin.py │ │ ├── awq_triton.py │ │ ├── base_config.py │ │ ├── bitsandbytes.py │ │ ├── compressed_tensors │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ ├── compressed_tensors.cpython-311.pyc │ │ │ │ ├── compressed_tensors.cpython-312.pyc │ │ │ │ ├── compressed_tensors_moe.cpython-311.pyc │ │ │ │ ├── compressed_tensors_moe.cpython-312.pyc │ │ │ │ ├── utils.cpython-311.pyc │ │ │ │ └── utils.cpython-312.pyc │ │ │ ├── compressed_tensors.py │ │ │ ├── compressed_tensors_moe.py │ │ │ ├── schemes │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_scheme.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_scheme.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w4a16_24.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w4a16_24.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w8a16_fp8.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w8a16_fp8.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_fp8.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_fp8.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_int8.cpython-311.pyc │ │ │ │ │ ├── compressed_tensors_w8a8_int8.cpython-312.pyc │ │ │ │ │ ├── compressed_tensors_wNa16.cpython-311.pyc │ │ │ │ │ └── compressed_tensors_wNa16.cpython-312.pyc │ │ │ │ ├── compressed_tensors_scheme.py │ │ │ │ ├── compressed_tensors_w4a16_24.py │ │ │ │ ├── compressed_tensors_w8a16_fp8.py │ │ │ │ ├── compressed_tensors_w8a8_fp8.py │ │ │ │ ├── compressed_tensors_w8a8_int8.py │ │ │ │ └── compressed_tensors_wNa16.py │ │ │ └── utils.py │ │ ├── deepspeedfp.py │ │ ├── experts_int8.py │ │ ├── fbgemm_fp8.py │ │ ├── fp8.py │ │ ├── gguf.py │ │ ├── gptq.py │ │ ├── gptq_marlin.py │ │ ├── gptq_marlin_24.py │ │ ├── kernels │ │ │ ├── MPLinearKernel.py │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── MPLinearKernel.cpython-311.pyc │ │ │ │ ├── MPLinearKernel.cpython-312.pyc │ │ │ │ ├── __init__.cpython-311.pyc │ │ │ │ ├── __init__.cpython-312.pyc │ │ │ │ ├── machete.cpython-311.pyc │ │ │ │ ├── machete.cpython-312.pyc │ │ │ │ ├── marlin.cpython-311.pyc │ │ │ │ └── marlin.cpython-312.pyc │ │ │ ├── machete.py │ │ │ └── marlin.py │ │ ├── kv_cache.py │ │ ├── marlin.py │ │ ├── mixq.py │ │ ├── mixq4bit.py │ │ ├── modelopt.py │ │ ├── neuron_quant.py │ │ ├── qqq.py │ │ ├── schema.py │ │ ├── tpu_int8.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── __init__.cpython-312.pyc │ │ │ ├── layer_utils.cpython-311.pyc │ │ │ ├── layer_utils.cpython-312.pyc │ │ │ ├── machete_utils.cpython-311.pyc │ │ │ ├── machete_utils.cpython-312.pyc │ │ │ ├── marlin_utils.cpython-311.pyc │ │ │ ├── marlin_utils.cpython-312.pyc │ │ │ ├── marlin_utils_fp8.cpython-311.pyc │ │ │ ├── marlin_utils_fp8.cpython-312.pyc │ │ │ ├── marlin_utils_test.cpython-311.pyc │ │ │ ├── marlin_utils_test_24.cpython-311.pyc │ │ │ ├── marlin_utils_test_qqq.cpython-311.pyc │ │ │ ├── quant_utils.cpython-311.pyc │ │ │ ├── quant_utils.cpython-312.pyc │ │ │ ├── w8a8_utils.cpython-311.pyc │ │ │ └── w8a8_utils.cpython-312.pyc │ │ │ ├── layer_utils.py │ │ │ ├── machete_utils.py │ │ │ ├── marlin_utils.py │ │ │ ├── marlin_utils_fp8.py │ │ │ ├── marlin_utils_test.py │ │ │ ├── marlin_utils_test_24.py │ │ │ ├── marlin_utils_test_qqq.py │ │ │ ├── quant_utils.py │ │ │ └── w8a8_utils.py │ ├── rejection_sampler.py │ ├── resampler.py │ ├── rotary_embedding.py │ ├── sampler.py │ ├── spec_decode_base_sampler.py │ ├── typical_acceptance_sampler.py │ └── vocab_parallel_embedding.py ├── model_loader │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── loader.cpython-311.pyc │ │ ├── neuron.cpython-311.pyc │ │ ├── openvino.cpython-311.pyc │ │ ├── tensorizer.cpython-311.pyc │ │ ├── utils.cpython-311.pyc │ │ └── weight_utils.cpython-311.pyc │ ├── loader.py │ ├── neuron.py │ ├── openvino.py │ ├── tensorizer.py │ ├── utils.py │ └── weight_utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── arctic.cpython-311.pyc │ │ ├── baichuan.cpython-311.pyc │ │ ├── bart.cpython-311.pyc │ │ ├── blip.cpython-311.pyc │ │ ├── blip2.cpython-311.pyc │ │ ├── bloom.cpython-311.pyc │ │ ├── chameleon.cpython-311.pyc │ │ ├── chatglm.cpython-311.pyc │ │ ├── clip.cpython-311.pyc │ │ ├── commandr.cpython-311.pyc │ │ ├── dbrx.cpython-311.pyc │ │ ├── decilm.cpython-311.pyc │ │ ├── deepseek.cpython-311.pyc │ │ ├── deepseek_v2.cpython-311.pyc │ │ ├── eagle.cpython-311.pyc │ │ ├── exaone.cpython-311.pyc │ │ ├── falcon.cpython-311.pyc │ │ ├── fuyu.cpython-311.pyc │ │ ├── gemma.cpython-311.pyc │ │ ├── gemma2.cpython-311.pyc │ │ ├── gpt2.cpython-311.pyc │ │ ├── gpt_bigcode.cpython-311.pyc │ │ ├── gpt_j.cpython-311.pyc │ │ ├── gpt_neox.cpython-311.pyc │ │ ├── granite.cpython-311.pyc │ │ ├── idefics2_vision_model.cpython-311.pyc │ │ ├── interfaces.cpython-311.pyc │ │ ├── intern_vit.cpython-311.pyc │ │ ├── internlm2.cpython-311.pyc │ │ ├── internvl.cpython-311.pyc │ │ ├── jais.cpython-311.pyc │ │ ├── jamba.cpython-311.pyc │ │ ├── llama.cpython-311.pyc │ │ ├── llama_embedding.cpython-311.pyc │ │ ├── llava.cpython-311.pyc │ │ ├── llava_next.cpython-311.pyc │ │ ├── llava_next_video.cpython-311.pyc │ │ ├── llava_onevision.cpython-311.pyc │ │ ├── medusa.cpython-311.pyc │ │ ├── minicpm.cpython-311.pyc │ │ ├── minicpm3.cpython-311.pyc │ │ ├── minicpmv.cpython-311.pyc │ │ ├── mixtral.cpython-311.pyc │ │ ├── mixtral_quant.cpython-311.pyc │ │ ├── mllama.cpython-311.pyc │ │ ├── mlp_speculator.cpython-311.pyc │ │ ├── mpt.cpython-311.pyc │ │ ├── na_vit.cpython-311.pyc │ │ ├── nemotron.cpython-311.pyc │ │ ├── olmo.cpython-311.pyc │ │ ├── olmoe.cpython-311.pyc │ │ ├── opt.cpython-311.pyc │ │ ├── orion.cpython-311.pyc │ │ ├── paligemma.cpython-311.pyc │ │ ├── persimmon.cpython-311.pyc │ │ ├── phi.cpython-311.pyc │ │ ├── phi3.cpython-311.pyc │ │ ├── phi3_small.cpython-311.pyc │ │ ├── phi3v.cpython-311.pyc │ │ ├── phimoe.cpython-311.pyc │ │ ├── pixtral.cpython-311.pyc │ │ ├── qwen.cpython-311.pyc │ │ ├── qwen2.cpython-311.pyc │ │ ├── qwen2_moe.cpython-311.pyc │ │ ├── qwen2_vl.cpython-311.pyc │ │ ├── siglip.cpython-311.pyc │ │ ├── solar.cpython-311.pyc │ │ ├── stablelm.cpython-311.pyc │ │ ├── starcoder2.cpython-311.pyc │ │ ├── ultravox.cpython-311.pyc │ │ ├── utils.cpython-311.pyc │ │ └── xverse.cpython-311.pyc │ ├── arctic.py │ ├── baichuan.py │ ├── bart.py │ ├── blip.py │ ├── blip2.py │ ├── bloom.py │ ├── chameleon.py │ ├── chatglm.py │ ├── clip.py │ ├── commandr.py │ ├── dbrx.py │ ├── decilm.py │ ├── deepseek.py │ ├── deepseek_v2.py │ ├── eagle.py │ ├── exaone.py │ ├── falcon.py │ ├── fuyu.py │ ├── gemma.py │ ├── gemma2.py │ ├── gpt2.py │ ├── gpt_bigcode.py │ ├── gpt_j.py │ ├── gpt_neox.py │ ├── granite.py │ ├── idefics2_vision_model.py │ ├── interfaces.py │ ├── intern_vit.py │ ├── internlm2.py │ ├── internvl.py │ ├── jais.py │ ├── jamba.py │ ├── llama.py │ ├── llama_embedding.py │ ├── llava.py │ ├── llava_next.py │ ├── llava_next_video.py │ ├── llava_onevision.py │ ├── medusa.py │ ├── minicpm.py │ ├── minicpm3.py │ ├── minicpmv.py │ ├── mixtral.py │ ├── mixtral_quant.py │ ├── mllama.py │ ├── mlp_speculator.py │ ├── mpt.py │ ├── na_vit.py │ ├── nemotron.py │ ├── olmo.py │ ├── olmoe.py │ ├── opt.py │ ├── orion.py │ ├── paligemma.py │ ├── persimmon.py │ ├── phi.py │ ├── phi3.py │ ├── phi3_small.py │ ├── phi3v.py │ ├── phimoe.py │ ├── pixtral.py │ ├── qwen.py │ ├── qwen2.py │ ├── qwen2_moe.py │ ├── qwen2_vl.py │ ├── siglip.py │ ├── solar.py │ ├── stablelm.py │ ├── starcoder2.py │ ├── ultravox.py │ ├── utils.py │ └── xverse.py ├── parameter.py ├── pooling_metadata.py ├── sampling_metadata.py └── utils.py ├── multimodal ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── audio.cpython-311.pyc │ ├── base.cpython-311.pyc │ ├── image.cpython-311.pyc │ ├── registry.cpython-311.pyc │ ├── utils.cpython-311.pyc │ └── video.cpython-311.pyc ├── audio.py ├── base.py ├── image.py ├── registry.py ├── utils.py └── video.py ├── outputs.py ├── platforms ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── cpu.cpython-311.pyc │ ├── cuda.cpython-311.pyc │ ├── interface.cpython-311.pyc │ ├── interface.cpython-312.pyc │ ├── rocm.cpython-311.pyc │ └── tpu.cpython-311.pyc ├── cpu.py ├── cuda.py ├── interface.py ├── rocm.py └── tpu.py ├── plugins ├── __init__.py └── __pycache__ │ └── __init__.cpython-311.pyc ├── pooling_params.py ├── production_monitoring ├── README.md ├── docker-compose.yaml ├── grafana.json └── prometheus.yaml ├── prompt_adapter ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── layers.cpython-311.pyc │ ├── models.cpython-311.pyc │ ├── request.cpython-311.pyc │ ├── request.cpython-312.pyc │ ├── utils.cpython-311.pyc │ └── worker_manager.cpython-311.pyc ├── layers.py ├── models.py ├── request.py ├── utils.py └── worker_manager.py ├── py.typed ├── sampling_params.py ├── scalar_type.py ├── scripts.py ├── sequence.py ├── spec_decode ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── batch_expansion.cpython-311.pyc │ ├── draft_model_runner.cpython-311.pyc │ ├── interfaces.cpython-311.pyc │ ├── medusa_worker.cpython-311.pyc │ ├── metrics.cpython-311.pyc │ ├── metrics.cpython-312.pyc │ ├── mlp_speculator_worker.cpython-311.pyc │ ├── multi_step_worker.cpython-311.pyc │ ├── ngram_worker.cpython-311.pyc │ ├── proposer_worker_base.cpython-311.pyc │ ├── smaller_tp_proposer_worker.cpython-311.pyc │ ├── spec_decode_worker.cpython-311.pyc │ ├── target_model_runner.cpython-311.pyc │ ├── top1_proposer.cpython-311.pyc │ └── util.cpython-311.pyc ├── batch_expansion.py ├── draft_model_runner.py ├── interfaces.py ├── medusa_worker.py ├── metrics.py ├── mlp_speculator_worker.py ├── multi_step_worker.py ├── ngram_worker.py ├── proposer_worker_base.py ├── smaller_tp_proposer_worker.py ├── spec_decode_worker.py ├── target_model_runner.py ├── top1_proposer.py └── util.py ├── tracing.py ├── transformers_utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── config.cpython-311.pyc │ ├── detokenizer.cpython-311.pyc │ ├── processor.cpython-311.pyc │ ├── tokenizer.cpython-311.pyc │ └── utils.cpython-311.pyc ├── config.py ├── configs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── arctic.cpython-311.pyc │ │ ├── chatglm.cpython-311.pyc │ │ ├── dbrx.cpython-311.pyc │ │ ├── eagle.cpython-311.pyc │ │ ├── exaone.cpython-311.pyc │ │ ├── falcon.cpython-311.pyc │ │ ├── granite.cpython-311.pyc │ │ ├── internvl.cpython-311.pyc │ │ ├── jais.cpython-311.pyc │ │ ├── medusa.cpython-311.pyc │ │ ├── mllama.cpython-311.pyc │ │ ├── mlp_speculator.cpython-311.pyc │ │ ├── mpt.cpython-311.pyc │ │ ├── nemotron.cpython-311.pyc │ │ ├── solar.cpython-311.pyc │ │ └── ultravox.cpython-311.pyc │ ├── arctic.py │ ├── chatglm.py │ ├── dbrx.py │ ├── eagle.py │ ├── exaone.py │ ├── falcon.py │ ├── granite.py │ ├── internvl.py │ ├── jais.py │ ├── medusa.py │ ├── mllama.py │ ├── mlp_speculator.py │ ├── mpt.py │ ├── nemotron.py │ ├── solar.py │ └── ultravox.py ├── detokenizer.py ├── processor.py ├── tokenizer.py ├── tokenizer_group │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── base_tokenizer_group.cpython-311.pyc │ │ ├── ray_tokenizer_group.cpython-311.pyc │ │ └── tokenizer_group.cpython-311.pyc │ ├── base_tokenizer_group.py │ ├── ray_tokenizer_group.py │ └── tokenizer_group.py ├── tokenizers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── baichuan.cpython-311.pyc │ │ └── mistral.cpython-311.pyc │ ├── baichuan.py │ └── mistral.py └── utils.py ├── triton_utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── __init__.cpython-312.pyc │ ├── custom_cache_manager.cpython-311.pyc │ ├── custom_cache_manager.cpython-312.pyc │ ├── importing.cpython-311.pyc │ ├── importing.cpython-312.pyc │ ├── libentry.cpython-311.pyc │ └── libentry.cpython-312.pyc ├── custom_cache_manager.py ├── importing.py └── libentry.py ├── usage ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ └── usage_lib.cpython-311.pyc └── usage_lib.py ├── utils.py ├── version.py └── worker ├── __init__.py ├── __pycache__ ├── __init__.cpython-311.pyc ├── cache_engine.cpython-311.pyc ├── cpu_model_runner.cpython-311.pyc ├── cpu_worker.cpython-311.pyc ├── embedding_model_runner.cpython-311.pyc ├── enc_dec_model_runner.cpython-311.pyc ├── model_runner.cpython-311.pyc ├── model_runner_base.cpython-311.pyc ├── multi_step_model_runner.cpython-311.pyc ├── multi_step_tpu_worker.cpython-311.pyc ├── multi_step_worker.cpython-311.pyc ├── neuron_model_runner.cpython-311.pyc ├── neuron_worker.cpython-311.pyc ├── openvino_model_runner.cpython-311.pyc ├── openvino_worker.cpython-311.pyc ├── tpu_model_runner.cpython-311.pyc ├── tpu_worker.cpython-311.pyc ├── utils.cpython-311.pyc ├── worker.cpython-311.pyc ├── worker_base.cpython-311.pyc ├── xpu_model_runner.cpython-311.pyc └── xpu_worker.cpython-311.pyc ├── cache_engine.py ├── cpu_model_runner.py ├── cpu_worker.py ├── embedding_model_runner.py ├── enc_dec_model_runner.py ├── model_runner.py ├── model_runner_base.py ├── multi_step_model_runner.py ├── multi_step_tpu_worker.py ├── multi_step_worker.py ├── neuron_model_runner.py ├── neuron_worker.py ├── openvino_model_runner.py ├── openvino_worker.py ├── tpu_model_runner.py ├── tpu_worker.py ├── utils.py ├── worker.py ├── worker_base.py ├── xpu_model_runner.py └── xpu_worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | ./vllm_flash_attn 3 | 4 | vllm_flash_attn/* -------------------------------------------------------------------------------- /download_mmlu.sh: -------------------------------------------------------------------------------- 1 | 2 | mkdir data; wget https://people.eecs.berkeley.edu/~hendrycks/data.tar -O data/mmlu.tar 3 | tar --no-same-owner -xf data/mmlu.tar -C data && mv data/data data/mmlu 4 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | data/* 3 | ./data -------------------------------------------------------------------------------- /examples/__pycache__/prompt_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/__pycache__/prompt_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/__pycache__/prompt_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/__pycache__/prompt_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils.peft_utils import get_gptq_peft_model 2 | from .utils.exllama_utils import exllama_set_max_input_length 3 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/eval_tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .language_modeling_task import * 2 | from .sequence_classification_task import * 3 | from .text_summarization_task import * 4 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/eval_tasks/_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/eval_tasks/_utils/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from ._base import BaseGPTQForCausalLM, BaseQuantizeConfig 2 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/_base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/_base.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/_base.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/_const.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/_const.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/_const.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/_const.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/__pycache__/_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/modeling/__pycache__/_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/codegen.py: -------------------------------------------------------------------------------- 1 | from ._base import * 2 | 3 | 4 | class CodeGenGPTQForCausalLM(BaseGPTQForCausalLM): 5 | layer_type = "CodeGenBlock" 6 | layers_block_name = "transformer.h" 7 | outside_layer_modules = ["transformer.wte", "transformer.ln_f"] 8 | inside_layer_modules = [ 9 | ["attn.qkv_proj"], 10 | ["attn.out_proj"], 11 | ["mlp.fc_in"], 12 | ["mlp.fc_out"] 13 | ] 14 | 15 | 16 | __all__ = ["CodeGenGPTQForCausalLM"] 17 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/modeling/moss.py: -------------------------------------------------------------------------------- 1 | from ._base import * 2 | 3 | 4 | class MOSSGPTQForCausalLM(BaseGPTQForCausalLM): 5 | layer_type = "MossBlock" 6 | layers_block_name = "transformer.h" 7 | outside_layer_modules = ["transformer.wte", "transformer.ln_f"] 8 | inside_layer_modules = [ 9 | ["attn.qkv_proj"], 10 | ["attn.out_proj"], 11 | ["mlp.fc_in"], 12 | ["mlp.fc_out"] 13 | ] 14 | 15 | 16 | __all__ = ["MOSSGPTQForCausalLM"] 17 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/__pycache__/_fused_base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/__pycache__/_fused_base.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/__pycache__/_fused_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/__pycache__/_fused_base.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/qlinear/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/qlinear/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/qlinear/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/qlinear/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/triton_utils/__init__.py -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/mixin.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/mixin.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/mixin.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/nn_modules/triton_utils/__pycache__/mixin.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/nn_modules/triton_utils/mixin.py: -------------------------------------------------------------------------------- 1 | class TritonModuleMixin: 2 | @classmethod 3 | def warmup(cls, model, transpose=False, seqlen=2048): 4 | pass 5 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/ACKNOWLEDGEMENT.md: -------------------------------------------------------------------------------- 1 | The codes in this directory are mainly referenced from @qwopqwop200 's [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa/tree/cuda), which itself is based on [gptq](https://github.com/IST-DASLab/gptq) -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | from .gptq import * 2 | from .quantizer import * 3 | -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/quantization/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/quantization/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__pycache__/gptq.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/quantization/__pycache__/gptq.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__pycache__/gptq.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/quantization/__pycache__/gptq.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__pycache__/quantizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/quantization/__pycache__/quantizer.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/quantization/__pycache__/quantizer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/quantization/__pycache__/quantizer.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .perplexity_utils import Perplexity -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/data_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/data_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/data_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/data_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/exllama_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/exllama_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/exllama_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/exllama_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/import_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/import_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/peft_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/peft_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/peft_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/peft_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/perplexity_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/perplexity_utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/benchAcc/utils/utils/__pycache__/perplexity_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/benchAcc/utils/utils/__pycache__/perplexity_utils.cpython-311.pyc -------------------------------------------------------------------------------- /examples/download_mmlu.sh: -------------------------------------------------------------------------------- 1 | 2 | mkdir data; wget https://people.eecs.berkeley.edu/~hendrycks/data.tar -O data/mmlu.tar 3 | tar --no-same-owner -xf data/mmlu.tar -C data && mv data/data data/mmlu 4 | -------------------------------------------------------------------------------- /examples/input.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/input.pt -------------------------------------------------------------------------------- /examples/lenovo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/examples/lenovo.jpg -------------------------------------------------------------------------------- /examples/production_monitoring/prometheus.yaml: -------------------------------------------------------------------------------- 1 | # prometheus.yaml 2 | global: 3 | scrape_interval: 5s 4 | evaluation_interval: 30s 5 | 6 | scrape_configs: 7 | - job_name: vllm 8 | static_configs: 9 | - targets: 10 | - 'host.docker.internal:8000' 11 | -------------------------------------------------------------------------------- /figures/awq32.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/figures/awq32.gif -------------------------------------------------------------------------------- /figures/awq512.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/figures/awq512.gif -------------------------------------------------------------------------------- /figures/mixq32.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/figures/mixq32.gif -------------------------------------------------------------------------------- /figures/mixq512.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/figures/mixq512.gif -------------------------------------------------------------------------------- /figures/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/figures/output.png -------------------------------------------------------------------------------- /figures/textmixq.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/figures/textmixq.jpg -------------------------------------------------------------------------------- /vllm/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | ./vllm_flash_attn 3 | 4 | vllm_flash_attn/* -------------------------------------------------------------------------------- /vllm/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_core_ext.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/_core_ext.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_core_ext.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/_core_ext.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_custom_ops.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/_custom_ops.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_custom_ops.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/_custom_ops.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_ipex_ops.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/_ipex_ops.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/_version.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/_version.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/block.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/block.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/config.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/config.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/config.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/connections.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/connections.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/envs.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/envs.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/envs.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/envs.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/logger.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/logger.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/logger.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/logger.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/outputs.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/outputs.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/pooling_params.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/pooling_params.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/pooling_params.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/pooling_params.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sampling_params.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/sampling_params.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sampling_params.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/sampling_params.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scalar_type.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/scalar_type.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scalar_type.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/scalar_type.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/scripts.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/scripts.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sequence.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/sequence.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/sequence.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/sequence.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/tracing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/tracing.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/__pycache__/version.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/__pycache__/version.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__init__.py -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/models.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/models.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/request.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/request.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/request.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/__pycache__/worker_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/adapter_commons/__pycache__/worker_manager.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/adapter_commons/layers.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Tuple 3 | 4 | 5 | @dataclass 6 | class AdapterMapping: 7 | # Per every token in input_ids: 8 | index_mapping: Tuple[int, ...] 9 | # Per sampled token: 10 | prompt_mapping: Tuple[int, ...] 11 | 12 | def __post_init__(self): 13 | self.index_mapping = tuple(self.index_mapping) 14 | self.prompt_mapping = tuple(self.prompt_mapping) -------------------------------------------------------------------------------- /vllm/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/assets/__init__.py -------------------------------------------------------------------------------- /vllm/assets/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/assets/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/audio.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/assets/__pycache__/audio.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/assets/__pycache__/base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/image.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/assets/__pycache__/image.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/assets/__pycache__/video.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/assets/__pycache__/video.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/layer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/__pycache__/layer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/__pycache__/selector.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/__pycache__/selector.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__init__.py -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/abstract.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/abstract.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/blocksparse_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/blocksparse_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/flash_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/flash_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/flashinfer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/flashinfer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/ipex_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/ipex_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/openvino.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/openvino.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/pallas.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/pallas.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/rocm_flash_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/rocm_flash_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/torch_sdpa.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/torch_sdpa.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/backends/__pycache__/xformers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/backends/__pycache__/xformers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/__init__.py -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/ipex_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/__pycache__/ipex_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/paged_attn.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/__pycache__/paged_attn.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/prefix_prefill.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/__pycache__/prefix_prefill.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/__pycache__/triton_flash_attention.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/__pycache__/triton_flash_attention.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/blocksparse_attention/__init__.py -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/blocksparse_attention/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__pycache__/blocksparse_attention_kernel.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/blocksparse_attention/__pycache__/blocksparse_attention_kernel.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__pycache__/interface.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/blocksparse_attention/__pycache__/interface.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/attention/ops/blocksparse_attention/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/attention/ops/blocksparse_attention/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/compilation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/compilation/__init__.py -------------------------------------------------------------------------------- /vllm/compilation/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/compilation/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/compilation/__pycache__/backends.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/compilation/__pycache__/backends.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/compilation/__pycache__/wrapper.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/compilation/__pycache__/wrapper.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__init__.py -------------------------------------------------------------------------------- /vllm/core/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/block_manager_v1.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/block_manager_v1.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/block_manager_v2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/block_manager_v2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/embedding_model_block_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/embedding_model_block_manager.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/evictor_v1.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/evictor_v1.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/evictor_v2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/evictor_v2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/__pycache__/scheduler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/__pycache__/scheduler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__init__.py -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/block_table.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/block_table.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/common.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/common.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/cpu_gpu_block_allocator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/cpu_gpu_block_allocator.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/naive_block.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/naive_block.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/prefix_caching_block.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/prefix_caching_block.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/core/block/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/core/block/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | from .communication_op import * 2 | from .parallel_state import * 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/communication_op.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/communication_op.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/communication_op.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/communication_op.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/parallel_state.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/parallel_state.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/parallel_state.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/parallel_state.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__init__.py -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/cuda_wrapper.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/cuda_wrapper.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/custom_all_reduce.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/custom_all_reduce.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/custom_all_reduce_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/custom_all_reduce_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/pynccl.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/pynccl.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/pynccl_wrapper.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/pynccl_wrapper.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/shm_broadcast.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/shm_broadcast.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/distributed/device_communicators/__pycache__/tpu_communicator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/distributed/device_communicators/__pycache__/tpu_communicator.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__init__.py -------------------------------------------------------------------------------- /vllm/engine/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/arg_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/arg_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/arg_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/arg_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/async_llm_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/async_llm_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/async_timeout.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/async_timeout.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/llm_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/llm_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/metrics.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/metrics.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/metrics_types.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/metrics_types.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/__pycache__/protocol.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/__pycache__/protocol.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/multiprocessing/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/__pycache__/client.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/multiprocessing/__pycache__/client.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/multiprocessing/__pycache__/engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/multiprocessing/__pycache__/engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/output_processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__init__.py -------------------------------------------------------------------------------- /vllm/engine/output_processor/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/output_processor/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/output_processor/__pycache__/multi_step.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__pycache__/multi_step.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/output_processor/__pycache__/single_step.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__pycache__/single_step.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/output_processor/__pycache__/stop_checker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__pycache__/stop_checker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/engine/output_processor/__pycache__/util.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/engine/output_processor/__pycache__/util.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__init__.py -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/api_server.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__pycache__/api_server.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/chat_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__pycache__/chat_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/launcher.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__pycache__/launcher.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/llm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__pycache__/llm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/__pycache__/logger.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/__pycache__/logger.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__init__.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/api_server.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/api_server.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/cli_args.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/cli_args.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/logits_processors.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/logits_processors.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/protocol.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/protocol.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/run_batch.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/run_batch.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/serving_chat.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/serving_chat.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/serving_completion.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/serving_completion.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/serving_embedding.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/serving_embedding.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/serving_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/serving_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__pycache__/serving_tokenization.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/__pycache__/serving_tokenization.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from .abstract_tool_parser import ToolParser 2 | from .hermes_tool_parser import Hermes2ProToolParser 3 | from .mistral_tool_parser import MistralToolParser 4 | 5 | __all__ = ["ToolParser", "Hermes2ProToolParser", "MistralToolParser"] -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/tool_parsers/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__pycache__/abstract_tool_parser.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/tool_parsers/__pycache__/abstract_tool_parser.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__pycache__/hermes_tool_parser.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/tool_parsers/__pycache__/hermes_tool_parser.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__pycache__/mistral_tool_parser.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/tool_parsers/__pycache__/mistral_tool_parser.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/entrypoints/openai/tool_parsers/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/entrypoints/openai/tool_parsers/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/examples/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | data/* 3 | ./data -------------------------------------------------------------------------------- /vllm/executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__init__.py -------------------------------------------------------------------------------- /vllm/executor/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/cpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/cpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/distributed_gpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/distributed_gpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/executor_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/executor_base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/gpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/gpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/msgspec_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/msgspec_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/multiproc_gpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/multiproc_gpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/multiproc_worker_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/multiproc_worker_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/multiproc_xpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/multiproc_xpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/neuron_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/neuron_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/openvino_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/openvino_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_gpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/ray_gpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_tpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/ray_tpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/ray_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/ray_xpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/ray_xpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/tpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/tpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/executor/__pycache__/xpu_executor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/executor/__pycache__/xpu_executor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/data.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/data.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/data.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/data.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/parse.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/parse.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/parse.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/parse.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/preprocess.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/preprocess.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/registry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/registry.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/inputs/__pycache__/registry.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/inputs/__pycache__/registry.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/logging/__init__.py: -------------------------------------------------------------------------------- 1 | from vllm.logging.formatter import NewLineFormatter 2 | 3 | __all__ = [ 4 | "NewLineFormatter", 5 | ] 6 | -------------------------------------------------------------------------------- /vllm/logging/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/logging/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/logging/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/formatter.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/logging/__pycache__/formatter.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/logging/__pycache__/formatter.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/logging/__pycache__/formatter.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__init__.py -------------------------------------------------------------------------------- /vllm/lora/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/fully_sharded_layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/fully_sharded_layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/lora.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/lora.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/models.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/models.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/punica.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/punica.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/request.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/request.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/request.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/__pycache__/worker_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/__pycache__/worker_manager.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__init__.py -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_expand.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/bgmv_expand.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_expand_slice.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/bgmv_expand_slice.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/bgmv_shrink.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/bgmv_shrink.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_expand.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/sgmv_expand.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_expand_slice.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/sgmv_expand_slice.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/sgmv_shrink.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/sgmv_shrink.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/lora/ops/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/lora/ops/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/custom_op.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/custom_op.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/custom_op.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/custom_op.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/parameter.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/parameter.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/parameter.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/parameter.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/pooling_metadata.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/pooling_metadata.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/sampling_metadata.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/sampling_metadata.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/sampling_metadata.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/sampling_metadata.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/guided_decoding/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__pycache__/guided_fields.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/guided_decoding/__pycache__/guided_fields.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__pycache__/lm_format_enforcer_decoding.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/guided_decoding/__pycache__/lm_format_enforcer_decoding.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__pycache__/outlines_decoding.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/guided_decoding/__pycache__/outlines_decoding.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/guided_decoding/__pycache__/outlines_logits_processors.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/guided_decoding/__pycache__/outlines_logits_processors.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/activation.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/activation.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/layernorm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/layernorm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/linear.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/linear.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/linear.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/linear.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/logits_processor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/logits_processor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/pooler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/pooler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/rejection_sampler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/rejection_sampler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/resampler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/resampler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/rotary_embedding.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/rotary_embedding.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/sampler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/sampler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/spec_decode_base_sampler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/spec_decode_base_sampler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/spec_decode_base_sampler.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/spec_decode_base_sampler.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/typical_acceptance_sampler.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/typical_acceptance_sampler.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/vocab_parallel_embedding.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/vocab_parallel_embedding.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/__pycache__/vocab_parallel_embedding.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/__pycache__/vocab_parallel_embedding.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/fused_marlin_moe.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/fused_marlin_moe.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/fused_marlin_moe.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/fused_marlin_moe.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/fused_moe.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/fused_moe.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/fused_moe.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/fused_moe.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/layer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/layer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/layer.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/layer.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/fused_moe/__pycache__/moe_pallas.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/fused_moe/__pycache__/moe_pallas.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/mamba/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/mamba/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/mamba/ops/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/mamba/ops/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/__pycache__/causal_conv1d.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/mamba/ops/__pycache__/causal_conv1d.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/mamba/ops/__pycache__/mamba_ssm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/mamba/ops/__pycache__/mamba_ssm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/aqlm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/aqlm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/aqlm.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/aqlm.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/awq.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/awq.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/awq.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/awq.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/awq_marlin.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/awq_marlin.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/awq_marlin.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/awq_marlin.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/awq_triton.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/awq_triton.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/base_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/base_config.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/base_config.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/base_config.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/bitsandbytes.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/bitsandbytes.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/bitsandbytes.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/bitsandbytes.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/deepspeedfp.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/deepspeedfp.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/deepspeedfp.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/deepspeedfp.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/experts_int8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/experts_int8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/experts_int8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/experts_int8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/fbgemm_fp8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/fbgemm_fp8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/fbgemm_fp8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/fbgemm_fp8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/fp8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/fp8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/fp8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/fp8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gguf.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gguf.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gguf.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gguf.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gptq.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gptq.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gptq.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gptq.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gptq_marlin.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gptq_marlin.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gptq_marlin.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gptq_marlin.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gptq_marlin_24.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gptq_marlin_24.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/gptq_marlin_24.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/gptq_marlin_24.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/kv_cache.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/kv_cache.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/kv_cache.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/kv_cache.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/marlin.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/marlin.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/marlin.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/marlin.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/mixq.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/mixq.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/mixq.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/mixq.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/mixq4bit.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/mixq4bit.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/modelopt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/modelopt.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/modelopt.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/modelopt.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/neuron_quant.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/neuron_quant.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/neuron_quant.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/neuron_quant.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/qqq.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/qqq.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/qqq.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/qqq.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/schema.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/schema.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/tpu_int8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/tpu_int8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/__pycache__/tpu_int8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/__pycache__/tpu_int8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors_moe.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors_moe.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors_moe.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors_moe.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_scheme.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_scheme.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_scheme.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_scheme.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w4a16_24.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w4a16_24.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w4a16_24.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w4a16_24.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a16_fp8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a16_fp8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a16_fp8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a16_fp8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_fp8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_fp8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_fp8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_fp8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_int8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_int8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_int8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_int8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_wNa16.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_wNa16.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_wNa16.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_wNa16.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/gptq_marlin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/gptq_marlin.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/MPLinearKernel.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/MPLinearKernel.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/MPLinearKernel.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/MPLinearKernel.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/machete.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/machete.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/machete.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/machete.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/marlin.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/marlin.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/kernels/__pycache__/marlin.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/kernels/__pycache__/marlin.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .layer_utils import replace_parameter, update_tensor_inplace 2 | 3 | __all__ = ['update_tensor_inplace', 'replace_parameter'] 4 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/layer_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/layer_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/layer_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/layer_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/machete_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/machete_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/machete_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/machete_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_fp8.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_fp8.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_fp8.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_24.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_24.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_qqq.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_qqq.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/quant_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/quant_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/quant_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/quant_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/w8a8_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/w8a8_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantization/utils/__pycache__/w8a8_utils.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/layers/quantization/utils/__pycache__/w8a8_utils.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/loader.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/loader.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/neuron.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/neuron.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/openvino.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/openvino.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/tensorizer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/tensorizer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/model_loader/__pycache__/weight_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/model_loader/__pycache__/weight_utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/arctic.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/arctic.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/baichuan.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/baichuan.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/bart.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/bart.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/blip.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/blip.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/blip2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/blip2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/bloom.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/bloom.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/chameleon.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/chameleon.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/chatglm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/chatglm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/clip.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/clip.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/commandr.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/commandr.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/dbrx.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/dbrx.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/decilm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/decilm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/deepseek.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/deepseek.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/deepseek_v2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/deepseek_v2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/eagle.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/eagle.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/exaone.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/exaone.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/falcon.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/falcon.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/fuyu.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/fuyu.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/gemma.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/gemma.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/gemma2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/gemma2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/gpt2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/gpt2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/gpt_bigcode.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/gpt_bigcode.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/gpt_j.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/gpt_j.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/gpt_neox.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/gpt_neox.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/granite.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/granite.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/idefics2_vision_model.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/idefics2_vision_model.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/intern_vit.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/intern_vit.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/internlm2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/internlm2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/internvl.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/internvl.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/jais.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/jais.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/jamba.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/jamba.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/llama.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/llama.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/llama_embedding.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/llama_embedding.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/llava.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/llava.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/llava_next.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/llava_next.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/llava_next_video.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/llava_next_video.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/llava_onevision.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/llava_onevision.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/medusa.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/medusa.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/minicpm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/minicpm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/minicpm3.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/minicpm3.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/minicpmv.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/minicpmv.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/mixtral.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/mixtral.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/mixtral_quant.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/mixtral_quant.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/mllama.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/mllama.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/mlp_speculator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/mlp_speculator.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/mpt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/mpt.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/na_vit.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/na_vit.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/nemotron.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/nemotron.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/olmo.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/olmo.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/olmoe.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/olmoe.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/opt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/opt.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/orion.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/orion.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/paligemma.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/paligemma.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/persimmon.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/persimmon.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/phi.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/phi.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/phi3.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/phi3.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/phi3_small.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/phi3_small.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/phi3v.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/phi3v.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/phimoe.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/phimoe.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/pixtral.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/pixtral.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/qwen.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/qwen.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/qwen2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/qwen2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/qwen2_moe.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/qwen2_moe.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/qwen2_vl.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/qwen2_vl.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/siglip.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/siglip.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/solar.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/solar.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/stablelm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/stablelm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/starcoder2.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/starcoder2.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/ultravox.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/ultravox.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/__pycache__/xverse.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/model_executor/models/__pycache__/xverse.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/model_executor/models/phi3.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Adapted from llama.py 3 | """Inference-only Phi3 model code inherit from Llama.py""" 4 | 5 | from vllm.model_executor.models.llama import LlamaForCausalLM 6 | 7 | 8 | class Phi3ForCausalLM(LlamaForCausalLM): 9 | 10 | packed_modules_mapping = { 11 | "qkv_proj": [ 12 | "qkv_proj", 13 | ], 14 | "gate_up_proj": [ 15 | "gate_up_proj", 16 | ], 17 | } 18 | -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/audio.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/audio.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/image.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/image.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/registry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/registry.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/multimodal/__pycache__/video.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/multimodal/__pycache__/video.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/cpu.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/cpu.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/cuda.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/cuda.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/interface.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/interface.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/interface.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/interface.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/rocm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/rocm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/__pycache__/tpu.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/platforms/__pycache__/tpu.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/platforms/cpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .interface import Platform, PlatformEnum 4 | 5 | 6 | class CpuPlatform(Platform): 7 | _enum = PlatformEnum.CPU 8 | 9 | @classmethod 10 | def get_device_name(cls, device_id: int = 0) -> str: 11 | return "cpu" 12 | 13 | @classmethod 14 | def inference_mode(cls): 15 | return torch.no_grad() 16 | -------------------------------------------------------------------------------- /vllm/platforms/tpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .interface import Platform, PlatformEnum 4 | 5 | 6 | class TpuPlatform(Platform): 7 | _enum = PlatformEnum.TPU 8 | 9 | @classmethod 10 | def get_device_name(cls, device_id: int = 0) -> str: 11 | raise NotImplementedError 12 | 13 | @classmethod 14 | def inference_mode(cls): 15 | return torch.no_grad() 16 | -------------------------------------------------------------------------------- /vllm/plugins/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/plugins/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/production_monitoring/prometheus.yaml: -------------------------------------------------------------------------------- 1 | # prometheus.yaml 2 | global: 3 | scrape_interval: 5s 4 | evaluation_interval: 30s 5 | 6 | scrape_configs: 7 | - job_name: vllm 8 | static_configs: 9 | - targets: 10 | - 'host.docker.internal:8000' 11 | -------------------------------------------------------------------------------- /vllm/prompt_adapter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__init__.py -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/layers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/layers.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/models.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/models.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/request.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/request.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/request.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/prompt_adapter/__pycache__/worker_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/prompt_adapter/__pycache__/worker_manager.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. 2 | # The vllm package uses inline types. 3 | -------------------------------------------------------------------------------- /vllm/spec_decode/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__init__.py -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/batch_expansion.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/batch_expansion.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/draft_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/draft_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/interfaces.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/interfaces.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/medusa_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/medusa_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/metrics.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/metrics.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/metrics.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/metrics.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/mlp_speculator_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/mlp_speculator_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/multi_step_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/multi_step_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/ngram_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/ngram_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/proposer_worker_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/proposer_worker_base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/smaller_tp_proposer_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/smaller_tp_proposer_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/spec_decode_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/spec_decode_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/target_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/target_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/top1_proposer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/top1_proposer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/spec_decode/__pycache__/util.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/spec_decode/__pycache__/util.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/__pycache__/config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__pycache__/config.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/__pycache__/detokenizer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__pycache__/detokenizer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/__pycache__/processor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__pycache__/processor.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/__pycache__/tokenizer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__pycache__/tokenizer.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/arctic.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/arctic.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/chatglm.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/chatglm.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/dbrx.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/dbrx.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/eagle.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/eagle.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/exaone.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/exaone.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/falcon.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/falcon.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/granite.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/granite.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/internvl.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/internvl.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/jais.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/jais.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/medusa.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/medusa.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/mllama.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/mllama.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/mlp_speculator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/mlp_speculator.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/mpt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/mpt.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/nemotron.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/nemotron.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/solar.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/solar.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__pycache__/ultravox.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/configs/__pycache__/ultravox.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizer_group/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/__pycache__/base_tokenizer_group.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizer_group/__pycache__/base_tokenizer_group.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/__pycache__/ray_tokenizer_group.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizer_group/__pycache__/ray_tokenizer_group.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer_group/__pycache__/tokenizer_group.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizer_group/__pycache__/tokenizer_group.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | from vllm.transformers_utils.tokenizers.baichuan import BaichuanTokenizer 2 | from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer 3 | 4 | __all__ = ["BaichuanTokenizer", "MistralTokenizer"] 5 | -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizers/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/__pycache__/baichuan.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizers/__pycache__/baichuan.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizers/__pycache__/mistral.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/transformers_utils/tokenizers/__pycache__/mistral.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/transformers_utils/utils.py: -------------------------------------------------------------------------------- 1 | from os import PathLike 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | 6 | def check_gguf_file(model: Union[str, PathLike]) -> bool: 7 | """Check if the file is a GGUF model.""" 8 | model = Path(model) 9 | if not model.is_file(): 10 | return False 11 | elif model.suffix == ".gguf": 12 | return True 13 | 14 | with open(model, "rb") as f: 15 | header = f.read(4) 16 | return header == b"GGUF" 17 | -------------------------------------------------------------------------------- /vllm/triton_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from vllm.triton_utils.importing import HAS_TRITON 2 | 3 | __all__ = ["HAS_TRITON"] 4 | 5 | if HAS_TRITON: 6 | 7 | from vllm.triton_utils.custom_cache_manager import ( 8 | maybe_set_triton_cache_manager) 9 | from vllm.triton_utils.libentry import libentry 10 | 11 | __all__ += ["maybe_set_triton_cache_manager", "libentry"] 12 | -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/__init__.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/__init__.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/custom_cache_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/custom_cache_manager.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/custom_cache_manager.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/custom_cache_manager.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/importing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/importing.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/importing.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/importing.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/libentry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/libentry.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/__pycache__/libentry.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/triton_utils/__pycache__/libentry.cpython-312.pyc -------------------------------------------------------------------------------- /vllm/triton_utils/importing.py: -------------------------------------------------------------------------------- 1 | from importlib.util import find_spec 2 | 3 | from vllm.logger import init_logger 4 | 5 | logger = init_logger(__name__) 6 | 7 | HAS_TRITON = find_spec("triton") is not None 8 | 9 | if not HAS_TRITON: 10 | logger.info("Triton not installed; certain GPU-related functions" 11 | " will not be available.") 12 | -------------------------------------------------------------------------------- /vllm/usage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/usage/__init__.py -------------------------------------------------------------------------------- /vllm/usage/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/usage/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/usage/__pycache__/usage_lib.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/usage/__pycache__/usage_lib.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/version.py: -------------------------------------------------------------------------------- 1 | try: 2 | from ._version import __version__, __version_tuple__ 3 | except Exception as e: 4 | import warnings 5 | 6 | warnings.warn(f"Failed to read commit hash:\n{e}", 7 | RuntimeWarning, 8 | stacklevel=2) 9 | 10 | __version__ = "dev" 11 | __version_tuple__ = (0, 0, __version__) 12 | -------------------------------------------------------------------------------- /vllm/worker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__init__.py -------------------------------------------------------------------------------- /vllm/worker/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cache_engine.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/cache_engine.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cpu_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/cpu_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/cpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/cpu_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/embedding_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/embedding_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/enc_dec_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/enc_dec_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/model_runner_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/model_runner_base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/multi_step_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/multi_step_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/multi_step_tpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/multi_step_tpu_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/multi_step_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/multi_step_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/neuron_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/neuron_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/neuron_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/neuron_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/openvino_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/openvino_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/openvino_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/openvino_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/tpu_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/tpu_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/tpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/tpu_worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/worker.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/worker_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/worker_base.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/xpu_model_runner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/xpu_model_runner.cpython-311.pyc -------------------------------------------------------------------------------- /vllm/worker/__pycache__/xpu_worker.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Qcompiler/vllm-mixed-precision/fef6b3c5cb6dde90e8ca410231d18df57d73c64e/vllm/worker/__pycache__/xpu_worker.cpython-311.pyc --------------------------------------------------------------------------------