├── GLakeServe
    ├── CMakeLists.txt
    ├── CONTRIBUTING.md
    ├── Dockerfile
    ├── Dockerfile.cpu
    ├── Dockerfile.neuron
    ├── Dockerfile.rocm
    ├── LICENSE
    ├── MANIFEST.in
    ├── README.md
    ├── benchmarks
    │   ├── README.md
    │   ├── backend_request_func.py
    │   ├── benchmark_latency.py
    │   ├── benchmark_prefix_caching.py
    │   ├── benchmark_serving.py
    │   ├── benchmark_throughput.py
    │   ├── jsonl.py
    │   ├── kernels
    │   │   ├── benchmark_aqlm.py
    │   │   ├── benchmark_mixtral_moe.py
    │   │   ├── benchmark_paged_attention.py
    │   │   └── benchmark_rope.py
    │   ├── launch_tgi_server.sh
    │   ├── overheads
    │   │   └── benchmark_hashing.py
    │   └── sonnet.txt
    ├── cmake
    │   ├── cpu_extension.cmake
    │   ├── hipify.py
    │   └── utils.cmake
    ├── collect_env.py
    ├── csrc
    │   ├── activation_kernels.cu
    │   ├── attention
    │   │   ├── attention_dtypes.h
    │   │   ├── attention_generic.cuh
    │   │   ├── attention_kernels.cu
    │   │   ├── attention_utils.cuh
    │   │   ├── dtype_bfloat16.cuh
    │   │   ├── dtype_float16.cuh
    │   │   ├── dtype_float32.cuh
    │   │   └── dtype_fp8.cuh
    │   ├── cache.h
    │   ├── cache_kernels.cu
    │   ├── cpu
    │   │   ├── activation.cpp
    │   │   ├── attention.cpp
    │   │   ├── cache.cpp
    │   │   ├── cpu_types.hpp
    │   │   ├── layernorm.cpp
    │   │   ├── pos_encoding.cpp
    │   │   └── pybind.cpp
    │   ├── cuda_compat.h
    │   ├── cuda_utils.h
    │   ├── cuda_utils_kernels.cu
    │   ├── custom_all_reduce.cu
    │   ├── custom_all_reduce.cuh
    │   ├── custom_all_reduce_test.cu
    │   ├── dispatch_utils.h
    │   ├── layernorm_kernels.cu
    │   ├── moe
    │   │   ├── moe_ops.cpp
    │   │   ├── moe_ops.h
    │   │   └── topk_softmax_kernels.cu
    │   ├── moe_align_block_size_kernels.cu
    │   ├── ops.h
    │   ├── pos_encoding_kernels.cu
    │   ├── punica
    │   │   ├── LICENSE
    │   │   ├── bgmv
    │   │   │   ├── bgmv_bf16_bf16_bf16.cu
    │   │   │   ├── bgmv_bf16_fp32_bf16.cu
    │   │   │   ├── bgmv_config.h
    │   │   │   ├── bgmv_fp16_fp16_fp16.cu
    │   │   │   ├── bgmv_fp16_fp32_fp16.cu
    │   │   │   ├── bgmv_fp32_bf16_bf16.cu
    │   │   │   ├── bgmv_fp32_fp16_fp16.cu
    │   │   │   ├── bgmv_impl.cuh
    │   │   │   ├── generator.py
    │   │   │   └── vec_dtypes.cuh
    │   │   ├── punica_ops.cu
    │   │   ├── punica_ops.h
    │   │   ├── punica_pybind.cpp
    │   │   └── type_convert.h
    │   ├── pybind.cpp
    │   ├── quantization
    │   │   ├── aqlm
    │   │   │   └── gemm_kernels.cu
    │   │   ├── awq
    │   │   │   ├── dequantize.cuh
    │   │   │   └── gemm_kernels.cu
    │   │   ├── fp8
    │   │   │   ├── amd
    │   │   │   │   ├── hip_float8.h
    │   │   │   │   ├── hip_float8_impl.h
    │   │   │   │   └── quant_utils.cuh
    │   │   │   ├── common.cu
    │   │   │   └── nvidia
    │   │   │   │   └── quant_utils.cuh
    │   │   ├── gptq
    │   │   │   ├── compat.cuh
    │   │   │   ├── matrix_view.cuh
    │   │   │   ├── q_gemm.cu
    │   │   │   ├── qdq_2.cuh
    │   │   │   ├── qdq_3.cuh
    │   │   │   ├── qdq_4.cuh
    │   │   │   ├── qdq_8.cuh
    │   │   │   └── qdq_util.cuh
    │   │   ├── gptq_marlin
    │   │   │   ├── gptq_marlin.cu
    │   │   │   ├── gptq_marlin.cuh
    │   │   │   └── gptq_marlin_repack.cu
    │   │   ├── marlin
    │   │   │   ├── LICENSE
    │   │   │   └── marlin_cuda_kernel.cu
    │   │   └── squeezellm
    │   │   │   └── quant_cuda_kernel.cu
    │   └── reduction_utils.cuh
    ├── examples
    │   ├── api_client.py
    │   ├── aqlm_example.py
    │   ├── fp8
    │   │   ├── README.md
    │   │   ├── extract_scales.py
    │   │   └── quantizer
    │   │   │   ├── README.md
    │   │   │   └── quantize.py
    │   ├── gradio_openai_chatbot_webserver.py
    │   ├── gradio_webserver.py
    │   ├── llava_example.py
    │   ├── llm_engine_example.py
    │   ├── logging_configuration.md
    │   ├── multilora_inference.py
    │   ├── offline_inference.py
    │   ├── offline_inference_arctic.py
    │   ├── offline_inference_distributed.py
    │   ├── offline_inference_embedding.py
    │   ├── offline_inference_neuron.py
    │   ├── offline_inference_openai.md
    │   ├── offline_inference_with_prefix.py
    │   ├── openai_chat_completion_client.py
    │   ├── openai_completion_client.py
    │   ├── openai_embedding_client.py
    │   ├── openi_example_batch.jsonl
    │   ├── production_monitoring
    │   │   ├── README.md
    │   │   ├── docker-compose.yaml
    │   │   ├── grafana.json
    │   │   └── prometheus.yaml
    │   ├── save_sharded_state.py
    │   ├── template_alpaca.jinja
    │   ├── template_baichuan.jinja
    │   ├── template_chatglm.jinja
    │   ├── template_chatglm2.jinja
    │   ├── template_chatml.jinja
    │   ├── template_falcon.jinja
    │   ├── template_falcon_180b.jinja
    │   ├── template_inkbot.jinja
    │   └── tensorize_vllm_model.py
    ├── format.sh
    ├── pyproject.toml
    ├── requirements-build.txt
    ├── requirements-common.txt
    ├── requirements-cuda.txt
    ├── rocm_patch
    │   └── rocm_bf16.patch
    ├── setup.py
    ├── tests
    │   ├── __init__.py
    │   ├── async_engine
    │   │   ├── __init__.py
    │   │   ├── api_server_async_engine.py
    │   │   ├── test_api_server.py
    │   │   ├── test_async_llm_engine.py
    │   │   ├── test_chat_template.py
    │   │   ├── test_merge_async_iterators.py
    │   │   ├── test_openapi_server_ray.py
    │   │   └── test_request_tracker.py
    │   ├── basic_correctness
    │   │   ├── __init__.py
    │   │   ├── test_basic_correctness.py
    │   │   ├── test_chunked_prefill.py
    │   │   └── test_preemption.py
    │   ├── conftest.py
    │   ├── core
    │   │   ├── __init__.py
    │   │   ├── block
    │   │   │   ├── __init__.py
    │   │   │   ├── conftest.py
    │   │   │   ├── e2e
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── conftest.py
    │   │   │   │   └── test_correctness.py
    │   │   │   ├── test_block_manager_v2.py
    │   │   │   ├── test_block_table.py
    │   │   │   ├── test_common.py
    │   │   │   ├── test_cpu_gpu_block_allocator.py
    │   │   │   ├── test_naive_block.py
    │   │   │   └── test_prefix_caching_block.py
    │   │   ├── test_block_manager.py
    │   │   ├── test_chunked_prefill_scheduler.py
    │   │   ├── test_scheduler.py
    │   │   └── utils.py
    │   ├── distributed
    │   │   ├── __init__.py
    │   │   ├── test_basic_distributed_correctness.py
    │   │   ├── test_chunked_prefill_distributed.py
    │   │   ├── test_comm_ops.py
    │   │   ├── test_custom_all_reduce.py
    │   │   ├── test_pynccl.py
    │   │   └── test_pynccl_library.py
    │   ├── engine
    │   │   ├── __init__.py
    │   │   ├── output_processor
    │   │   │   ├── __init__.py
    │   │   │   └── test_multi_step.py
    │   │   ├── test_computed_prefix_blocks.py
    │   │   ├── test_detokenization.py
    │   │   ├── test_multiproc_workers.py
    │   │   ├── test_skip_tokenizer_init.py
    │   │   ├── test_stop_reason.py
    │   │   └── test_stop_strings.py
    │   ├── entrypoints
    │   │   ├── __init__.py
    │   │   ├── openai
    │   │   │   └── test_serving_chat.py
    │   │   ├── test_guided_processors.py
    │   │   ├── test_llm_generate.py
    │   │   ├── test_openai_run_batch.py
    │   │   ├── test_openai_server.py
    │   │   └── test_server_oot_registration.py
    │   ├── fp8_kv
    │   │   ├── llama2-70b-fp8-kv
    │   │   │   └── kv_cache_scales.json
    │   │   └── llama2-7b-fp8-kv
    │   │   │   └── kv_cache_scales.json
    │   ├── kernels
    │   │   ├── __init__.py
    │   │   ├── allclose_default.py
    │   │   ├── conftest.py
    │   │   ├── test_activation.py
    │   │   ├── test_attention.py
    │   │   ├── test_cache.py
    │   │   ├── test_layernorm.py
    │   │   ├── test_moe.py
    │   │   ├── test_pos_encoding.py
    │   │   ├── test_prefix_prefill.py
    │   │   ├── test_rand.py
    │   │   └── test_sampler.py
    │   ├── lora
    │   │   ├── __init__.py
    │   │   ├── conftest.py
    │   │   ├── test_baichuan.py
    │   │   ├── test_chatglm3.py
    │   │   ├── test_gemma.py
    │   │   ├── test_layer_variation.py
    │   │   ├── test_layers.py
    │   │   ├── test_llama.py
    │   │   ├── test_lora.py
    │   │   ├── test_lora_checkpoints.py
    │   │   ├── test_lora_manager.py
    │   │   ├── test_mixtral.py
    │   │   ├── test_punica.py
    │   │   ├── test_quant_model.py
    │   │   ├── test_tokenizer_group.py
    │   │   ├── test_utils.py
    │   │   ├── test_worker.py
    │   │   └── utils.py
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   └── test_metrics.py
    │   ├── model_executor
    │   │   ├── __init__.py
    │   │   └── weight_utils.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── test_aqlm.py
    │   │   ├── test_big_models.py
    │   │   ├── test_embedding.py
    │   │   ├── test_fp8.py
    │   │   ├── test_gptq_marlin.py
    │   │   ├── test_llava.py
    │   │   ├── test_marlin.py
    │   │   ├── test_mistral.py
    │   │   ├── test_models.py
    │   │   ├── test_oot_registration.py
    │   │   └── utils.py
    │   ├── prefix_caching
    │   │   ├── __init__.py
    │   │   └── test_prefix_caching.py
    │   ├── prompts
    │   │   ├── example.txt
    │   │   └── summary.txt
    │   ├── quantization
    │   │   ├── __init__.py
    │   │   ├── test_configs.py
    │   │   └── test_fp8.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── test_beam_search.py
    │   │   ├── test_ignore_eos.py
    │   │   ├── test_logits_processor.py
    │   │   ├── test_logprobs.py
    │   │   ├── test_ranks.py
    │   │   ├── test_rejection_sampler.py
    │   │   ├── test_sampler.py
    │   │   └── test_seeded_generate.py
    │   ├── spec_decode
    │   │   ├── __init__.py
    │   │   ├── e2e
    │   │   │   ├── __init__.py
    │   │   │   ├── conftest.py
    │   │   │   ├── test_compatibility.py
    │   │   │   ├── test_integration.py
    │   │   │   ├── test_integration_dist.py
    │   │   │   ├── test_logprobs.py
    │   │   │   ├── test_multistep_correctness.py
    │   │   │   └── test_ngram_correctness.py
    │   │   ├── test_batch_expansion.py
    │   │   ├── test_dynamic_spec_decode.py
    │   │   ├── test_metrics.py
    │   │   ├── test_multi_step_worker.py
    │   │   ├── test_ngram_worker.py
    │   │   ├── test_spec_decode_worker.py
    │   │   ├── test_utils.py
    │   │   └── utils.py
    │   ├── tensorizer_loader
    │   │   ├── __init__.py
    │   │   └── test_tensorizer.py
    │   ├── test_cache_block_hashing.py
    │   ├── test_config.py
    │   ├── test_logger.py
    │   ├── test_logits_processor.py
    │   ├── test_regression.py
    │   ├── test_sampling_params.py
    │   ├── test_sequence.py
    │   ├── test_sharded_state_loader.py
    │   ├── tokenization
    │   │   ├── __init__.py
    │   │   ├── test_cached_tokenizer.py
    │   │   ├── test_detokenize.py
    │   │   ├── test_tokenizer.py
    │   │   └── test_tokenizer_group.py
    │   ├── utils.py
    │   └── worker
    │   │   ├── __init__.py
    │   │   ├── test_model_runner.py
    │   │   └── test_swap.py
    ├── vllm
    │   ├── __init__.py
    │   ├── _custom_ops.py
    │   ├── attention
    │   │   ├── __init__.py
    │   │   ├── backends
    │   │   │   ├── __init__.py
    │   │   │   ├── abstract.py
    │   │   │   ├── flash_attn.py
    │   │   │   ├── flashinfer.py
    │   │   │   ├── rocm_flash_attn.py
    │   │   │   ├── torch_sdpa.py
    │   │   │   └── xformers.py
    │   │   ├── layer.py
    │   │   ├── ops
    │   │   │   ├── __init__.py
    │   │   │   ├── paged_attn.py
    │   │   │   ├── prefix_prefill.py
    │   │   │   └── triton_flash_attention.py
    │   │   └── selector.py
    │   ├── block.py
    │   ├── config.py
    │   ├── core
    │   │   ├── __init__.py
    │   │   ├── block
    │   │   │   ├── __init__.py
    │   │   │   ├── block_table.py
    │   │   │   ├── common.py
    │   │   │   ├── cpu_gpu_block_allocator.py
    │   │   │   ├── interfaces.py
    │   │   │   ├── naive_block.py
    │   │   │   └── prefix_caching_block.py
    │   │   ├── block_manager_v1.py
    │   │   ├── block_manager_v2.py
    │   │   ├── embedding_model_block_manager.py
    │   │   ├── evictor_v1.py
    │   │   ├── evictor_v2.py
    │   │   ├── interfaces.py
    │   │   ├── policy.py
    │   │   └── scheduler.py
    │   ├── distributed
    │   │   ├── __init__.py
    │   │   ├── communication_op.py
    │   │   ├── device_communicators
    │   │   │   ├── __init__.py
    │   │   │   ├── custom_all_reduce.py
    │   │   │   ├── pynccl.py
    │   │   │   └── pynccl_wrapper.py
    │   │   ├── parallel_state.py
    │   │   └── utils.py
    │   ├── engine
    │   │   ├── __init__.py
    │   │   ├── arg_utils.py
    │   │   ├── async_llm_engine.py
    │   │   ├── llm_engine.py
    │   │   ├── metrics.py
    │   │   └── output_processor
    │   │   │   ├── __init__.py
    │   │   │   ├── interfaces.py
    │   │   │   ├── multi_step.py
    │   │   │   ├── single_step.py
    │   │   │   ├── stop_checker.py
    │   │   │   └── util.py
    │   ├── entrypoints
    │   │   ├── __init__.py
    │   │   ├── api_server.py
    │   │   ├── llm.py
    │   │   └── openai
    │   │   │   ├── __init__.py
    │   │   │   ├── api_server.py
    │   │   │   ├── cli_args.py
    │   │   │   ├── protocol.py
    │   │   │   ├── run_batch.py
    │   │   │   ├── serving_chat.py
    │   │   │   ├── serving_completion.py
    │   │   │   ├── serving_embedding.py
    │   │   │   └── serving_engine.py
    │   ├── envs.py
    │   ├── executor
    │   │   ├── __init__.py
    │   │   ├── cpu_executor.py
    │   │   ├── distributed_gpu_executor.py
    │   │   ├── executor_base.py
    │   │   ├── gpu_executor.py
    │   │   ├── multiproc_gpu_executor.py
    │   │   ├── multiproc_worker_utils.py
    │   │   ├── neuron_executor.py
    │   │   ├── ray_gpu_executor.py
    │   │   └── ray_utils.py
    │   ├── logger.py
    │   ├── logging
    │   │   ├── __init__.py
    │   │   └── formatter.py
    │   ├── lora
    │   │   ├── __init__.py
    │   │   ├── fully_sharded_layers.py
    │   │   ├── layers.py
    │   │   ├── lora.py
    │   │   ├── models.py
    │   │   ├── punica.py
    │   │   ├── request.py
    │   │   ├── utils.py
    │   │   └── worker_manager.py
    │   ├── model_executor
    │   │   ├── __init__.py
    │   │   ├── guided_decoding
    │   │   │   ├── __init__.py
    │   │   │   ├── lm_format_enforcer_decoding.py
    │   │   │   ├── outlines_decoding.py
    │   │   │   └── outlines_logits_processors.py
    │   │   ├── layers
    │   │   │   ├── __init__.py
    │   │   │   ├── activation.py
    │   │   │   ├── fused_moe
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── configs
    │   │   │   │   │   ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json
    │   │   │   │   │   ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   ├── E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json
    │   │   │   │   │   ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   ├── E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json
    │   │   │   │   │   ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json
    │   │   │   │   │   ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   ├── E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   ├── E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json
    │   │   │   │   │   ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json
    │   │   │   │   │   ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json
    │   │   │   │   │   └── README
    │   │   │   │   └── fused_moe.py
    │   │   │   ├── layernorm.py
    │   │   │   ├── linear.py
    │   │   │   ├── logits_processor.py
    │   │   │   ├── ops
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── rand.py
    │   │   │   │   └── sample.py
    │   │   │   ├── pooler.py
    │   │   │   ├── quantization
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── aqlm.py
    │   │   │   │   ├── awq.py
    │   │   │   │   ├── base_config.py
    │   │   │   │   ├── deepspeedfp.py
    │   │   │   │   ├── fp8.py
    │   │   │   │   ├── gptq.py
    │   │   │   │   ├── gptq_marlin.py
    │   │   │   │   ├── marlin.py
    │   │   │   │   ├── schema.py
    │   │   │   │   └── squeezellm.py
    │   │   │   ├── rejection_sampler.py
    │   │   │   ├── rotary_embedding.py
    │   │   │   ├── sampler.py
    │   │   │   └── vocab_parallel_embedding.py
    │   │   ├── model_loader
    │   │   │   ├── __init__.py
    │   │   │   ├── loader.py
    │   │   │   ├── neuron.py
    │   │   │   ├── tensorizer.py
    │   │   │   ├── utils.py
    │   │   │   └── weight_utils.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── arctic.py
    │   │   │   ├── baichuan.py
    │   │   │   ├── bloom.py
    │   │   │   ├── chatglm.py
    │   │   │   ├── commandr.py
    │   │   │   ├── dbrx.py
    │   │   │   ├── decilm.py
    │   │   │   ├── deepseek.py
    │   │   │   ├── falcon.py
    │   │   │   ├── gemma.py
    │   │   │   ├── gpt2.py
    │   │   │   ├── gpt_bigcode.py
    │   │   │   ├── gpt_j.py
    │   │   │   ├── gpt_neox.py
    │   │   │   ├── internlm2.py
    │   │   │   ├── jais.py
    │   │   │   ├── llama.py
    │   │   │   ├── llama_embedding.py
    │   │   │   ├── llava.py
    │   │   │   ├── minicpm.py
    │   │   │   ├── mixtral.py
    │   │   │   ├── mixtral_quant.py
    │   │   │   ├── mpt.py
    │   │   │   ├── olmo.py
    │   │   │   ├── opt.py
    │   │   │   ├── orion.py
    │   │   │   ├── phi.py
    │   │   │   ├── qwen.py
    │   │   │   ├── qwen2.py
    │   │   │   ├── qwen2_moe.py
    │   │   │   ├── stablelm.py
    │   │   │   ├── starcoder2.py
    │   │   │   └── xverse.py
    │   │   ├── pooling_metadata.py
    │   │   ├── sampling_metadata.py
    │   │   └── utils.py
    │   ├── outputs.py
    │   ├── pooling_params.py
    │   ├── py.typed
    │   ├── sampling_params.py
    │   ├── sequence.py
    │   ├── spec_decode
    │   │   ├── __init__.py
    │   │   ├── batch_expansion.py
    │   │   ├── interfaces.py
    │   │   ├── metrics.py
    │   │   ├── multi_step_worker.py
    │   │   ├── ngram_worker.py
    │   │   ├── spec_decode_worker.py
    │   │   ├── top1_proposer.py
    │   │   └── util.py
    │   ├── transformers_utils
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── configs
    │   │   │   ├── __init__.py
    │   │   │   ├── arctic.py
    │   │   │   ├── chatglm.py
    │   │   │   ├── dbrx.py
    │   │   │   ├── falcon.py
    │   │   │   ├── jais.py
    │   │   │   └── mpt.py
    │   │   ├── detokenizer.py
    │   │   ├── tokenizer.py
    │   │   ├── tokenizer_group
    │   │   │   ├── __init__.py
    │   │   │   ├── base_tokenizer_group.py
    │   │   │   ├── ray_tokenizer_group.py
    │   │   │   └── tokenizer_group.py
    │   │   └── tokenizers
    │   │   │   ├── __init__.py
    │   │   │   └── baichuan.py
    │   ├── usage
    │   │   ├── __init__.py
    │   │   └── usage_lib.py
    │   ├── utils.py
    │   └── worker
    │   │   ├── __init__.py
    │   │   ├── cache_engine.py
    │   │   ├── cpu_model_runner.py
    │   │   ├── cpu_worker.py
    │   │   ├── embedding_model_runner.py
    │   │   ├── model_runner.py
    │   │   ├── neuron_model_runner.py
    │   │   ├── neuron_worker.py
    │   │   ├── worker.py
    │   │   └── worker_base.py
    └── vmm_allocator
    │   ├── README.md
    │   ├── __init__.py
    │   ├── radix_cache.py
    │   ├── vmm_allocator.cpp
    │   ├── vmm_allocator.h
    │   └── vmm_allocator.py
├── GMLake
    ├── README.md
    ├── docs
    │   ├── GMLake-tutorial.md
    │   └── figures
    │   │   ├── GMLake.png
    │   │   ├── batch-neox-20b.png
    │   │   ├── batch-opt-1.3b.png
    │   │   ├── batch-opt-13b.png
    │   │   ├── platforms.png
    │   │   ├── scale-neox-20b.png
    │   │   ├── scale-opt-13b.png
    │   │   ├── scale-vicuna-13b.png
    │   │   ├── stra-neox-20b.png
    │   │   ├── stra-opt-1.3b.png
    │   │   └── stra-vicuna-13b.png
    ├── include
    │   └── cuda_vmm_allocator.h
    └── src
    │   └── CUDACachingAllocator.cpp
├── LICENSE
├── MultiPath
    ├── README.md
    ├── src
    │   ├── Makefile
    │   ├── cuda.cpp
    │   ├── glake_cache.h
    │   ├── gmm_api_stats.cpp
    │   ├── gmm_api_stats.h
    │   ├── gmm_client.h
    │   ├── gmm_client_cfg.cpp
    │   ├── gmm_client_cfg.h
    │   ├── gmm_client_impl.cpp
    │   ├── gmm_common.h
    │   ├── gmm_common_impl.cpp
    │   ├── gmm_cuda_common.h
    │   ├── gmm_cuda_mem.h
    │   ├── gmm_cuda_mem_impl.cpp
    │   ├── gmm_cuda_mempool.h
    │   ├── gmm_gdr_plugin.cpp
    │   ├── gmm_gdr_plugin.h
    │   ├── gmm_host_mem.h
    │   ├── gmm_host_shm.h
    │   ├── gmm_host_shm_impl.cpp
    │   ├── gmm_mempool_impl.cpp
    │   ├── gmm_mp.h
    │   ├── gmm_multipath_impl.cu
    │   ├── gmm_queue.h
    │   ├── gmm_server.h
    │   ├── gmm_server_impl.cpp
    │   ├── gmm_shm_nv_impl.cpp
    │   ├── gmm_singleton.h
    │   ├── gmm_util.h
    │   ├── gmm_vstore.h
    │   ├── gmm_worker.h
    │   └── gmm_worker_impl.cpp
    └── test
    │   ├── Makefile
    │   ├── cuda_check.h
    │   ├── gmm_bench.cu
    │   └── gmm_test.cu
├── README.md
└── docs
    ├── figures
        ├── cpu_gpu_bw.png
        ├── dedup.png
        ├── dedup1.png
        ├── glake_arch_cn.png
        ├── glake_arch_en.png
        ├── gmlake-wechat.jpg
        ├── gmlake-wechat.png
        ├── gmlake.png
        └── multi_path_view.png
    ├── readme_cn.md
    └── 蚂蚁-GLake显存与传输优化-AIConf-V1.0.pdf


/GLakeServe/CMakeLists.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/CMakeLists.txt


--------------------------------------------------------------------------------
/GLakeServe/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/CONTRIBUTING.md


--------------------------------------------------------------------------------
/GLakeServe/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/Dockerfile


--------------------------------------------------------------------------------
/GLakeServe/Dockerfile.cpu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/Dockerfile.cpu


--------------------------------------------------------------------------------
/GLakeServe/Dockerfile.neuron:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/Dockerfile.neuron


--------------------------------------------------------------------------------
/GLakeServe/Dockerfile.rocm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/Dockerfile.rocm


--------------------------------------------------------------------------------
/GLakeServe/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/LICENSE


--------------------------------------------------------------------------------
/GLakeServe/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/MANIFEST.in


--------------------------------------------------------------------------------
/GLakeServe/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/README.md


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/README.md


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/backend_request_func.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/backend_request_func.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/benchmark_latency.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/benchmark_latency.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/benchmark_prefix_caching.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/benchmark_prefix_caching.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/benchmark_serving.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/benchmark_serving.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/benchmark_throughput.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/benchmark_throughput.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/jsonl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/jsonl.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/kernels/benchmark_aqlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/kernels/benchmark_aqlm.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/kernels/benchmark_mixtral_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/kernels/benchmark_mixtral_moe.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/kernels/benchmark_paged_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/kernels/benchmark_paged_attention.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/kernels/benchmark_rope.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/kernels/benchmark_rope.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/launch_tgi_server.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/launch_tgi_server.sh


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/overheads/benchmark_hashing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/overheads/benchmark_hashing.py


--------------------------------------------------------------------------------
/GLakeServe/benchmarks/sonnet.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/benchmarks/sonnet.txt


--------------------------------------------------------------------------------
/GLakeServe/cmake/cpu_extension.cmake:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/cmake/cpu_extension.cmake


--------------------------------------------------------------------------------
/GLakeServe/cmake/hipify.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/cmake/hipify.py


--------------------------------------------------------------------------------
/GLakeServe/cmake/utils.cmake:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/cmake/utils.cmake


--------------------------------------------------------------------------------
/GLakeServe/collect_env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/collect_env.py


--------------------------------------------------------------------------------
/GLakeServe/csrc/activation_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/activation_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/attention_dtypes.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/attention_dtypes.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/attention_generic.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/attention_generic.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/attention_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/attention_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/attention_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/attention_utils.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/dtype_bfloat16.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/dtype_bfloat16.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/dtype_float16.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/dtype_float16.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/dtype_float32.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/dtype_float32.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/attention/dtype_fp8.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/attention/dtype_fp8.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/cache.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cache.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/cache_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cache_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/activation.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/activation.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/attention.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/attention.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/cache.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/cache.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/cpu_types.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/cpu_types.hpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/layernorm.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/layernorm.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/pos_encoding.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/pos_encoding.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cpu/pybind.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cpu/pybind.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/cuda_compat.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cuda_compat.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/cuda_utils.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cuda_utils.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/cuda_utils_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/cuda_utils_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/custom_all_reduce.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/custom_all_reduce.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/custom_all_reduce.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/custom_all_reduce.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/custom_all_reduce_test.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/custom_all_reduce_test.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/dispatch_utils.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/dispatch_utils.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/layernorm_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/layernorm_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/moe/moe_ops.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/moe/moe_ops.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/moe/moe_ops.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/moe/moe_ops.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/moe/topk_softmax_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/moe/topk_softmax_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/moe_align_block_size_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/moe_align_block_size_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/ops.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/ops.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/pos_encoding_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/pos_encoding_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/LICENSE


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_config.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_config.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/bgmv_impl.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/bgmv_impl.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/generator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/generator.py


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/bgmv/vec_dtypes.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/bgmv/vec_dtypes.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/punica_ops.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/punica_ops.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/punica_ops.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/punica_ops.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/punica_pybind.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/punica_pybind.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/punica/type_convert.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/punica/type_convert.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/pybind.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/pybind.cpp


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/aqlm/gemm_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/aqlm/gemm_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/awq/dequantize.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/awq/dequantize.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/awq/gemm_kernels.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/awq/gemm_kernels.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/fp8/amd/hip_float8.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/fp8/amd/hip_float8.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/fp8/amd/hip_float8_impl.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/fp8/amd/hip_float8_impl.h


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/fp8/amd/quant_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/fp8/amd/quant_utils.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/fp8/common.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/fp8/common.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/fp8/nvidia/quant_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/fp8/nvidia/quant_utils.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/compat.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/compat.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/matrix_view.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/matrix_view.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/q_gemm.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/q_gemm.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/qdq_2.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/qdq_2.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/qdq_3.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/qdq_3.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/qdq_4.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/qdq_4.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/qdq_8.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/qdq_8.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq/qdq_util.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq/qdq_util.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq_marlin/gptq_marlin.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq_marlin/gptq_marlin.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq_marlin/gptq_marlin.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq_marlin/gptq_marlin.cuh


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/gptq_marlin/gptq_marlin_repack.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/gptq_marlin/gptq_marlin_repack.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/marlin/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/marlin/LICENSE


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/marlin/marlin_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/marlin/marlin_cuda_kernel.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/quantization/squeezellm/quant_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/quantization/squeezellm/quant_cuda_kernel.cu


--------------------------------------------------------------------------------
/GLakeServe/csrc/reduction_utils.cuh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/csrc/reduction_utils.cuh


--------------------------------------------------------------------------------
/GLakeServe/examples/api_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/api_client.py


--------------------------------------------------------------------------------
/GLakeServe/examples/aqlm_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/aqlm_example.py


--------------------------------------------------------------------------------
/GLakeServe/examples/fp8/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/fp8/README.md


--------------------------------------------------------------------------------
/GLakeServe/examples/fp8/extract_scales.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/fp8/extract_scales.py


--------------------------------------------------------------------------------
/GLakeServe/examples/fp8/quantizer/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/fp8/quantizer/README.md


--------------------------------------------------------------------------------
/GLakeServe/examples/fp8/quantizer/quantize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/fp8/quantizer/quantize.py


--------------------------------------------------------------------------------
/GLakeServe/examples/gradio_openai_chatbot_webserver.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/gradio_openai_chatbot_webserver.py


--------------------------------------------------------------------------------
/GLakeServe/examples/gradio_webserver.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/gradio_webserver.py


--------------------------------------------------------------------------------
/GLakeServe/examples/llava_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/llava_example.py


--------------------------------------------------------------------------------
/GLakeServe/examples/llm_engine_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/llm_engine_example.py


--------------------------------------------------------------------------------
/GLakeServe/examples/logging_configuration.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/logging_configuration.md


--------------------------------------------------------------------------------
/GLakeServe/examples/multilora_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/multilora_inference.py


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference.py


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference_arctic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference_arctic.py


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference_distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference_distributed.py


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference_embedding.py


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference_neuron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference_neuron.py


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference_openai.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference_openai.md


--------------------------------------------------------------------------------
/GLakeServe/examples/offline_inference_with_prefix.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/offline_inference_with_prefix.py


--------------------------------------------------------------------------------
/GLakeServe/examples/openai_chat_completion_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/openai_chat_completion_client.py


--------------------------------------------------------------------------------
/GLakeServe/examples/openai_completion_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/openai_completion_client.py


--------------------------------------------------------------------------------
/GLakeServe/examples/openai_embedding_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/openai_embedding_client.py


--------------------------------------------------------------------------------
/GLakeServe/examples/openi_example_batch.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/openi_example_batch.jsonl


--------------------------------------------------------------------------------
/GLakeServe/examples/production_monitoring/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/production_monitoring/README.md


--------------------------------------------------------------------------------
/GLakeServe/examples/production_monitoring/docker-compose.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/production_monitoring/docker-compose.yaml


--------------------------------------------------------------------------------
/GLakeServe/examples/production_monitoring/grafana.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/production_monitoring/grafana.json


--------------------------------------------------------------------------------
/GLakeServe/examples/production_monitoring/prometheus.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/production_monitoring/prometheus.yaml


--------------------------------------------------------------------------------
/GLakeServe/examples/save_sharded_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/save_sharded_state.py


--------------------------------------------------------------------------------
/GLakeServe/examples/template_alpaca.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_alpaca.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_baichuan.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_baichuan.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_chatglm.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_chatglm.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_chatglm2.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_chatglm2.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_chatml.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_chatml.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_falcon.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_falcon.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_falcon_180b.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_falcon_180b.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/template_inkbot.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/template_inkbot.jinja


--------------------------------------------------------------------------------
/GLakeServe/examples/tensorize_vllm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/examples/tensorize_vllm_model.py


--------------------------------------------------------------------------------
/GLakeServe/format.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/format.sh


--------------------------------------------------------------------------------
/GLakeServe/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/pyproject.toml


--------------------------------------------------------------------------------
/GLakeServe/requirements-build.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/requirements-build.txt


--------------------------------------------------------------------------------
/GLakeServe/requirements-common.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/requirements-common.txt


--------------------------------------------------------------------------------
/GLakeServe/requirements-cuda.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/requirements-cuda.txt


--------------------------------------------------------------------------------
/GLakeServe/rocm_patch/rocm_bf16.patch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/rocm_patch/rocm_bf16.patch


--------------------------------------------------------------------------------
/GLakeServe/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/setup.py


--------------------------------------------------------------------------------
/GLakeServe/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/api_server_async_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/api_server_async_engine.py


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/test_api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/test_api_server.py


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/test_async_llm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/test_async_llm_engine.py


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/test_chat_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/test_chat_template.py


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/test_merge_async_iterators.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/test_merge_async_iterators.py


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/test_openapi_server_ray.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/test_openapi_server_ray.py


--------------------------------------------------------------------------------
/GLakeServe/tests/async_engine/test_request_tracker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/async_engine/test_request_tracker.py


--------------------------------------------------------------------------------
/GLakeServe/tests/basic_correctness/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/basic_correctness/test_basic_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/basic_correctness/test_basic_correctness.py


--------------------------------------------------------------------------------
/GLakeServe/tests/basic_correctness/test_chunked_prefill.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/basic_correctness/test_chunked_prefill.py


--------------------------------------------------------------------------------
/GLakeServe/tests/basic_correctness/test_preemption.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/basic_correctness/test_preemption.py


--------------------------------------------------------------------------------
/GLakeServe/tests/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/conftest.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/conftest.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/e2e/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/e2e/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/e2e/conftest.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/e2e/test_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/e2e/test_correctness.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/test_block_manager_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/test_block_manager_v2.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/test_block_table.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/test_block_table.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/test_common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/test_common.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/test_cpu_gpu_block_allocator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/test_cpu_gpu_block_allocator.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/test_naive_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/test_naive_block.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/block/test_prefix_caching_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/block/test_prefix_caching_block.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/test_block_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/test_block_manager.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/test_chunked_prefill_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/test_chunked_prefill_scheduler.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/test_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/test_scheduler.py


--------------------------------------------------------------------------------
/GLakeServe/tests/core/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/core/utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/test_basic_distributed_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/distributed/test_basic_distributed_correctness.py


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/test_chunked_prefill_distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/distributed/test_chunked_prefill_distributed.py


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/test_comm_ops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/distributed/test_comm_ops.py


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/test_custom_all_reduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/distributed/test_custom_all_reduce.py


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/test_pynccl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/distributed/test_pynccl.py


--------------------------------------------------------------------------------
/GLakeServe/tests/distributed/test_pynccl_library.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/distributed/test_pynccl_library.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/output_processor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/output_processor/test_multi_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/output_processor/test_multi_step.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/test_computed_prefix_blocks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/test_computed_prefix_blocks.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/test_detokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/test_detokenization.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/test_multiproc_workers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/test_multiproc_workers.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/test_skip_tokenizer_init.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/test_skip_tokenizer_init.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/test_stop_reason.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/test_stop_reason.py


--------------------------------------------------------------------------------
/GLakeServe/tests/engine/test_stop_strings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/engine/test_stop_strings.py


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/openai/test_serving_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/entrypoints/openai/test_serving_chat.py


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/test_guided_processors.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/entrypoints/test_guided_processors.py


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/test_llm_generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/entrypoints/test_llm_generate.py


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/test_openai_run_batch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/entrypoints/test_openai_run_batch.py


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/test_openai_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/entrypoints/test_openai_server.py


--------------------------------------------------------------------------------
/GLakeServe/tests/entrypoints/test_server_oot_registration.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/entrypoints/test_server_oot_registration.py


--------------------------------------------------------------------------------
/GLakeServe/tests/fp8_kv/llama2-70b-fp8-kv/kv_cache_scales.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/fp8_kv/llama2-70b-fp8-kv/kv_cache_scales.json


--------------------------------------------------------------------------------
/GLakeServe/tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/fp8_kv/llama2-7b-fp8-kv/kv_cache_scales.json


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/allclose_default.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/allclose_default.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/conftest.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_activation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_activation.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_attention.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_cache.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_layernorm.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_moe.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_pos_encoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_pos_encoding.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_prefix_prefill.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_prefix_prefill.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_rand.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_rand.py


--------------------------------------------------------------------------------
/GLakeServe/tests/kernels/test_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/kernels/test_sampler.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/conftest.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_baichuan.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_chatglm3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_chatglm3.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_gemma.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_gemma.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_layer_variation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_layer_variation.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_layers.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_llama.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_lora.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_lora_checkpoints.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_lora_checkpoints.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_lora_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_lora_manager.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_mixtral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_mixtral.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_punica.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_punica.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_quant_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_quant_model.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_tokenizer_group.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/test_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/test_worker.py


--------------------------------------------------------------------------------
/GLakeServe/tests/lora/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/lora/utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/metrics/test_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/metrics/test_metrics.py


--------------------------------------------------------------------------------
/GLakeServe/tests/model_executor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/model_executor/weight_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/model_executor/weight_utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_aqlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_aqlm.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_big_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_big_models.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_embedding.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_fp8.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_gptq_marlin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_gptq_marlin.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_llava.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_llava.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_marlin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_marlin.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_mistral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_mistral.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_models.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/test_oot_registration.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/test_oot_registration.py


--------------------------------------------------------------------------------
/GLakeServe/tests/models/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/models/utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/prefix_caching/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/prefix_caching/test_prefix_caching.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/prefix_caching/test_prefix_caching.py


--------------------------------------------------------------------------------
/GLakeServe/tests/prompts/example.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/prompts/example.txt


--------------------------------------------------------------------------------
/GLakeServe/tests/prompts/summary.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/prompts/summary.txt


--------------------------------------------------------------------------------
/GLakeServe/tests/quantization/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/quantization/test_configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/quantization/test_configs.py


--------------------------------------------------------------------------------
/GLakeServe/tests/quantization/test_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/quantization/test_fp8.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_beam_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_beam_search.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_ignore_eos.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_ignore_eos.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_logits_processor.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_logprobs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_logprobs.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_ranks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_ranks.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_rejection_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_rejection_sampler.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_sampler.py


--------------------------------------------------------------------------------
/GLakeServe/tests/samplers/test_seeded_generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/samplers/test_seeded_generate.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/conftest.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/test_compatibility.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/test_compatibility.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/test_integration.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/test_integration.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/test_integration_dist.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/test_integration_dist.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/test_logprobs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/test_logprobs.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/test_multistep_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/test_multistep_correctness.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/e2e/test_ngram_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/e2e/test_ngram_correctness.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_batch_expansion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_batch_expansion.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_dynamic_spec_decode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_dynamic_spec_decode.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_metrics.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_multi_step_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_multi_step_worker.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_ngram_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_ngram_worker.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_spec_decode_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_spec_decode_worker.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/test_utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/spec_decode/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/spec_decode/utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/tensorizer_loader/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/tensorizer_loader/test_tensorizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/tensorizer_loader/test_tensorizer.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_cache_block_hashing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_cache_block_hashing.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_config.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_logger.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_logits_processor.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_regression.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_regression.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_sampling_params.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_sequence.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_sequence.py


--------------------------------------------------------------------------------
/GLakeServe/tests/test_sharded_state_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/test_sharded_state_loader.py


--------------------------------------------------------------------------------
/GLakeServe/tests/tokenization/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/tokenization/test_cached_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/tokenization/test_cached_tokenizer.py


--------------------------------------------------------------------------------
/GLakeServe/tests/tokenization/test_detokenize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/tokenization/test_detokenize.py


--------------------------------------------------------------------------------
/GLakeServe/tests/tokenization/test_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/tokenization/test_tokenizer.py


--------------------------------------------------------------------------------
/GLakeServe/tests/tokenization/test_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/tokenization/test_tokenizer_group.py


--------------------------------------------------------------------------------
/GLakeServe/tests/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/utils.py


--------------------------------------------------------------------------------
/GLakeServe/tests/worker/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/tests/worker/test_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/worker/test_model_runner.py


--------------------------------------------------------------------------------
/GLakeServe/tests/worker/test_swap.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/tests/worker/test_swap.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/_custom_ops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/_custom_ops.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/abstract.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/backends/abstract.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/flash_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/backends/flash_attn.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/flashinfer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/backends/flashinfer.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/rocm_flash_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/backends/rocm_flash_attn.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/torch_sdpa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/backends/torch_sdpa.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/backends/xformers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/backends/xformers.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/layer.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/ops/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/ops/paged_attn.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/ops/paged_attn.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/ops/prefix_prefill.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/ops/prefix_prefill.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/ops/triton_flash_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/ops/triton_flash_attention.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/attention/selector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/attention/selector.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/block.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/config.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/block_table.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block/block_table.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block/common.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/cpu_gpu_block_allocator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block/cpu_gpu_block_allocator.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block/interfaces.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/naive_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block/naive_block.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block/prefix_caching_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block/prefix_caching_block.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block_manager_v1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block_manager_v1.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/block_manager_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/block_manager_v2.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/embedding_model_block_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/embedding_model_block_manager.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/evictor_v1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/evictor_v1.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/evictor_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/evictor_v2.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/interfaces.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/policy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/policy.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/core/scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/core/scheduler.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/communication_op.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/communication_op.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/device_communicators/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/device_communicators/custom_all_reduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/device_communicators/custom_all_reduce.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/device_communicators/pynccl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/device_communicators/pynccl.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/device_communicators/pynccl_wrapper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/device_communicators/pynccl_wrapper.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/parallel_state.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/distributed/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/distributed/utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/arg_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/arg_utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/async_llm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/async_llm_engine.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/llm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/llm_engine.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/metrics.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/output_processor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/output_processor/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/output_processor/interfaces.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/output_processor/multi_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/output_processor/multi_step.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/output_processor/single_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/output_processor/single_step.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/output_processor/stop_checker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/output_processor/stop_checker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/engine/output_processor/util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/engine/output_processor/util.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/api_server.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/llm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/api_server.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/cli_args.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/cli_args.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/protocol.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/protocol.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/run_batch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/run_batch.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/serving_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/serving_chat.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/serving_completion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/serving_completion.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/serving_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/serving_embedding.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/entrypoints/openai/serving_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/entrypoints/openai/serving_engine.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/envs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/envs.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/cpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/cpu_executor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/distributed_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/distributed_gpu_executor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/executor_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/executor_base.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/gpu_executor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/multiproc_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/multiproc_gpu_executor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/multiproc_worker_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/multiproc_worker_utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/neuron_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/neuron_executor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/ray_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/ray_gpu_executor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/executor/ray_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/executor/ray_utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/logger.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/logging/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/logging/formatter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/logging/formatter.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/fully_sharded_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/fully_sharded_layers.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/layers.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/lora.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/models.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/punica.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/punica.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/request.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/request.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/lora/worker_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/lora/worker_manager.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/guided_decoding/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/guided_decoding/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/guided_decoding/outlines_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/guided_decoding/outlines_decoding.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/guided_decoding/outlines_logits_processors.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/guided_decoding/outlines_logits_processors.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/activation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/activation.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/configs/README:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/configs/README


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/fused_moe/fused_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/fused_moe/fused_moe.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/layernorm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/linear.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/logits_processor.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/ops/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/ops/rand.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/ops/rand.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/ops/sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/ops/sample.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/pooler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/pooler.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/aqlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/aqlm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/awq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/awq.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/base_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/base_config.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/deepspeedfp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/deepspeedfp.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/fp8.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/gptq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/gptq.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/gptq_marlin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/gptq_marlin.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/marlin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/marlin.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/schema.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/schema.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/quantization/squeezellm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/quantization/squeezellm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/rejection_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/rejection_sampler.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/rotary_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/rotary_embedding.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/sampler.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/layers/vocab_parallel_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/layers/vocab_parallel_embedding.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/model_loader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/model_loader/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/model_loader/loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/model_loader/loader.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/model_loader/neuron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/model_loader/neuron.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/model_loader/tensorizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/model_loader/tensorizer.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/model_loader/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/model_loader/utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/model_loader/weight_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/model_loader/weight_utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/arctic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/arctic.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/baichuan.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/bloom.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/bloom.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/chatglm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/commandr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/commandr.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/dbrx.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/dbrx.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/decilm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/decilm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/deepseek.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/deepseek.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/falcon.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/falcon.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/gemma.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/gemma.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/gpt2.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/gpt_bigcode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/gpt_bigcode.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/gpt_j.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/gpt_j.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/gpt_neox.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/gpt_neox.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/internlm2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/internlm2.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/jais.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/jais.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/llama.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/llama_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/llama_embedding.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/llava.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/llava.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/minicpm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/minicpm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/mixtral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/mixtral.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/mixtral_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/mixtral_quant.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/mpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/mpt.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/olmo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/olmo.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/opt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/opt.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/orion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/orion.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/phi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/phi.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/qwen.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/qwen.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/qwen2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/qwen2.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/qwen2_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/qwen2_moe.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/stablelm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/stablelm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/starcoder2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/starcoder2.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/models/xverse.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/models/xverse.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/pooling_metadata.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/pooling_metadata.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/sampling_metadata.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/sampling_metadata.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/model_executor/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/model_executor/utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/outputs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/outputs.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/pooling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/pooling_params.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/py.typed:
--------------------------------------------------------------------------------
1 | # Marker file for PEP 561.
2 | # The vllm package uses inline types.
3 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/sampling_params.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/sequence.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/sequence.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/batch_expansion.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/batch_expansion.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/interfaces.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/interfaces.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/metrics.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/multi_step_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/multi_step_worker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/ngram_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/ngram_worker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/spec_decode_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/spec_decode_worker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/top1_proposer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/top1_proposer.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/spec_decode/util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/spec_decode/util.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/config.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/arctic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/arctic.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/chatglm.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/dbrx.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/dbrx.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/falcon.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/falcon.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/jais.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/jais.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/configs/mpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/configs/mpt.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/detokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/detokenizer.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizer.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizer_group/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizer_group/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizer_group/tokenizer_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizer_group/tokenizer_group.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizers/__init__.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/transformers_utils/tokenizers/baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/transformers_utils/tokenizers/baichuan.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/usage/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/usage/usage_lib.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/usage/usage_lib.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/utils.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/cache_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/cache_engine.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/cpu_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/cpu_model_runner.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/cpu_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/cpu_worker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/embedding_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/embedding_model_runner.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/model_runner.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/neuron_model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/neuron_model_runner.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/neuron_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/neuron_worker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/worker.py


--------------------------------------------------------------------------------
/GLakeServe/vllm/worker/worker_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vllm/worker/worker_base.py


--------------------------------------------------------------------------------
/GLakeServe/vmm_allocator/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vmm_allocator/README.md


--------------------------------------------------------------------------------
/GLakeServe/vmm_allocator/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/GLakeServe/vmm_allocator/radix_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vmm_allocator/radix_cache.py


--------------------------------------------------------------------------------
/GLakeServe/vmm_allocator/vmm_allocator.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vmm_allocator/vmm_allocator.cpp


--------------------------------------------------------------------------------
/GLakeServe/vmm_allocator/vmm_allocator.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vmm_allocator/vmm_allocator.h


--------------------------------------------------------------------------------
/GLakeServe/vmm_allocator/vmm_allocator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GLakeServe/vmm_allocator/vmm_allocator.py


--------------------------------------------------------------------------------
/GMLake/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/README.md


--------------------------------------------------------------------------------
/GMLake/docs/GMLake-tutorial.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/GMLake-tutorial.md


--------------------------------------------------------------------------------
/GMLake/docs/figures/GMLake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/GMLake.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/batch-neox-20b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/batch-neox-20b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/batch-opt-1.3b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/batch-opt-1.3b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/batch-opt-13b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/batch-opt-13b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/platforms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/platforms.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/scale-neox-20b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/scale-neox-20b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/scale-opt-13b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/scale-opt-13b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/scale-vicuna-13b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/scale-vicuna-13b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/stra-neox-20b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/stra-neox-20b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/stra-opt-1.3b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/stra-opt-1.3b.png


--------------------------------------------------------------------------------
/GMLake/docs/figures/stra-vicuna-13b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/docs/figures/stra-vicuna-13b.png


--------------------------------------------------------------------------------
/GMLake/include/cuda_vmm_allocator.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/include/cuda_vmm_allocator.h


--------------------------------------------------------------------------------
/GMLake/src/CUDACachingAllocator.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/GMLake/src/CUDACachingAllocator.cpp


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/LICENSE


--------------------------------------------------------------------------------
/MultiPath/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/README.md


--------------------------------------------------------------------------------
/MultiPath/src/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/Makefile


--------------------------------------------------------------------------------
/MultiPath/src/cuda.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/cuda.cpp


--------------------------------------------------------------------------------
/MultiPath/src/glake_cache.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/glake_cache.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_api_stats.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_api_stats.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_api_stats.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_api_stats.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_client.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_client.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_client_cfg.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_client_cfg.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_client_cfg.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_client_cfg.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_client_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_client_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_common.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_common.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_common_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_common_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_cuda_common.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_cuda_common.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_cuda_mem.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_cuda_mem.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_cuda_mem_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_cuda_mem_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_cuda_mempool.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_cuda_mempool.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_gdr_plugin.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_gdr_plugin.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_gdr_plugin.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_gdr_plugin.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_host_mem.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_host_mem.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_host_shm.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_host_shm.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_host_shm_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_host_shm_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_mempool_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_mempool_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_mp.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_mp.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_multipath_impl.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_multipath_impl.cu


--------------------------------------------------------------------------------
/MultiPath/src/gmm_queue.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_queue.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_server.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_server.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_server_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_server_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_shm_nv_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_shm_nv_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/src/gmm_singleton.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_singleton.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_util.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_util.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_vstore.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_vstore.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_worker.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_worker.h


--------------------------------------------------------------------------------
/MultiPath/src/gmm_worker_impl.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/src/gmm_worker_impl.cpp


--------------------------------------------------------------------------------
/MultiPath/test/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/test/Makefile


--------------------------------------------------------------------------------
/MultiPath/test/cuda_check.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/test/cuda_check.h


--------------------------------------------------------------------------------
/MultiPath/test/gmm_bench.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/test/gmm_bench.cu


--------------------------------------------------------------------------------
/MultiPath/test/gmm_test.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/MultiPath/test/gmm_test.cu


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/README.md


--------------------------------------------------------------------------------
/docs/figures/cpu_gpu_bw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/cpu_gpu_bw.png


--------------------------------------------------------------------------------
/docs/figures/dedup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/dedup.png


--------------------------------------------------------------------------------
/docs/figures/dedup1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/dedup1.png


--------------------------------------------------------------------------------
/docs/figures/glake_arch_cn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/glake_arch_cn.png


--------------------------------------------------------------------------------
/docs/figures/glake_arch_en.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/glake_arch_en.png


--------------------------------------------------------------------------------
/docs/figures/gmlake-wechat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/gmlake-wechat.jpg


--------------------------------------------------------------------------------
/docs/figures/gmlake-wechat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/gmlake-wechat.png


--------------------------------------------------------------------------------
/docs/figures/gmlake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/gmlake.png


--------------------------------------------------------------------------------
/docs/figures/multi_path_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/figures/multi_path_view.png


--------------------------------------------------------------------------------
/docs/readme_cn.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/readme_cn.md


--------------------------------------------------------------------------------
/docs/蚂蚁-GLake显存与传输优化-AIConf-V1.0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antgroup/glake/HEAD/docs/蚂蚁-GLake显存与传输优化-AIConf-V1.0.pdf


--------------------------------------------------------------------------------