├── .gitignore ├── .gitmodules ├── .isort.cfg ├── LICENSE ├── README.md ├── arg_utils.py ├── assets ├── llama_7b.jpg └── mixtral_8x7b.jpg ├── benchmark ├── dspy │ ├── README.md │ └── bench_dspy_intro.py ├── generative_agents │ ├── README.md │ ├── agent_functions.py │ ├── bench_other.py │ └── bench_sglang.py ├── gsm8k │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── hellaswag │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── json_decode_regex │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── build_dataset.py ├── json_jump_forward │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ ├── build_dataset.py │ └── dataset.txt ├── latency_throughput │ ├── README.md │ ├── bench_throughput.py │ └── test_latency.py ├── line_retrieval │ ├── README.md │ ├── bench_sglang.py │ └── gen_data.py ├── llava_bench │ ├── README.md │ ├── bench_hf_llava_bench.sh │ ├── bench_hf_mme.sh │ ├── bench_sglang.py │ ├── bench_sglang_mme.sh │ └── download_images.py ├── llm_judge │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── long_json_decode │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── build_dataset.py ├── mmlu │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── mtbench │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── multi_chain_reasoning │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── multi_document_qa │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── build_dataset.py ├── multi_turn_chat │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── data_gen.py ├── react │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── tip_suggestion │ ├── .gitignore │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ ├── lmql_funcs.py │ └── topic.jsonl ├── tree_of_thought_deep │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── lmql_funcs.py └── tree_of_thought_v0 │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── debug.txt ├── docs ├── benchmark_results.md ├── flashinfer.md ├── model_support.md ├── release_process.md ├── sampling_params.md └── test_process.md ├── eventsim.py ├── examples ├── quick_start │ ├── anthropic_example_chat.py │ ├── anthropic_example_complete.py │ ├── azure_openai_example_chat.py │ ├── gemini_example_chat.py │ ├── gemini_example_complete.py │ ├── gemini_example_multimodal_chat.py │ ├── images │ │ ├── cat.jpeg │ │ └── dog.jpeg │ ├── openai_example_chat.py │ ├── openai_example_complete.py │ ├── openrouter_example_chat.py │ ├── srt_example_chat.py │ ├── srt_example_complete.py │ ├── srt_example_llava.py │ ├── srt_example_yi_vl.py │ ├── together_example_chat.py │ └── together_example_complete.py └── usage │ ├── async_io.py │ ├── choices_logprob.py │ ├── json_decode.py │ ├── llava │ ├── http_llama3_llava_test.py │ ├── http_qwen_llava_test.py │ └── srt_llava_next_test.py │ ├── llava_video │ ├── srt_example_llava_v.py │ ├── srt_example_llava_v.sh │ └── videos │ │ └── Q98Z4OTh8RwmDonc.mp4 │ ├── openai_speculative.py │ ├── parallel_sample.py │ ├── rag_using_parea │ ├── max-tokens-fixed-rag-trace.png │ └── trace_and_evaluate_rag_using_parea.ipynb │ ├── readme_examples.py │ ├── streaming.py │ └── triton │ ├── Dockerfile │ ├── README.md │ └── models │ └── character_generation │ ├── 1 │ └── model.py │ └── config.pbtxt ├── model_equation_aio_regression.py ├── model_equation_fitting.py ├── nsdi_plots ├── e2e.ipynb ├── exploration_e2e.ipynb ├── plot.py └── plot_utils.py ├── playground ├── launch_tgi.sh └── load_tokenizer.py ├── preble ├── README.md ├── __init__.py ├── benchmarks │ ├── __init__.py │ ├── benchmark_utils.py │ ├── benchmark_workload_gen.py │ ├── chameleon │ │ ├── __init__.py │ │ ├── prompt_cl.py │ │ ├── prompt_kr.py │ │ ├── prompt_pg.py │ │ ├── prompt_policy.py │ │ ├── prompt_rl.py │ │ ├── prompt_sg.py │ │ └── prompt_tv.py │ ├── exp_configs │ │ ├── __init__.py │ │ ├── exp_config_utils.py │ │ └── model_equations.py │ ├── multi_bench_exp_routing.py │ ├── multi_exp_configs │ │ ├── __init__.py │ │ ├── all_experiments.py │ │ ├── e2e_234r_common_share_micro_config.py │ │ ├── e2e_234r_loogle_config.py │ │ ├── e2e_234r_toolbench_config.py │ │ ├── e2e_234r_toolbench_zipf.py │ │ ├── e2e_234r_videoQA_config.py │ │ ├── e2e_2r_toolbench_config.py │ │ ├── e2e_4r_loogle_config.py │ │ ├── e2e_4r_toolbench_config.py │ │ ├── e2e_4r_videoQA_config.py │ │ ├── e2e_loogle_config.py │ │ ├── e2e_mix_config.py │ │ ├── e2e_programming.py │ │ ├── e2e_programming_cropped_decode.py │ │ ├── e2e_programming_percent_shared_micro_bench.py │ │ ├── e2e_toolbench_config.py │ │ ├── e2e_tp_toolbench_config.py │ │ ├── e2e_videoQA_config.py │ │ ├── e2e_virtualenv_config.py │ │ ├── loogle_config.py │ │ ├── motivation_prefix_caching_matters.py │ │ ├── multi_exp_utils.py │ │ ├── test_high_decode.py │ │ ├── test_realisitic_arrival_pattern.py │ │ ├── test_varying_random_regular_workload_prefix.py │ │ └── test_varying_random_share_gpt.py │ ├── multi_experiment_benchmark_utils.py │ ├── profile_interface.py │ └── toolqa │ │ ├── __init__.py │ │ ├── fewshots.py │ │ └── prompts.py ├── data_parallel_request_cache.py ├── debug │ └── launch_multi_node_server.py ├── global_lru_cache.py ├── global_scheduler_with_time.py ├── global_scheduler_with_time_perf.py ├── model_runtime_manager.py ├── multi_node_loader.py ├── profile_model_forwarding.py ├── profile_ragged_forwarding.py ├── requirements.txt ├── server │ ├── __init__.py │ └── server.py ├── simulator.py ├── ssh_runtime.py ├── test_important_node_stealing.py ├── test_model_loading_and_execution.py ├── test_runtime.py ├── tests │ ├── test_data_parallel_routing.py │ ├── test_gpu_profiling.py │ ├── test_hit_ratio_retrieval.py │ ├── test_lp_scheduler.py │ ├── test_multi_node_loader.py │ └── test_sglang_server_metrics.py ├── ttft_overload_detector.py └── vllm_runtime.py ├── profile.txt ├── python ├── pyproject.toml ├── sglang │ ├── __init__.py │ ├── api.py │ ├── backend │ │ ├── __init__.py │ │ ├── anthropic.py │ │ ├── base_backend.py │ │ ├── openai.py │ │ ├── runtime_endpoint.py │ │ └── vertexai.py │ ├── global_config.py │ ├── lang │ │ ├── __init__.py │ │ ├── chat_template.py │ │ ├── compiler.py │ │ ├── interpreter.py │ │ ├── ir.py │ │ └── tracer.py │ ├── launch_server.py │ ├── launch_server_llavavid.py │ ├── srt │ │ ├── constrained │ │ │ ├── __init__.py │ │ │ ├── base_cache.py │ │ │ ├── fsm_cache.py │ │ │ └── jump_forward.py │ │ ├── conversation.py │ │ ├── flush_cache.py │ │ ├── hf_transformers_utils.py │ │ ├── layers │ │ │ ├── context_flashattention_nopad.py │ │ │ ├── extend_attention.py │ │ │ ├── logits_processor.py │ │ │ ├── radix_attention.py │ │ │ └── token_attention.py │ │ ├── managers │ │ │ ├── detokenizer_manager.py │ │ │ ├── io_struct.py │ │ │ ├── router │ │ │ │ ├── infer_batch.py │ │ │ │ ├── manager.py │ │ │ │ ├── model_rpc.py │ │ │ │ ├── model_runner.py │ │ │ │ ├── radix_cache.py │ │ │ │ └── scheduler.py │ │ │ └── tokenizer_manager.py │ │ ├── memory_pool.py │ │ ├── mm_utils.py │ │ ├── model_config.py │ │ ├── models │ │ │ ├── commandr.py │ │ │ ├── dbrx.py │ │ │ ├── dbrx_config.py │ │ │ ├── gemma.py │ │ │ ├── llama2.py │ │ │ ├── llava.py │ │ │ ├── llava_mistral.py │ │ │ ├── llava_qwen.py │ │ │ ├── llavavid.py │ │ │ ├── mistral.py │ │ │ ├── mixtral.py │ │ │ ├── qwen.py │ │ │ ├── qwen2.py │ │ │ ├── stablelm.py │ │ │ └── yivl.py │ │ ├── openai_api_adapter.py │ │ ├── openai_protocol.py │ │ ├── sampling_params.py │ │ ├── server.py │ │ ├── server_args.py │ │ ├── utils.py │ │ └── weight_utils.py │ ├── test │ │ ├── test_conversation.py │ │ ├── test_openai_protocol.py │ │ ├── test_programs.py │ │ └── test_utils.py │ └── utils.py └── upload_pypi.sh ├── sample_server_async_call.py ├── scripts ├── convert_yi_vl.py ├── convert_yi_vl.sh ├── format.sh └── launch_tgi.sh ├── setup.py ├── setup.sh ├── sim.py └── test ├── __init__.py ├── killall_sglang.sh ├── lang ├── example_image.png ├── run_all.py ├── test_anthropic_backend.py ├── test_bind_cache.py ├── test_openai_backend.py ├── test_srt_backend.py ├── test_tracing.py └── test_vertexai_backend.py └── srt ├── example_image.png ├── model ├── bench_llama_low_api.py ├── reference_hf.py ├── test_llama_extend.py ├── test_llama_low_api.py └── test_llava_low_api.py ├── test_curl.sh ├── test_flashinfer.py ├── test_httpserver_concurrent.py ├── test_httpserver_decode.py ├── test_httpserver_decode_stream.py ├── test_httpserver_llava.py ├── test_httpserver_reuse.py ├── test_jump_forward.py ├── test_openai_server.py └── test_robust.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile=black 3 | known_first_party=sglang -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/README.md -------------------------------------------------------------------------------- /arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/arg_utils.py -------------------------------------------------------------------------------- /assets/llama_7b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/assets/llama_7b.jpg -------------------------------------------------------------------------------- /assets/mixtral_8x7b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/assets/mixtral_8x7b.jpg -------------------------------------------------------------------------------- /benchmark/dspy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/dspy/README.md -------------------------------------------------------------------------------- /benchmark/dspy/bench_dspy_intro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/dspy/bench_dspy_intro.py -------------------------------------------------------------------------------- /benchmark/generative_agents/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/generative_agents/README.md -------------------------------------------------------------------------------- /benchmark/generative_agents/agent_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/generative_agents/agent_functions.py -------------------------------------------------------------------------------- /benchmark/generative_agents/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/generative_agents/bench_other.py -------------------------------------------------------------------------------- /benchmark/generative_agents/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/generative_agents/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/gsm8k/README.md -------------------------------------------------------------------------------- /benchmark/gsm8k/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/gsm8k/bench_other.py -------------------------------------------------------------------------------- /benchmark/gsm8k/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/gsm8k/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/hellaswag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/hellaswag/README.md -------------------------------------------------------------------------------- /benchmark/hellaswag/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/hellaswag/bench_other.py -------------------------------------------------------------------------------- /benchmark/hellaswag/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/hellaswag/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/json_decode_regex/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_decode_regex/README.md -------------------------------------------------------------------------------- /benchmark/json_decode_regex/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_decode_regex/bench_other.py -------------------------------------------------------------------------------- /benchmark/json_decode_regex/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_decode_regex/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/json_decode_regex/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_decode_regex/build_dataset.py -------------------------------------------------------------------------------- /benchmark/json_jump_forward/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_jump_forward/README.md -------------------------------------------------------------------------------- /benchmark/json_jump_forward/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_jump_forward/bench_other.py -------------------------------------------------------------------------------- /benchmark/json_jump_forward/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_jump_forward/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/json_jump_forward/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_jump_forward/build_dataset.py -------------------------------------------------------------------------------- /benchmark/json_jump_forward/dataset.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/json_jump_forward/dataset.txt -------------------------------------------------------------------------------- /benchmark/latency_throughput/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/latency_throughput/README.md -------------------------------------------------------------------------------- /benchmark/latency_throughput/bench_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/latency_throughput/bench_throughput.py -------------------------------------------------------------------------------- /benchmark/latency_throughput/test_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/latency_throughput/test_latency.py -------------------------------------------------------------------------------- /benchmark/line_retrieval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/line_retrieval/README.md -------------------------------------------------------------------------------- /benchmark/line_retrieval/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/line_retrieval/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/line_retrieval/gen_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/line_retrieval/gen_data.py -------------------------------------------------------------------------------- /benchmark/llava_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llava_bench/README.md -------------------------------------------------------------------------------- /benchmark/llava_bench/bench_hf_llava_bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llava_bench/bench_hf_llava_bench.sh -------------------------------------------------------------------------------- /benchmark/llava_bench/bench_hf_mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llava_bench/bench_hf_mme.sh -------------------------------------------------------------------------------- /benchmark/llava_bench/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llava_bench/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/llava_bench/bench_sglang_mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llava_bench/bench_sglang_mme.sh -------------------------------------------------------------------------------- /benchmark/llava_bench/download_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llava_bench/download_images.py -------------------------------------------------------------------------------- /benchmark/llm_judge/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llm_judge/README.md -------------------------------------------------------------------------------- /benchmark/llm_judge/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llm_judge/bench_other.py -------------------------------------------------------------------------------- /benchmark/llm_judge/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/llm_judge/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/long_json_decode/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/long_json_decode/README.md -------------------------------------------------------------------------------- /benchmark/long_json_decode/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/long_json_decode/bench_other.py -------------------------------------------------------------------------------- /benchmark/long_json_decode/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/long_json_decode/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/long_json_decode/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/long_json_decode/build_dataset.py -------------------------------------------------------------------------------- /benchmark/mmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/mmlu/README.md -------------------------------------------------------------------------------- /benchmark/mmlu/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/mmlu/bench_other.py -------------------------------------------------------------------------------- /benchmark/mmlu/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/mmlu/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/mtbench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/mtbench/README.md -------------------------------------------------------------------------------- /benchmark/mtbench/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/mtbench/bench_other.py -------------------------------------------------------------------------------- /benchmark/mtbench/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/mtbench/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/multi_chain_reasoning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_chain_reasoning/README.md -------------------------------------------------------------------------------- /benchmark/multi_chain_reasoning/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_chain_reasoning/bench_other.py -------------------------------------------------------------------------------- /benchmark/multi_chain_reasoning/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_chain_reasoning/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/multi_document_qa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_document_qa/README.md -------------------------------------------------------------------------------- /benchmark/multi_document_qa/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_document_qa/bench_other.py -------------------------------------------------------------------------------- /benchmark/multi_document_qa/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_document_qa/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/multi_document_qa/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_document_qa/build_dataset.py -------------------------------------------------------------------------------- /benchmark/multi_turn_chat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_turn_chat/README.md -------------------------------------------------------------------------------- /benchmark/multi_turn_chat/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_turn_chat/bench_other.py -------------------------------------------------------------------------------- /benchmark/multi_turn_chat/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_turn_chat/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/multi_turn_chat/data_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/multi_turn_chat/data_gen.py -------------------------------------------------------------------------------- /benchmark/react/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/react/README.md -------------------------------------------------------------------------------- /benchmark/react/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/react/bench_other.py -------------------------------------------------------------------------------- /benchmark/react/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/react/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/tip_suggestion/.gitignore: -------------------------------------------------------------------------------- 1 | !topic.jsonl -------------------------------------------------------------------------------- /benchmark/tip_suggestion/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tip_suggestion/README.md -------------------------------------------------------------------------------- /benchmark/tip_suggestion/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tip_suggestion/bench_other.py -------------------------------------------------------------------------------- /benchmark/tip_suggestion/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tip_suggestion/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/tip_suggestion/lmql_funcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tip_suggestion/lmql_funcs.py -------------------------------------------------------------------------------- /benchmark/tip_suggestion/topic.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tip_suggestion/topic.jsonl -------------------------------------------------------------------------------- /benchmark/tree_of_thought_deep/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_deep/README.md -------------------------------------------------------------------------------- /benchmark/tree_of_thought_deep/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_deep/bench_other.py -------------------------------------------------------------------------------- /benchmark/tree_of_thought_deep/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_deep/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/tree_of_thought_deep/lmql_funcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_deep/lmql_funcs.py -------------------------------------------------------------------------------- /benchmark/tree_of_thought_v0/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_v0/README.md -------------------------------------------------------------------------------- /benchmark/tree_of_thought_v0/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_v0/bench_other.py -------------------------------------------------------------------------------- /benchmark/tree_of_thought_v0/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/benchmark/tree_of_thought_v0/bench_sglang.py -------------------------------------------------------------------------------- /debug.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/debug.txt -------------------------------------------------------------------------------- /docs/benchmark_results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/docs/benchmark_results.md -------------------------------------------------------------------------------- /docs/flashinfer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/docs/flashinfer.md -------------------------------------------------------------------------------- /docs/model_support.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/docs/model_support.md -------------------------------------------------------------------------------- /docs/release_process.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/docs/release_process.md -------------------------------------------------------------------------------- /docs/sampling_params.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/docs/sampling_params.md -------------------------------------------------------------------------------- /docs/test_process.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/docs/test_process.md -------------------------------------------------------------------------------- /eventsim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/eventsim.py -------------------------------------------------------------------------------- /examples/quick_start/anthropic_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/anthropic_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/anthropic_example_complete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/anthropic_example_complete.py -------------------------------------------------------------------------------- /examples/quick_start/azure_openai_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/azure_openai_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/gemini_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/gemini_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/gemini_example_complete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/gemini_example_complete.py -------------------------------------------------------------------------------- /examples/quick_start/gemini_example_multimodal_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/gemini_example_multimodal_chat.py -------------------------------------------------------------------------------- /examples/quick_start/images/cat.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/images/cat.jpeg -------------------------------------------------------------------------------- /examples/quick_start/images/dog.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/images/dog.jpeg -------------------------------------------------------------------------------- /examples/quick_start/openai_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/openai_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/openai_example_complete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/openai_example_complete.py -------------------------------------------------------------------------------- /examples/quick_start/openrouter_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/openrouter_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/srt_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/srt_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/srt_example_complete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/srt_example_complete.py -------------------------------------------------------------------------------- /examples/quick_start/srt_example_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/srt_example_llava.py -------------------------------------------------------------------------------- /examples/quick_start/srt_example_yi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/srt_example_yi_vl.py -------------------------------------------------------------------------------- /examples/quick_start/together_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/together_example_chat.py -------------------------------------------------------------------------------- /examples/quick_start/together_example_complete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/quick_start/together_example_complete.py -------------------------------------------------------------------------------- /examples/usage/async_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/async_io.py -------------------------------------------------------------------------------- /examples/usage/choices_logprob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/choices_logprob.py -------------------------------------------------------------------------------- /examples/usage/json_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/json_decode.py -------------------------------------------------------------------------------- /examples/usage/llava/http_llama3_llava_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/llava/http_llama3_llava_test.py -------------------------------------------------------------------------------- /examples/usage/llava/http_qwen_llava_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/llava/http_qwen_llava_test.py -------------------------------------------------------------------------------- /examples/usage/llava/srt_llava_next_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/llava/srt_llava_next_test.py -------------------------------------------------------------------------------- /examples/usage/llava_video/srt_example_llava_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/llava_video/srt_example_llava_v.py -------------------------------------------------------------------------------- /examples/usage/llava_video/srt_example_llava_v.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/llava_video/srt_example_llava_v.sh -------------------------------------------------------------------------------- /examples/usage/llava_video/videos/Q98Z4OTh8RwmDonc.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/llava_video/videos/Q98Z4OTh8RwmDonc.mp4 -------------------------------------------------------------------------------- /examples/usage/openai_speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/openai_speculative.py -------------------------------------------------------------------------------- /examples/usage/parallel_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/parallel_sample.py -------------------------------------------------------------------------------- /examples/usage/rag_using_parea/max-tokens-fixed-rag-trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/rag_using_parea/max-tokens-fixed-rag-trace.png -------------------------------------------------------------------------------- /examples/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb -------------------------------------------------------------------------------- /examples/usage/readme_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/readme_examples.py -------------------------------------------------------------------------------- /examples/usage/streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/streaming.py -------------------------------------------------------------------------------- /examples/usage/triton/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/triton/Dockerfile -------------------------------------------------------------------------------- /examples/usage/triton/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/triton/README.md -------------------------------------------------------------------------------- /examples/usage/triton/models/character_generation/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/triton/models/character_generation/1/model.py -------------------------------------------------------------------------------- /examples/usage/triton/models/character_generation/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/examples/usage/triton/models/character_generation/config.pbtxt -------------------------------------------------------------------------------- /model_equation_aio_regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/model_equation_aio_regression.py -------------------------------------------------------------------------------- /model_equation_fitting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/model_equation_fitting.py -------------------------------------------------------------------------------- /nsdi_plots/e2e.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/nsdi_plots/e2e.ipynb -------------------------------------------------------------------------------- /nsdi_plots/exploration_e2e.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/nsdi_plots/exploration_e2e.ipynb -------------------------------------------------------------------------------- /nsdi_plots/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/nsdi_plots/plot.py -------------------------------------------------------------------------------- /nsdi_plots/plot_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/nsdi_plots/plot_utils.py -------------------------------------------------------------------------------- /playground/launch_tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/playground/launch_tgi.sh -------------------------------------------------------------------------------- /playground/load_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/playground/load_tokenizer.py -------------------------------------------------------------------------------- /preble/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/README.md -------------------------------------------------------------------------------- /preble/__init__.py: -------------------------------------------------------------------------------- 1 | import preble.server -------------------------------------------------------------------------------- /preble/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preble/benchmarks/benchmark_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/benchmark_utils.py -------------------------------------------------------------------------------- /preble/benchmarks/benchmark_workload_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/benchmark_workload_gen.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/__init__.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_cl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_cl.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_kr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_kr.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_pg.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_policy.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_rl.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_sg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_sg.py -------------------------------------------------------------------------------- /preble/benchmarks/chameleon/prompt_tv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/chameleon/prompt_tv.py -------------------------------------------------------------------------------- /preble/benchmarks/exp_configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preble/benchmarks/exp_configs/exp_config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/exp_configs/exp_config_utils.py -------------------------------------------------------------------------------- /preble/benchmarks/exp_configs/model_equations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/exp_configs/model_equations.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_bench_exp_routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_bench_exp_routing.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/all_experiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/all_experiments.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_234r_common_share_micro_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_234r_common_share_micro_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_234r_loogle_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_234r_loogle_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_234r_toolbench_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_234r_toolbench_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_234r_toolbench_zipf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_234r_toolbench_zipf.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_234r_videoQA_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_234r_videoQA_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_2r_toolbench_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_2r_toolbench_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_4r_loogle_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_4r_loogle_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_4r_toolbench_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_4r_toolbench_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_4r_videoQA_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_4r_videoQA_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_loogle_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_loogle_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_mix_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_mix_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_programming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_programming.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_programming_cropped_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_programming_cropped_decode.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_programming_percent_shared_micro_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_programming_percent_shared_micro_bench.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_toolbench_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_toolbench_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_tp_toolbench_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_tp_toolbench_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_videoQA_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_videoQA_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/e2e_virtualenv_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/e2e_virtualenv_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/loogle_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/loogle_config.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/motivation_prefix_caching_matters.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/multi_exp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/multi_exp_utils.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/test_high_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/test_high_decode.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/test_realisitic_arrival_pattern.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/test_realisitic_arrival_pattern.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/test_varying_random_regular_workload_prefix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/test_varying_random_regular_workload_prefix.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_exp_configs/test_varying_random_share_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_exp_configs/test_varying_random_share_gpt.py -------------------------------------------------------------------------------- /preble/benchmarks/multi_experiment_benchmark_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/multi_experiment_benchmark_utils.py -------------------------------------------------------------------------------- /preble/benchmarks/profile_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/profile_interface.py -------------------------------------------------------------------------------- /preble/benchmarks/toolqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/toolqa/__init__.py -------------------------------------------------------------------------------- /preble/benchmarks/toolqa/fewshots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/toolqa/fewshots.py -------------------------------------------------------------------------------- /preble/benchmarks/toolqa/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/benchmarks/toolqa/prompts.py -------------------------------------------------------------------------------- /preble/data_parallel_request_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/data_parallel_request_cache.py -------------------------------------------------------------------------------- /preble/debug/launch_multi_node_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/debug/launch_multi_node_server.py -------------------------------------------------------------------------------- /preble/global_lru_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/global_lru_cache.py -------------------------------------------------------------------------------- /preble/global_scheduler_with_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/global_scheduler_with_time.py -------------------------------------------------------------------------------- /preble/global_scheduler_with_time_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/global_scheduler_with_time_perf.py -------------------------------------------------------------------------------- /preble/model_runtime_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/model_runtime_manager.py -------------------------------------------------------------------------------- /preble/multi_node_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/multi_node_loader.py -------------------------------------------------------------------------------- /preble/profile_model_forwarding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/profile_model_forwarding.py -------------------------------------------------------------------------------- /preble/profile_ragged_forwarding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/profile_ragged_forwarding.py -------------------------------------------------------------------------------- /preble/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/requirements.txt -------------------------------------------------------------------------------- /preble/server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preble/server/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/server/server.py -------------------------------------------------------------------------------- /preble/simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/simulator.py -------------------------------------------------------------------------------- /preble/ssh_runtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/ssh_runtime.py -------------------------------------------------------------------------------- /preble/test_important_node_stealing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/test_important_node_stealing.py -------------------------------------------------------------------------------- /preble/test_model_loading_and_execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/test_model_loading_and_execution.py -------------------------------------------------------------------------------- /preble/test_runtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/test_runtime.py -------------------------------------------------------------------------------- /preble/tests/test_data_parallel_routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/tests/test_data_parallel_routing.py -------------------------------------------------------------------------------- /preble/tests/test_gpu_profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/tests/test_gpu_profiling.py -------------------------------------------------------------------------------- /preble/tests/test_hit_ratio_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/tests/test_hit_ratio_retrieval.py -------------------------------------------------------------------------------- /preble/tests/test_lp_scheduler.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preble/tests/test_multi_node_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/tests/test_multi_node_loader.py -------------------------------------------------------------------------------- /preble/tests/test_sglang_server_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/tests/test_sglang_server_metrics.py -------------------------------------------------------------------------------- /preble/ttft_overload_detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/ttft_overload_detector.py -------------------------------------------------------------------------------- /preble/vllm_runtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/preble/vllm_runtime.py -------------------------------------------------------------------------------- /profile.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/profile.txt -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/pyproject.toml -------------------------------------------------------------------------------- /python/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/__init__.py -------------------------------------------------------------------------------- /python/sglang/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/api.py -------------------------------------------------------------------------------- /python/sglang/backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/backend/anthropic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/backend/anthropic.py -------------------------------------------------------------------------------- /python/sglang/backend/base_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/backend/base_backend.py -------------------------------------------------------------------------------- /python/sglang/backend/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/backend/openai.py -------------------------------------------------------------------------------- /python/sglang/backend/runtime_endpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/backend/runtime_endpoint.py -------------------------------------------------------------------------------- /python/sglang/backend/vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/backend/vertexai.py -------------------------------------------------------------------------------- /python/sglang/global_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/global_config.py -------------------------------------------------------------------------------- /python/sglang/lang/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/lang/chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/lang/chat_template.py -------------------------------------------------------------------------------- /python/sglang/lang/compiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/lang/compiler.py -------------------------------------------------------------------------------- /python/sglang/lang/interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/lang/interpreter.py -------------------------------------------------------------------------------- /python/sglang/lang/ir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/lang/ir.py -------------------------------------------------------------------------------- /python/sglang/lang/tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/lang/tracer.py -------------------------------------------------------------------------------- /python/sglang/launch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/launch_server.py -------------------------------------------------------------------------------- /python/sglang/launch_server_llavavid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/launch_server_llavavid.py -------------------------------------------------------------------------------- /python/sglang/srt/constrained/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/constrained/__init__.py -------------------------------------------------------------------------------- /python/sglang/srt/constrained/base_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/constrained/base_cache.py -------------------------------------------------------------------------------- /python/sglang/srt/constrained/fsm_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/constrained/fsm_cache.py -------------------------------------------------------------------------------- /python/sglang/srt/constrained/jump_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/constrained/jump_forward.py -------------------------------------------------------------------------------- /python/sglang/srt/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/conversation.py -------------------------------------------------------------------------------- /python/sglang/srt/flush_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/flush_cache.py -------------------------------------------------------------------------------- /python/sglang/srt/hf_transformers_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/hf_transformers_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/layers/context_flashattention_nopad.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/extend_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/layers/extend_attention.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/layers/logits_processor.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/radix_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/layers/radix_attention.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/token_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/layers/token_attention.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/detokenizer_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/detokenizer_manager.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/io_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/io_struct.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/router/infer_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/router/infer_batch.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/router/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/router/manager.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/router/model_rpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/router/model_rpc.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/router/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/router/model_runner.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/router/radix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/router/radix_cache.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/router/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/router/scheduler.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/tokenizer_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/managers/tokenizer_manager.py -------------------------------------------------------------------------------- /python/sglang/srt/memory_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/memory_pool.py -------------------------------------------------------------------------------- /python/sglang/srt/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/mm_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/model_config.py -------------------------------------------------------------------------------- /python/sglang/srt/models/commandr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/commandr.py -------------------------------------------------------------------------------- /python/sglang/srt/models/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/dbrx.py -------------------------------------------------------------------------------- /python/sglang/srt/models/dbrx_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/dbrx_config.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/gemma.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/llama2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/llava.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llava_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/llava_mistral.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llava_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/llava_qwen.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llavavid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/llavavid.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/mistral.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/mixtral.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/qwen.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/qwen2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/stablelm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/yivl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/models/yivl.py -------------------------------------------------------------------------------- /python/sglang/srt/openai_api_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/openai_api_adapter.py -------------------------------------------------------------------------------- /python/sglang/srt/openai_protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/openai_protocol.py -------------------------------------------------------------------------------- /python/sglang/srt/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/sampling_params.py -------------------------------------------------------------------------------- /python/sglang/srt/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/server.py -------------------------------------------------------------------------------- /python/sglang/srt/server_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/server_args.py -------------------------------------------------------------------------------- /python/sglang/srt/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/weight_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/srt/weight_utils.py -------------------------------------------------------------------------------- /python/sglang/test/test_conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/test/test_conversation.py -------------------------------------------------------------------------------- /python/sglang/test/test_openai_protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/test/test_openai_protocol.py -------------------------------------------------------------------------------- /python/sglang/test/test_programs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/test/test_programs.py -------------------------------------------------------------------------------- /python/sglang/test/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/test/test_utils.py -------------------------------------------------------------------------------- /python/sglang/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/sglang/utils.py -------------------------------------------------------------------------------- /python/upload_pypi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/python/upload_pypi.sh -------------------------------------------------------------------------------- /sample_server_async_call.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/sample_server_async_call.py -------------------------------------------------------------------------------- /scripts/convert_yi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/scripts/convert_yi_vl.py -------------------------------------------------------------------------------- /scripts/convert_yi_vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/scripts/convert_yi_vl.sh -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/scripts/format.sh -------------------------------------------------------------------------------- /scripts/launch_tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/scripts/launch_tgi.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/setup.py -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/setup.sh -------------------------------------------------------------------------------- /sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/sim.py -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/killall_sglang.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/killall_sglang.sh -------------------------------------------------------------------------------- /test/lang/example_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/example_image.png -------------------------------------------------------------------------------- /test/lang/run_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/run_all.py -------------------------------------------------------------------------------- /test/lang/test_anthropic_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/test_anthropic_backend.py -------------------------------------------------------------------------------- /test/lang/test_bind_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/test_bind_cache.py -------------------------------------------------------------------------------- /test/lang/test_openai_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/test_openai_backend.py -------------------------------------------------------------------------------- /test/lang/test_srt_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/test_srt_backend.py -------------------------------------------------------------------------------- /test/lang/test_tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/test_tracing.py -------------------------------------------------------------------------------- /test/lang/test_vertexai_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/lang/test_vertexai_backend.py -------------------------------------------------------------------------------- /test/srt/example_image.png: -------------------------------------------------------------------------------- 1 | ../lang/example_image.png -------------------------------------------------------------------------------- /test/srt/model/bench_llama_low_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/model/bench_llama_low_api.py -------------------------------------------------------------------------------- /test/srt/model/reference_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/model/reference_hf.py -------------------------------------------------------------------------------- /test/srt/model/test_llama_extend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/model/test_llama_extend.py -------------------------------------------------------------------------------- /test/srt/model/test_llama_low_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/model/test_llama_low_api.py -------------------------------------------------------------------------------- /test/srt/model/test_llava_low_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/model/test_llava_low_api.py -------------------------------------------------------------------------------- /test/srt/test_curl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_curl.sh -------------------------------------------------------------------------------- /test/srt/test_flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_flashinfer.py -------------------------------------------------------------------------------- /test/srt/test_httpserver_concurrent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_httpserver_concurrent.py -------------------------------------------------------------------------------- /test/srt/test_httpserver_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_httpserver_decode.py -------------------------------------------------------------------------------- /test/srt/test_httpserver_decode_stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_httpserver_decode_stream.py -------------------------------------------------------------------------------- /test/srt/test_httpserver_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_httpserver_llava.py -------------------------------------------------------------------------------- /test/srt/test_httpserver_reuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_httpserver_reuse.py -------------------------------------------------------------------------------- /test/srt/test_jump_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_jump_forward.py -------------------------------------------------------------------------------- /test/srt/test_openai_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_openai_server.py -------------------------------------------------------------------------------- /test/srt/test_robust.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WukLab/preble/HEAD/test/srt/test_robust.py --------------------------------------------------------------------------------