├── LICENSE ├── README.md ├── characterization ├── characterization.py ├── get_token_length.py ├── token_length_boxplots_all.pdf ├── token_lengths_all.csv └── vicuna-13b-histogram.pdf ├── docs ├── predictor.pdf ├── predictor.png ├── scheduler.pdf └── scheduler.png ├── model-serving ├── .gitignore ├── README.md ├── auto_eval.py ├── auto_eval_barplot.py ├── auto_eval_lineplot.py ├── eval.py ├── fcfs.py ├── fcfs_dynamic.py ├── job.py ├── pairwise-prediction │ ├── eval_predictions.py │ ├── pairwise_warmup_1000K.csv │ └── pairwise_warmup_1000K_grouped.csv ├── prediction │ ├── final │ │ ├── eval_prediction.py │ │ ├── gen_combined_csv.py │ │ ├── output_distribution.py │ │ ├── predictions_all.csv │ │ ├── predictions_multiround_all.csv │ │ ├── predictions_multiround_tail_warmup_cls_1000K.csv │ │ ├── predictions_multiround_tail_warmup_multi_cls_1000K.csv │ │ ├── predictions_multiround_tail_warmup_ordinal_multi_cls_l1_1000K.csv │ │ ├── predictions_multiround_tail_warmup_ordinal_multi_cls_mse_1000K.csv │ │ ├── predictions_multiround_tail_warmup_reg_l1_1000K.csv │ │ ├── predictions_multiround_tail_warmup_reg_mse_1000K.csv │ │ ├── predictions_warmup_cls_1000K.csv │ │ ├── predictions_warmup_multi_cls_1000K.csv │ │ ├── predictions_warmup_ordinal_multi_cls_l1_1000K.csv │ │ ├── predictions_warmup_ordinal_multi_cls_mse_1000K.csv │ │ ├── predictions_warmup_reg_l1_1000K.csv │ │ ├── predictions_warmup_reg_mse_1000K.csv │ │ ├── ranking_eval_prediction.py │ │ ├── residuals_histogram.png │ │ └── residuals_scatter.png │ ├── predictor_overhead.pdf │ └── predictor_overhead_vs_model_serving_latency.py ├── run.sh ├── simulator.py ├── sjf.py ├── sjf_dynamic.py ├── traces │ ├── AzureLLMInferenceDataset2023.ipynb │ ├── AzureLLMInferenceTrace_code.csv │ ├── AzureLLMInferenceTrace_code_int.csv │ ├── AzureLLMInferenceTrace_conv.csv │ ├── AzureLLMInferenceTrace_conv_int.csv │ └── convert_timestamps.py └── util.py ├── output-token-len-prediction ├── .gitignore ├── README.md ├── latency_prediction.py ├── pairwise │ ├── latency_prediction_pairwise.py │ └── preprocess_pairwise_dataset.py ├── preprocess_customized_dataset.py ├── preprocess_dataset.py ├── script.sh ├── test_customized_dataset.csv └── visualization.py └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/README.md -------------------------------------------------------------------------------- /characterization/characterization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/characterization/characterization.py -------------------------------------------------------------------------------- /characterization/get_token_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/characterization/get_token_length.py -------------------------------------------------------------------------------- /characterization/token_length_boxplots_all.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/characterization/token_length_boxplots_all.pdf -------------------------------------------------------------------------------- /characterization/token_lengths_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/characterization/token_lengths_all.csv -------------------------------------------------------------------------------- /characterization/vicuna-13b-histogram.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/characterization/vicuna-13b-histogram.pdf -------------------------------------------------------------------------------- /docs/predictor.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/docs/predictor.pdf -------------------------------------------------------------------------------- /docs/predictor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/docs/predictor.png -------------------------------------------------------------------------------- /docs/scheduler.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/docs/scheduler.pdf -------------------------------------------------------------------------------- /docs/scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/docs/scheduler.png -------------------------------------------------------------------------------- /model-serving/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | .DS_Store 4 | results/ 5 | -------------------------------------------------------------------------------- /model-serving/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/README.md -------------------------------------------------------------------------------- /model-serving/auto_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/auto_eval.py -------------------------------------------------------------------------------- /model-serving/auto_eval_barplot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/auto_eval_barplot.py -------------------------------------------------------------------------------- /model-serving/auto_eval_lineplot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/auto_eval_lineplot.py -------------------------------------------------------------------------------- /model-serving/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/eval.py -------------------------------------------------------------------------------- /model-serving/fcfs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/fcfs.py -------------------------------------------------------------------------------- /model-serving/fcfs_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/fcfs_dynamic.py -------------------------------------------------------------------------------- /model-serving/job.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/job.py -------------------------------------------------------------------------------- /model-serving/pairwise-prediction/eval_predictions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/pairwise-prediction/eval_predictions.py -------------------------------------------------------------------------------- /model-serving/pairwise-prediction/pairwise_warmup_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/pairwise-prediction/pairwise_warmup_1000K.csv -------------------------------------------------------------------------------- /model-serving/pairwise-prediction/pairwise_warmup_1000K_grouped.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/pairwise-prediction/pairwise_warmup_1000K_grouped.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/eval_prediction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/eval_prediction.py -------------------------------------------------------------------------------- /model-serving/prediction/final/gen_combined_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/gen_combined_csv.py -------------------------------------------------------------------------------- /model-serving/prediction/final/output_distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/output_distribution.py -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_all.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_all.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_tail_warmup_cls_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_tail_warmup_cls_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_tail_warmup_multi_cls_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_tail_warmup_multi_cls_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_tail_warmup_ordinal_multi_cls_l1_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_tail_warmup_ordinal_multi_cls_l1_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_tail_warmup_ordinal_multi_cls_mse_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_tail_warmup_ordinal_multi_cls_mse_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_tail_warmup_reg_l1_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_tail_warmup_reg_l1_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_multiround_tail_warmup_reg_mse_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_multiround_tail_warmup_reg_mse_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_warmup_cls_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_warmup_cls_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_warmup_multi_cls_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_warmup_multi_cls_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_warmup_ordinal_multi_cls_l1_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_warmup_ordinal_multi_cls_l1_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_warmup_ordinal_multi_cls_mse_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_warmup_ordinal_multi_cls_mse_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_warmup_reg_l1_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_warmup_reg_l1_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/predictions_warmup_reg_mse_1000K.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/predictions_warmup_reg_mse_1000K.csv -------------------------------------------------------------------------------- /model-serving/prediction/final/ranking_eval_prediction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/ranking_eval_prediction.py -------------------------------------------------------------------------------- /model-serving/prediction/final/residuals_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/residuals_histogram.png -------------------------------------------------------------------------------- /model-serving/prediction/final/residuals_scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/final/residuals_scatter.png -------------------------------------------------------------------------------- /model-serving/prediction/predictor_overhead.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/predictor_overhead.pdf -------------------------------------------------------------------------------- /model-serving/prediction/predictor_overhead_vs_model_serving_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/prediction/predictor_overhead_vs_model_serving_latency.py -------------------------------------------------------------------------------- /model-serving/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/run.sh -------------------------------------------------------------------------------- /model-serving/simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/simulator.py -------------------------------------------------------------------------------- /model-serving/sjf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/sjf.py -------------------------------------------------------------------------------- /model-serving/sjf_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/sjf_dynamic.py -------------------------------------------------------------------------------- /model-serving/traces/AzureLLMInferenceDataset2023.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/traces/AzureLLMInferenceDataset2023.ipynb -------------------------------------------------------------------------------- /model-serving/traces/AzureLLMInferenceTrace_code.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/traces/AzureLLMInferenceTrace_code.csv -------------------------------------------------------------------------------- /model-serving/traces/AzureLLMInferenceTrace_code_int.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/traces/AzureLLMInferenceTrace_code_int.csv -------------------------------------------------------------------------------- /model-serving/traces/AzureLLMInferenceTrace_conv.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/traces/AzureLLMInferenceTrace_conv.csv -------------------------------------------------------------------------------- /model-serving/traces/AzureLLMInferenceTrace_conv_int.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/traces/AzureLLMInferenceTrace_conv_int.csv -------------------------------------------------------------------------------- /model-serving/traces/convert_timestamps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/traces/convert_timestamps.py -------------------------------------------------------------------------------- /model-serving/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/model-serving/util.py -------------------------------------------------------------------------------- /output-token-len-prediction/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/.gitignore -------------------------------------------------------------------------------- /output-token-len-prediction/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/README.md -------------------------------------------------------------------------------- /output-token-len-prediction/latency_prediction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/latency_prediction.py -------------------------------------------------------------------------------- /output-token-len-prediction/pairwise/latency_prediction_pairwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/pairwise/latency_prediction_pairwise.py -------------------------------------------------------------------------------- /output-token-len-prediction/pairwise/preprocess_pairwise_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/pairwise/preprocess_pairwise_dataset.py -------------------------------------------------------------------------------- /output-token-len-prediction/preprocess_customized_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/preprocess_customized_dataset.py -------------------------------------------------------------------------------- /output-token-len-prediction/preprocess_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/preprocess_dataset.py -------------------------------------------------------------------------------- /output-token-len-prediction/script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/script.sh -------------------------------------------------------------------------------- /output-token-len-prediction/test_customized_dataset.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/test_customized_dataset.csv -------------------------------------------------------------------------------- /output-token-len-prediction/visualization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/output-token-len-prediction/visualization.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/James-QiuHaoran/LLM-serving-with-proxy-models/HEAD/requirements.txt --------------------------------------------------------------------------------