├── .gitignore ├── Deepspeed-MII ├── A100 │ ├── README.md │ ├── pipeline.py │ └── requirements.txt ├── Gaudi2 │ ├── README.md │ ├── habana_power.py │ ├── run-power-bench.sh │ ├── run-throughput-bench.sh │ ├── run_generation.py │ └── run_generation_power.py └── README.md ├── LICENSE ├── Plots ├── All_results.csv ├── Fig_10 │ ├── All_results.csv │ ├── Figure 10.ipynb │ └── Perplexity_vs_Throughput_7B_vLLM_A100.pdf ├── Fig_11 │ ├── DS_MII_7B_scaling_A100.pdf │ └── Fig_11.ipynb ├── Fig_12 │ ├── Fig 12.ipynb │ └── TRT_LLM_vLLM_DS_MII.pdf ├── Fig_13 │ ├── Fig_13.ipynb │ └── llama_cpp_7B_Batch_size_across_hardware.pdf ├── Fig_14 │ ├── Fig 14.ipynb │ └── llama_cpp_7B_scaling_across_hardware.pdf ├── Fig_15 │ ├── 7B_Models_Framework_Comparison_Batch_Size.pdf │ └── Fig 15.ipynb ├── Fig_16_a │ ├── Fig 16 a.ipynb │ ├── llama_3_8b_A100_H100_power.pdf │ └── power_results.csv ├── Fig_16_a_b │ ├── Fig 16 a b.ipynb │ ├── Fig 16 a.ipynb │ ├── Fig 16 b.ipynb │ ├── llama_3_8b_power_perf_per_watt.pdf │ └── power_results.csv ├── Fig_16_b │ ├── Fig 16 b.ipynb │ ├── llama_3_8b_perf_per_watt.pdf │ └── power_results.csv ├── Fig_17 │ ├── Fig_17.ipynb │ └── llama_3_batch_size_num_GPUs_input_output_size.pdf ├── Fig_18 │ ├── Fig 18.ipynb │ ├── SN40L_A100_7B.pdf │ ├── SN40L_latency_throughput.csv │ └── get_SN40L_results.ipynb ├── Fig_19 │ ├── Fig 19.ipynb │ └── SN40L_A100_H100_70B.pdf ├── Fig_1_a │ ├── Fig_1_a.ipynb │ └── llama_3_batch_size_vs_input_length_vLLM.pdf ├── Fig_1_b │ ├── Heatmap.ipynb │ ├── Heatmap_input_vs_output.csv │ └── llama_3_8B_Heatmap_TensorRT_LLM.pdf ├── Fig_20 │ ├── A100_Gaudi2_compare.pdf │ └── Fig 20.ipynb ├── Fig_21 │ ├── Fig 21.ipynb │ ├── TTFT.pdf │ └── all_results_TTFT.csv ├── Fig_22 │ ├── Fig 22.ipynb │ ├── ITL.pdf │ └── all_results_TTFT_ITL.csv ├── Fig_23 │ ├── Fig 23.ipynb │ └── llama_3_8b_across_HW_comparison.pdf ├── Fig_24 │ ├── Fig 24.ipynb │ └── llama_3_8b_across_HW_token_length_compare.pdf ├── Fig_25 │ ├── Fig 25.ipynb │ └── HW_peak_perf.pdf ├── Fig_29 │ ├── All_results.csv │ ├── Fig 29.ipynb │ └── Perplexity_vs_Throughput_7B_vLLM_H100.pdf ├── Fig_2_a │ ├── All_results.csv │ ├── Fig 2 a.ipynb │ └── w_o_KV_Cache_70B.pdf ├── Fig_2_b │ ├── All_results.csv │ ├── Fig 2 b.ipynb │ └── KV_Cache_Block_size_llama_3_8B.pdf ├── Fig_3 │ ├── A100_H100_quant_all_results.csv │ ├── Fig 3.ipynb │ ├── LLaMA_3_8b_quant.pdf │ └── qaunt_results │ │ ├── GH200_results_Meta-Llama-3-8B-Instruct-FP8.csv │ │ ├── kv_cache_Meta-Llama-3-8B-Instruct-FP8.csv │ │ ├── quant_all_results.csv │ │ ├── results_Meta-Llama-3-8B-Instruct-FP8.csv │ │ └── throughput_results_Meta-Llama-3-8B-Instruct-FP8 (1).csv ├── Fig_30 │ ├── 7B_A100_Plots_TRT_LLM.pdf │ └── Fig_30.ipynb ├── Fig_31 │ ├── Fig 31.ipynb │ └── vLLM_A100_H100_PVC_MI250_7B_Models.pdf ├── Fig_32 │ ├── Fig_32.ipynb │ └── llama_cpp_70B_Batch_size_across_hardware.pdf ├── Fig_33 │ ├── 7B_Batch_Size_Framework_Comparison.pdf │ └── Fig_33.ipynb ├── Fig_34 │ ├── 70B_A100_H100.pdf │ └── Fig_34.ipynb ├── Fig_35 │ ├── 7B_Models_Framework_Comparison_Batch_Size_vLLM_MI250.pdf │ └── Fig_35.ipynb ├── Fig_36 │ ├── 7B_Models_Framework_Comparison_Batch_Size_llamacpp_MI250.pdf │ └── Fig_36.ipynb ├── Fig_37 │ ├── Fig_37.ipynb │ └── MI250_70B_Batch_size_vLLM.pdf ├── Fig_38 │ ├── A100_Gaudi2_compare_70b.pdf │ └── Fig 38.ipynb ├── Fig_39 │ ├── Fig 39.ipynb │ └── mistral_7b_across_HW_comparison.pdf ├── Fig_40 │ ├── Fig 40.ipynb │ └── llama_3_8b_across_HW_token_length_compare.pdf ├── Fig_4_a │ ├── Fig 4 a.ipynb │ └── NAS.pdf ├── Fig_4_b │ ├── All_results.csv │ ├── Fig 4b.ipynb │ └── Speculative_decoding.pdf ├── Fig_5_a │ ├── Fig 5a.ipynb │ ├── TP_PP_Results.csv │ └── llama_3_TP_PP.pdf ├── Fig_5_b │ ├── Fig 5b.ipynb │ ├── Mixtral_TP_PP_EP.pdf │ └── TP_EP_PP_results.csv ├── Fig_6 │ ├── 7B_A100_H100.pdf │ └── Fig_6.ipynb ├── Fig_7 │ ├── 70B_Plots_TRT_LLM.pdf │ └── Fig_7.ipynb ├── Fig_8 │ ├── 7B_Models_A100_H100_PVC_MI250.pdf │ └── Fig_8.ipynb └── Fig_9 │ ├── 70B_Models_A100_H100_MI250.pdf │ └── Fig_9.ipynb ├── README.md ├── Sambaflow ├── README.md └── SN40L │ └── README.md ├── TensorRT-LLM ├── A100 │ ├── Benchmarking_Throughput │ │ ├── README.MD │ │ ├── convert_checkpoint.py │ │ ├── p-llama2-7b.sh │ │ ├── power_utils.py │ │ ├── q-llama2-7b.sh │ │ ├── requirements.txt │ │ ├── run.py │ │ ├── run_power.py │ │ ├── run_precision_bench.py │ │ └── utils.py │ └── README.MD ├── GH200 │ ├── README.MD │ ├── build-container.sh │ ├── convert_checkpoint.py │ ├── power_utils.py │ ├── run-container-power.sh │ ├── run-container-precision.sh │ ├── run-container-throughput.sh │ ├── run-power-bench.sh │ ├── run-precision-bench.sh │ ├── run-throughput-bench.sh │ ├── run.py │ ├── run_power.py │ ├── run_precision.py │ ├── trt-llm-gh200.def │ └── utils.py ├── H100 │ ├── README.MD │ ├── convert_checkpoint.py │ ├── p-llama2-7b.sh │ ├── power_utils.py │ ├── q-llama2-7b.sh │ ├── requirements.txt │ ├── run.py │ ├── run_power.py │ ├── run_precision_bench.py │ └── utils.py └── README.md ├── llama.cpp ├── A100 │ └── README.MD ├── GH200 │ ├── README.MD │ ├── build-container.sh │ ├── llama-cpp-gh200.def │ ├── llama2-7b.sh │ └── rc-llama2-7b.sh ├── H100 │ ├── README.MD │ ├── llama2-70b.sh │ ├── llama2-7b.sh │ ├── llama3-70b.sh │ ├── llama3-8b.sh │ ├── mistral-7b.sh │ ├── mixtral8x7b.sh │ ├── qwen2-72b.sh │ └── qwen2-7b.sh ├── MI250 │ ├── README.MD │ ├── llama2-70b.sh │ ├── llama2-7b.sh │ ├── llama3-70b.sh │ ├── llama3-8b.sh │ ├── mistral-7b.sh │ ├── mixtral8x7b.sh │ ├── qwen2-72b.sh │ └── qwen2-7b.sh ├── MI300X │ ├── README.md │ └── run-benchmark.sh ├── Max1550 │ ├── README.MD │ ├── llama2-70b.sh │ ├── llama2-7b.sh │ ├── llama3-70b.sh │ ├── llama3-8b.sh │ ├── mistral-7b.sh │ ├── mixtral7x8b.sh │ ├── qwen2-72b.sh │ └── qwen2-7b.sh └── README.md └── vLLM ├── A100 ├── Power_measurement │ ├── README.MD │ ├── benchmark_power.py │ ├── benchmark_power.sh │ ├── power_utils.py │ ├── requirements-common.txt │ └── requirements-cuda.txt ├── README.MD └── Throughput_measurement │ ├── README.MD │ ├── benchmark_throughput.py │ ├── benchmark_throughput.sh │ ├── requirements-common.txt │ └── requirements-cuda.txt ├── GH200 ├── README.MD ├── benchmark_latency.py ├── benchmark_power.py ├── build-container.sh ├── power_utils.py ├── run-container-power.sh ├── run-container-throughput.sh ├── run-power-bench.sh ├── run-throughput-bench.sh └── vllm-gh200.def ├── Gaudi2 └── README.MD ├── H100 ├── README.MD ├── benchmark_power.py ├── benchmark_throughput.py ├── power_utils.py ├── run-power-bench.sh └── run-throughput-bench.sh ├── MI250 ├── README.MD ├── benchmark_throughput.py ├── run-container.sh └── run-throughput-bench.sh ├── MI300X ├── README.md ├── benchmark_throughput.py └── run-benchmark.sh ├── Max1550 ├── README.md ├── benchmark_latency.py └── run-bench.sh └── README.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/.gitignore -------------------------------------------------------------------------------- /Deepspeed-MII/A100/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/A100/README.md -------------------------------------------------------------------------------- /Deepspeed-MII/A100/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/A100/pipeline.py -------------------------------------------------------------------------------- /Deepspeed-MII/A100/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/A100/requirements.txt -------------------------------------------------------------------------------- /Deepspeed-MII/Gaudi2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/README.md -------------------------------------------------------------------------------- /Deepspeed-MII/Gaudi2/habana_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/habana_power.py -------------------------------------------------------------------------------- /Deepspeed-MII/Gaudi2/run-power-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run-power-bench.sh -------------------------------------------------------------------------------- /Deepspeed-MII/Gaudi2/run-throughput-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run-throughput-bench.sh -------------------------------------------------------------------------------- /Deepspeed-MII/Gaudi2/run_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run_generation.py -------------------------------------------------------------------------------- /Deepspeed-MII/Gaudi2/run_generation_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run_generation_power.py -------------------------------------------------------------------------------- /Deepspeed-MII/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/README.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/LICENSE -------------------------------------------------------------------------------- /Plots/All_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/All_results.csv -------------------------------------------------------------------------------- /Plots/Fig_10/All_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_10/All_results.csv -------------------------------------------------------------------------------- /Plots/Fig_10/Figure 10.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_10/Figure 10.ipynb -------------------------------------------------------------------------------- /Plots/Fig_10/Perplexity_vs_Throughput_7B_vLLM_A100.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_10/Perplexity_vs_Throughput_7B_vLLM_A100.pdf -------------------------------------------------------------------------------- /Plots/Fig_11/DS_MII_7B_scaling_A100.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_11/DS_MII_7B_scaling_A100.pdf -------------------------------------------------------------------------------- /Plots/Fig_11/Fig_11.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_11/Fig_11.ipynb -------------------------------------------------------------------------------- /Plots/Fig_12/Fig 12.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_12/Fig 12.ipynb -------------------------------------------------------------------------------- /Plots/Fig_12/TRT_LLM_vLLM_DS_MII.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_12/TRT_LLM_vLLM_DS_MII.pdf -------------------------------------------------------------------------------- /Plots/Fig_13/Fig_13.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_13/Fig_13.ipynb -------------------------------------------------------------------------------- /Plots/Fig_13/llama_cpp_7B_Batch_size_across_hardware.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_13/llama_cpp_7B_Batch_size_across_hardware.pdf -------------------------------------------------------------------------------- /Plots/Fig_14/Fig 14.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_14/Fig 14.ipynb -------------------------------------------------------------------------------- /Plots/Fig_14/llama_cpp_7B_scaling_across_hardware.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_14/llama_cpp_7B_scaling_across_hardware.pdf -------------------------------------------------------------------------------- /Plots/Fig_15/7B_Models_Framework_Comparison_Batch_Size.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_15/7B_Models_Framework_Comparison_Batch_Size.pdf -------------------------------------------------------------------------------- /Plots/Fig_15/Fig 15.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_15/Fig 15.ipynb -------------------------------------------------------------------------------- /Plots/Fig_16_a/Fig 16 a.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a/Fig 16 a.ipynb -------------------------------------------------------------------------------- /Plots/Fig_16_a/llama_3_8b_A100_H100_power.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a/llama_3_8b_A100_H100_power.pdf -------------------------------------------------------------------------------- /Plots/Fig_16_a/power_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a/power_results.csv -------------------------------------------------------------------------------- /Plots/Fig_16_a_b/Fig 16 a b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/Fig 16 a b.ipynb -------------------------------------------------------------------------------- /Plots/Fig_16_a_b/Fig 16 a.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/Fig 16 a.ipynb -------------------------------------------------------------------------------- /Plots/Fig_16_a_b/Fig 16 b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/Fig 16 b.ipynb -------------------------------------------------------------------------------- /Plots/Fig_16_a_b/llama_3_8b_power_perf_per_watt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/llama_3_8b_power_perf_per_watt.pdf -------------------------------------------------------------------------------- /Plots/Fig_16_a_b/power_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/power_results.csv -------------------------------------------------------------------------------- /Plots/Fig_16_b/Fig 16 b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_b/Fig 16 b.ipynb -------------------------------------------------------------------------------- /Plots/Fig_16_b/llama_3_8b_perf_per_watt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_b/llama_3_8b_perf_per_watt.pdf -------------------------------------------------------------------------------- /Plots/Fig_16_b/power_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_b/power_results.csv -------------------------------------------------------------------------------- /Plots/Fig_17/Fig_17.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_17/Fig_17.ipynb -------------------------------------------------------------------------------- /Plots/Fig_17/llama_3_batch_size_num_GPUs_input_output_size.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_17/llama_3_batch_size_num_GPUs_input_output_size.pdf -------------------------------------------------------------------------------- /Plots/Fig_18/Fig 18.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/Fig 18.ipynb -------------------------------------------------------------------------------- /Plots/Fig_18/SN40L_A100_7B.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/SN40L_A100_7B.pdf -------------------------------------------------------------------------------- /Plots/Fig_18/SN40L_latency_throughput.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/SN40L_latency_throughput.csv -------------------------------------------------------------------------------- /Plots/Fig_18/get_SN40L_results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/get_SN40L_results.ipynb -------------------------------------------------------------------------------- /Plots/Fig_19/Fig 19.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_19/Fig 19.ipynb -------------------------------------------------------------------------------- /Plots/Fig_19/SN40L_A100_H100_70B.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_19/SN40L_A100_H100_70B.pdf -------------------------------------------------------------------------------- /Plots/Fig_1_a/Fig_1_a.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_a/Fig_1_a.ipynb -------------------------------------------------------------------------------- /Plots/Fig_1_a/llama_3_batch_size_vs_input_length_vLLM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_a/llama_3_batch_size_vs_input_length_vLLM.pdf -------------------------------------------------------------------------------- /Plots/Fig_1_b/Heatmap.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_b/Heatmap.ipynb -------------------------------------------------------------------------------- /Plots/Fig_1_b/Heatmap_input_vs_output.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_b/Heatmap_input_vs_output.csv -------------------------------------------------------------------------------- /Plots/Fig_1_b/llama_3_8B_Heatmap_TensorRT_LLM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_b/llama_3_8B_Heatmap_TensorRT_LLM.pdf -------------------------------------------------------------------------------- /Plots/Fig_20/A100_Gaudi2_compare.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_20/A100_Gaudi2_compare.pdf -------------------------------------------------------------------------------- /Plots/Fig_20/Fig 20.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_20/Fig 20.ipynb -------------------------------------------------------------------------------- /Plots/Fig_21/Fig 21.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_21/Fig 21.ipynb -------------------------------------------------------------------------------- /Plots/Fig_21/TTFT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_21/TTFT.pdf -------------------------------------------------------------------------------- /Plots/Fig_21/all_results_TTFT.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_21/all_results_TTFT.csv -------------------------------------------------------------------------------- /Plots/Fig_22/Fig 22.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_22/Fig 22.ipynb -------------------------------------------------------------------------------- /Plots/Fig_22/ITL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_22/ITL.pdf -------------------------------------------------------------------------------- /Plots/Fig_22/all_results_TTFT_ITL.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_22/all_results_TTFT_ITL.csv -------------------------------------------------------------------------------- /Plots/Fig_23/Fig 23.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_23/Fig 23.ipynb -------------------------------------------------------------------------------- /Plots/Fig_23/llama_3_8b_across_HW_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_23/llama_3_8b_across_HW_comparison.pdf -------------------------------------------------------------------------------- /Plots/Fig_24/Fig 24.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_24/Fig 24.ipynb -------------------------------------------------------------------------------- /Plots/Fig_24/llama_3_8b_across_HW_token_length_compare.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_24/llama_3_8b_across_HW_token_length_compare.pdf -------------------------------------------------------------------------------- /Plots/Fig_25/Fig 25.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_25/Fig 25.ipynb -------------------------------------------------------------------------------- /Plots/Fig_25/HW_peak_perf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_25/HW_peak_perf.pdf -------------------------------------------------------------------------------- /Plots/Fig_29/All_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_29/All_results.csv -------------------------------------------------------------------------------- /Plots/Fig_29/Fig 29.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_29/Fig 29.ipynb -------------------------------------------------------------------------------- /Plots/Fig_29/Perplexity_vs_Throughput_7B_vLLM_H100.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_29/Perplexity_vs_Throughput_7B_vLLM_H100.pdf -------------------------------------------------------------------------------- /Plots/Fig_2_a/All_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_a/All_results.csv -------------------------------------------------------------------------------- /Plots/Fig_2_a/Fig 2 a.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_a/Fig 2 a.ipynb -------------------------------------------------------------------------------- /Plots/Fig_2_a/w_o_KV_Cache_70B.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_a/w_o_KV_Cache_70B.pdf -------------------------------------------------------------------------------- /Plots/Fig_2_b/All_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_b/All_results.csv -------------------------------------------------------------------------------- /Plots/Fig_2_b/Fig 2 b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_b/Fig 2 b.ipynb -------------------------------------------------------------------------------- /Plots/Fig_2_b/KV_Cache_Block_size_llama_3_8B.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_b/KV_Cache_Block_size_llama_3_8B.pdf -------------------------------------------------------------------------------- /Plots/Fig_3/A100_H100_quant_all_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/A100_H100_quant_all_results.csv -------------------------------------------------------------------------------- /Plots/Fig_3/Fig 3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/Fig 3.ipynb -------------------------------------------------------------------------------- /Plots/Fig_3/LLaMA_3_8b_quant.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/LLaMA_3_8b_quant.pdf -------------------------------------------------------------------------------- /Plots/Fig_3/qaunt_results/GH200_results_Meta-Llama-3-8B-Instruct-FP8.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/GH200_results_Meta-Llama-3-8B-Instruct-FP8.csv -------------------------------------------------------------------------------- /Plots/Fig_3/qaunt_results/kv_cache_Meta-Llama-3-8B-Instruct-FP8.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/kv_cache_Meta-Llama-3-8B-Instruct-FP8.csv -------------------------------------------------------------------------------- /Plots/Fig_3/qaunt_results/quant_all_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/quant_all_results.csv -------------------------------------------------------------------------------- /Plots/Fig_3/qaunt_results/results_Meta-Llama-3-8B-Instruct-FP8.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/results_Meta-Llama-3-8B-Instruct-FP8.csv -------------------------------------------------------------------------------- /Plots/Fig_3/qaunt_results/throughput_results_Meta-Llama-3-8B-Instruct-FP8 (1).csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/throughput_results_Meta-Llama-3-8B-Instruct-FP8 (1).csv -------------------------------------------------------------------------------- /Plots/Fig_30/7B_A100_Plots_TRT_LLM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_30/7B_A100_Plots_TRT_LLM.pdf -------------------------------------------------------------------------------- /Plots/Fig_30/Fig_30.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_30/Fig_30.ipynb -------------------------------------------------------------------------------- /Plots/Fig_31/Fig 31.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_31/Fig 31.ipynb -------------------------------------------------------------------------------- /Plots/Fig_31/vLLM_A100_H100_PVC_MI250_7B_Models.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_31/vLLM_A100_H100_PVC_MI250_7B_Models.pdf -------------------------------------------------------------------------------- /Plots/Fig_32/Fig_32.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_32/Fig_32.ipynb -------------------------------------------------------------------------------- /Plots/Fig_32/llama_cpp_70B_Batch_size_across_hardware.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_32/llama_cpp_70B_Batch_size_across_hardware.pdf -------------------------------------------------------------------------------- /Plots/Fig_33/7B_Batch_Size_Framework_Comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_33/7B_Batch_Size_Framework_Comparison.pdf -------------------------------------------------------------------------------- /Plots/Fig_33/Fig_33.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_33/Fig_33.ipynb -------------------------------------------------------------------------------- /Plots/Fig_34/70B_A100_H100.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_34/70B_A100_H100.pdf -------------------------------------------------------------------------------- /Plots/Fig_34/Fig_34.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_34/Fig_34.ipynb -------------------------------------------------------------------------------- /Plots/Fig_35/7B_Models_Framework_Comparison_Batch_Size_vLLM_MI250.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_35/7B_Models_Framework_Comparison_Batch_Size_vLLM_MI250.pdf -------------------------------------------------------------------------------- /Plots/Fig_35/Fig_35.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_35/Fig_35.ipynb -------------------------------------------------------------------------------- /Plots/Fig_36/7B_Models_Framework_Comparison_Batch_Size_llamacpp_MI250.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_36/7B_Models_Framework_Comparison_Batch_Size_llamacpp_MI250.pdf -------------------------------------------------------------------------------- /Plots/Fig_36/Fig_36.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_36/Fig_36.ipynb -------------------------------------------------------------------------------- /Plots/Fig_37/Fig_37.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_37/Fig_37.ipynb -------------------------------------------------------------------------------- /Plots/Fig_37/MI250_70B_Batch_size_vLLM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_37/MI250_70B_Batch_size_vLLM.pdf -------------------------------------------------------------------------------- /Plots/Fig_38/A100_Gaudi2_compare_70b.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_38/A100_Gaudi2_compare_70b.pdf -------------------------------------------------------------------------------- /Plots/Fig_38/Fig 38.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_38/Fig 38.ipynb -------------------------------------------------------------------------------- /Plots/Fig_39/Fig 39.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_39/Fig 39.ipynb -------------------------------------------------------------------------------- /Plots/Fig_39/mistral_7b_across_HW_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_39/mistral_7b_across_HW_comparison.pdf -------------------------------------------------------------------------------- /Plots/Fig_40/Fig 40.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_40/Fig 40.ipynb -------------------------------------------------------------------------------- /Plots/Fig_40/llama_3_8b_across_HW_token_length_compare.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_40/llama_3_8b_across_HW_token_length_compare.pdf -------------------------------------------------------------------------------- /Plots/Fig_4_a/Fig 4 a.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_a/Fig 4 a.ipynb -------------------------------------------------------------------------------- /Plots/Fig_4_a/NAS.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_a/NAS.pdf -------------------------------------------------------------------------------- /Plots/Fig_4_b/All_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_b/All_results.csv -------------------------------------------------------------------------------- /Plots/Fig_4_b/Fig 4b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_b/Fig 4b.ipynb -------------------------------------------------------------------------------- /Plots/Fig_4_b/Speculative_decoding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_b/Speculative_decoding.pdf -------------------------------------------------------------------------------- /Plots/Fig_5_a/Fig 5a.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_a/Fig 5a.ipynb -------------------------------------------------------------------------------- /Plots/Fig_5_a/TP_PP_Results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_a/TP_PP_Results.csv -------------------------------------------------------------------------------- /Plots/Fig_5_a/llama_3_TP_PP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_a/llama_3_TP_PP.pdf -------------------------------------------------------------------------------- /Plots/Fig_5_b/Fig 5b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_b/Fig 5b.ipynb -------------------------------------------------------------------------------- /Plots/Fig_5_b/Mixtral_TP_PP_EP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_b/Mixtral_TP_PP_EP.pdf -------------------------------------------------------------------------------- /Plots/Fig_5_b/TP_EP_PP_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_b/TP_EP_PP_results.csv -------------------------------------------------------------------------------- /Plots/Fig_6/7B_A100_H100.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_6/7B_A100_H100.pdf -------------------------------------------------------------------------------- /Plots/Fig_6/Fig_6.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_6/Fig_6.ipynb -------------------------------------------------------------------------------- /Plots/Fig_7/70B_Plots_TRT_LLM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_7/70B_Plots_TRT_LLM.pdf -------------------------------------------------------------------------------- /Plots/Fig_7/Fig_7.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_7/Fig_7.ipynb -------------------------------------------------------------------------------- /Plots/Fig_8/7B_Models_A100_H100_PVC_MI250.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_8/7B_Models_A100_H100_PVC_MI250.pdf -------------------------------------------------------------------------------- /Plots/Fig_8/Fig_8.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_8/Fig_8.ipynb -------------------------------------------------------------------------------- /Plots/Fig_9/70B_Models_A100_H100_MI250.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_9/70B_Models_A100_H100_MI250.pdf -------------------------------------------------------------------------------- /Plots/Fig_9/Fig_9.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_9/Fig_9.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/README.md -------------------------------------------------------------------------------- /Sambaflow/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Sambaflow/README.md -------------------------------------------------------------------------------- /Sambaflow/SN40L/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Sambaflow/SN40L/README.md -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/README.MD -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/convert_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/convert_checkpoint.py -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/p-llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/p-llama2-7b.sh -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/power_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/power_utils.py -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/q-llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/q-llama2-7b.sh -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/requirements.txt -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/run.py -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/run_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/run_power.py -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/run_precision_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/run_precision_bench.py -------------------------------------------------------------------------------- /TensorRT-LLM/A100/Benchmarking_Throughput/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/utils.py -------------------------------------------------------------------------------- /TensorRT-LLM/A100/README.MD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/README.MD -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/build-container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/build-container.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/convert_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/convert_checkpoint.py -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/power_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/power_utils.py -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run-container-power.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-container-power.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run-container-precision.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-container-precision.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run-container-throughput.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-container-throughput.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run-power-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-power-bench.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run-precision-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-precision-bench.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run-throughput-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-throughput-bench.sh -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run.py -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run_power.py -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/run_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run_precision.py -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/trt-llm-gh200.def: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/trt-llm-gh200.def -------------------------------------------------------------------------------- /TensorRT-LLM/GH200/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/utils.py -------------------------------------------------------------------------------- /TensorRT-LLM/H100/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/README.MD -------------------------------------------------------------------------------- /TensorRT-LLM/H100/convert_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/convert_checkpoint.py -------------------------------------------------------------------------------- /TensorRT-LLM/H100/p-llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/p-llama2-7b.sh -------------------------------------------------------------------------------- /TensorRT-LLM/H100/power_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/power_utils.py -------------------------------------------------------------------------------- /TensorRT-LLM/H100/q-llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/q-llama2-7b.sh -------------------------------------------------------------------------------- /TensorRT-LLM/H100/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/requirements.txt -------------------------------------------------------------------------------- /TensorRT-LLM/H100/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/run.py -------------------------------------------------------------------------------- /TensorRT-LLM/H100/run_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/run_power.py -------------------------------------------------------------------------------- /TensorRT-LLM/H100/run_precision_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/run_precision_bench.py -------------------------------------------------------------------------------- /TensorRT-LLM/H100/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/utils.py -------------------------------------------------------------------------------- /TensorRT-LLM/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/README.md -------------------------------------------------------------------------------- /llama.cpp/A100/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/A100/README.MD -------------------------------------------------------------------------------- /llama.cpp/GH200/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/README.MD -------------------------------------------------------------------------------- /llama.cpp/GH200/build-container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/build-container.sh -------------------------------------------------------------------------------- /llama.cpp/GH200/llama-cpp-gh200.def: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/llama-cpp-gh200.def -------------------------------------------------------------------------------- /llama.cpp/GH200/llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/llama2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/GH200/rc-llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/rc-llama2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/README.MD -------------------------------------------------------------------------------- /llama.cpp/H100/llama2-70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama2-70b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/llama3-70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama3-70b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/llama3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama3-8b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/mistral-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/mistral-7b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/mixtral8x7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/mixtral8x7b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/qwen2-72b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/qwen2-72b.sh -------------------------------------------------------------------------------- /llama.cpp/H100/qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/qwen2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/README.MD -------------------------------------------------------------------------------- /llama.cpp/MI250/llama2-70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama2-70b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/llama3-70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama3-70b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/llama3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama3-8b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/mistral-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/mistral-7b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/mixtral8x7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/mixtral8x7b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/qwen2-72b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/qwen2-72b.sh -------------------------------------------------------------------------------- /llama.cpp/MI250/qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/qwen2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/MI300X/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI300X/README.md -------------------------------------------------------------------------------- /llama.cpp/MI300X/run-benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI300X/run-benchmark.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/README.MD -------------------------------------------------------------------------------- /llama.cpp/Max1550/llama2-70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama2-70b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/llama2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/llama3-70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama3-70b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/llama3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama3-8b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/mistral-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/mistral-7b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/mixtral7x8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/mixtral7x8b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/qwen2-72b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/qwen2-72b.sh -------------------------------------------------------------------------------- /llama.cpp/Max1550/qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/qwen2-7b.sh -------------------------------------------------------------------------------- /llama.cpp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/README.md -------------------------------------------------------------------------------- /vLLM/A100/Power_measurement/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/README.MD -------------------------------------------------------------------------------- /vLLM/A100/Power_measurement/benchmark_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/benchmark_power.py -------------------------------------------------------------------------------- /vLLM/A100/Power_measurement/benchmark_power.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/benchmark_power.sh -------------------------------------------------------------------------------- /vLLM/A100/Power_measurement/power_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/power_utils.py -------------------------------------------------------------------------------- /vLLM/A100/Power_measurement/requirements-common.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/requirements-common.txt -------------------------------------------------------------------------------- /vLLM/A100/Power_measurement/requirements-cuda.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/requirements-cuda.txt -------------------------------------------------------------------------------- /vLLM/A100/README.MD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vLLM/A100/Throughput_measurement/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/README.MD -------------------------------------------------------------------------------- /vLLM/A100/Throughput_measurement/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/benchmark_throughput.py -------------------------------------------------------------------------------- /vLLM/A100/Throughput_measurement/benchmark_throughput.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/benchmark_throughput.sh -------------------------------------------------------------------------------- /vLLM/A100/Throughput_measurement/requirements-common.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/requirements-common.txt -------------------------------------------------------------------------------- /vLLM/A100/Throughput_measurement/requirements-cuda.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/requirements-cuda.txt -------------------------------------------------------------------------------- /vLLM/GH200/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/README.MD -------------------------------------------------------------------------------- /vLLM/GH200/benchmark_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/benchmark_latency.py -------------------------------------------------------------------------------- /vLLM/GH200/benchmark_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/benchmark_power.py -------------------------------------------------------------------------------- /vLLM/GH200/build-container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/build-container.sh -------------------------------------------------------------------------------- /vLLM/GH200/power_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/power_utils.py -------------------------------------------------------------------------------- /vLLM/GH200/run-container-power.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-container-power.sh -------------------------------------------------------------------------------- /vLLM/GH200/run-container-throughput.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-container-throughput.sh -------------------------------------------------------------------------------- /vLLM/GH200/run-power-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-power-bench.sh -------------------------------------------------------------------------------- /vLLM/GH200/run-throughput-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-throughput-bench.sh -------------------------------------------------------------------------------- /vLLM/GH200/vllm-gh200.def: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/vllm-gh200.def -------------------------------------------------------------------------------- /vLLM/Gaudi2/README.MD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vLLM/H100/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/README.MD -------------------------------------------------------------------------------- /vLLM/H100/benchmark_power.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/benchmark_power.py -------------------------------------------------------------------------------- /vLLM/H100/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/benchmark_throughput.py -------------------------------------------------------------------------------- /vLLM/H100/power_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/power_utils.py -------------------------------------------------------------------------------- /vLLM/H100/run-power-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/run-power-bench.sh -------------------------------------------------------------------------------- /vLLM/H100/run-throughput-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/run-throughput-bench.sh -------------------------------------------------------------------------------- /vLLM/MI250/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/README.MD -------------------------------------------------------------------------------- /vLLM/MI250/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/benchmark_throughput.py -------------------------------------------------------------------------------- /vLLM/MI250/run-container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/run-container.sh -------------------------------------------------------------------------------- /vLLM/MI250/run-throughput-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/run-throughput-bench.sh -------------------------------------------------------------------------------- /vLLM/MI300X/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI300X/README.md -------------------------------------------------------------------------------- /vLLM/MI300X/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI300X/benchmark_throughput.py -------------------------------------------------------------------------------- /vLLM/MI300X/run-benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI300X/run-benchmark.sh -------------------------------------------------------------------------------- /vLLM/Max1550/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/Max1550/README.md -------------------------------------------------------------------------------- /vLLM/Max1550/benchmark_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/Max1550/benchmark_latency.py -------------------------------------------------------------------------------- /vLLM/Max1550/run-bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/Max1550/run-bench.sh -------------------------------------------------------------------------------- /vLLM/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/README.md --------------------------------------------------------------------------------