├── .gitignore
├── Deepspeed-MII
    ├── A100
    │   ├── README.md
    │   ├── pipeline.py
    │   └── requirements.txt
    ├── Gaudi2
    │   ├── README.md
    │   ├── habana_power.py
    │   ├── run-power-bench.sh
    │   ├── run-throughput-bench.sh
    │   ├── run_generation.py
    │   └── run_generation_power.py
    └── README.md
├── LICENSE
├── Plots
    ├── All_results.csv
    ├── Fig_10
    │   ├── All_results.csv
    │   ├── Figure 10.ipynb
    │   └── Perplexity_vs_Throughput_7B_vLLM_A100.pdf
    ├── Fig_11
    │   ├── DS_MII_7B_scaling_A100.pdf
    │   └── Fig_11.ipynb
    ├── Fig_12
    │   ├── Fig 12.ipynb
    │   └── TRT_LLM_vLLM_DS_MII.pdf
    ├── Fig_13
    │   ├── Fig_13.ipynb
    │   └── llama_cpp_7B_Batch_size_across_hardware.pdf
    ├── Fig_14
    │   ├── Fig 14.ipynb
    │   └── llama_cpp_7B_scaling_across_hardware.pdf
    ├── Fig_15
    │   ├── 7B_Models_Framework_Comparison_Batch_Size.pdf
    │   └── Fig 15.ipynb
    ├── Fig_16_a
    │   ├── Fig 16 a.ipynb
    │   ├── llama_3_8b_A100_H100_power.pdf
    │   └── power_results.csv
    ├── Fig_16_a_b
    │   ├── Fig 16 a b.ipynb
    │   ├── Fig 16 a.ipynb
    │   ├── Fig 16 b.ipynb
    │   ├── llama_3_8b_power_perf_per_watt.pdf
    │   └── power_results.csv
    ├── Fig_16_b
    │   ├── Fig 16 b.ipynb
    │   ├── llama_3_8b_perf_per_watt.pdf
    │   └── power_results.csv
    ├── Fig_17
    │   ├── Fig_17.ipynb
    │   └── llama_3_batch_size_num_GPUs_input_output_size.pdf
    ├── Fig_18
    │   ├── Fig 18.ipynb
    │   ├── SN40L_A100_7B.pdf
    │   ├── SN40L_latency_throughput.csv
    │   └── get_SN40L_results.ipynb
    ├── Fig_19
    │   ├── Fig 19.ipynb
    │   └── SN40L_A100_H100_70B.pdf
    ├── Fig_1_a
    │   ├── Fig_1_a.ipynb
    │   └── llama_3_batch_size_vs_input_length_vLLM.pdf
    ├── Fig_1_b
    │   ├── Heatmap.ipynb
    │   ├── Heatmap_input_vs_output.csv
    │   └── llama_3_8B_Heatmap_TensorRT_LLM.pdf
    ├── Fig_20
    │   ├── A100_Gaudi2_compare.pdf
    │   └── Fig 20.ipynb
    ├── Fig_21
    │   ├── Fig 21.ipynb
    │   ├── TTFT.pdf
    │   └── all_results_TTFT.csv
    ├── Fig_22
    │   ├── Fig 22.ipynb
    │   ├── ITL.pdf
    │   └── all_results_TTFT_ITL.csv
    ├── Fig_23
    │   ├── Fig 23.ipynb
    │   └── llama_3_8b_across_HW_comparison.pdf
    ├── Fig_24
    │   ├── Fig 24.ipynb
    │   └── llama_3_8b_across_HW_token_length_compare.pdf
    ├── Fig_25
    │   ├── Fig 25.ipynb
    │   └── HW_peak_perf.pdf
    ├── Fig_29
    │   ├── All_results.csv
    │   ├── Fig 29.ipynb
    │   └── Perplexity_vs_Throughput_7B_vLLM_H100.pdf
    ├── Fig_2_a
    │   ├── All_results.csv
    │   ├── Fig 2 a.ipynb
    │   └── w_o_KV_Cache_70B.pdf
    ├── Fig_2_b
    │   ├── All_results.csv
    │   ├── Fig 2 b.ipynb
    │   └── KV_Cache_Block_size_llama_3_8B.pdf
    ├── Fig_3
    │   ├── A100_H100_quant_all_results.csv
    │   ├── Fig 3.ipynb
    │   ├── LLaMA_3_8b_quant.pdf
    │   └── qaunt_results
    │   │   ├── GH200_results_Meta-Llama-3-8B-Instruct-FP8.csv
    │   │   ├── kv_cache_Meta-Llama-3-8B-Instruct-FP8.csv
    │   │   ├── quant_all_results.csv
    │   │   ├── results_Meta-Llama-3-8B-Instruct-FP8.csv
    │   │   └── throughput_results_Meta-Llama-3-8B-Instruct-FP8 (1).csv
    ├── Fig_30
    │   ├── 7B_A100_Plots_TRT_LLM.pdf
    │   └── Fig_30.ipynb
    ├── Fig_31
    │   ├── Fig 31.ipynb
    │   └── vLLM_A100_H100_PVC_MI250_7B_Models.pdf
    ├── Fig_32
    │   ├── Fig_32.ipynb
    │   └── llama_cpp_70B_Batch_size_across_hardware.pdf
    ├── Fig_33
    │   ├── 7B_Batch_Size_Framework_Comparison.pdf
    │   └── Fig_33.ipynb
    ├── Fig_34
    │   ├── 70B_A100_H100.pdf
    │   └── Fig_34.ipynb
    ├── Fig_35
    │   ├── 7B_Models_Framework_Comparison_Batch_Size_vLLM_MI250.pdf
    │   └── Fig_35.ipynb
    ├── Fig_36
    │   ├── 7B_Models_Framework_Comparison_Batch_Size_llamacpp_MI250.pdf
    │   └── Fig_36.ipynb
    ├── Fig_37
    │   ├── Fig_37.ipynb
    │   └── MI250_70B_Batch_size_vLLM.pdf
    ├── Fig_38
    │   ├── A100_Gaudi2_compare_70b.pdf
    │   └── Fig 38.ipynb
    ├── Fig_39
    │   ├── Fig 39.ipynb
    │   └── mistral_7b_across_HW_comparison.pdf
    ├── Fig_40
    │   ├── Fig 40.ipynb
    │   └── llama_3_8b_across_HW_token_length_compare.pdf
    ├── Fig_4_a
    │   ├── Fig 4 a.ipynb
    │   └── NAS.pdf
    ├── Fig_4_b
    │   ├── All_results.csv
    │   ├── Fig 4b.ipynb
    │   └── Speculative_decoding.pdf
    ├── Fig_5_a
    │   ├── Fig 5a.ipynb
    │   ├── TP_PP_Results.csv
    │   └── llama_3_TP_PP.pdf
    ├── Fig_5_b
    │   ├── Fig 5b.ipynb
    │   ├── Mixtral_TP_PP_EP.pdf
    │   └── TP_EP_PP_results.csv
    ├── Fig_6
    │   ├── 7B_A100_H100.pdf
    │   └── Fig_6.ipynb
    ├── Fig_7
    │   ├── 70B_Plots_TRT_LLM.pdf
    │   └── Fig_7.ipynb
    ├── Fig_8
    │   ├── 7B_Models_A100_H100_PVC_MI250.pdf
    │   └── Fig_8.ipynb
    └── Fig_9
    │   ├── 70B_Models_A100_H100_MI250.pdf
    │   └── Fig_9.ipynb
├── README.md
├── Sambaflow
    ├── README.md
    └── SN40L
    │   └── README.md
├── TensorRT-LLM
    ├── A100
    │   ├── Benchmarking_Throughput
    │   │   ├── README.MD
    │   │   ├── convert_checkpoint.py
    │   │   ├── p-llama2-7b.sh
    │   │   ├── power_utils.py
    │   │   ├── q-llama2-7b.sh
    │   │   ├── requirements.txt
    │   │   ├── run.py
    │   │   ├── run_power.py
    │   │   ├── run_precision_bench.py
    │   │   └── utils.py
    │   └── README.MD
    ├── GH200
    │   ├── README.MD
    │   ├── build-container.sh
    │   ├── convert_checkpoint.py
    │   ├── power_utils.py
    │   ├── run-container-power.sh
    │   ├── run-container-precision.sh
    │   ├── run-container-throughput.sh
    │   ├── run-power-bench.sh
    │   ├── run-precision-bench.sh
    │   ├── run-throughput-bench.sh
    │   ├── run.py
    │   ├── run_power.py
    │   ├── run_precision.py
    │   ├── trt-llm-gh200.def
    │   └── utils.py
    ├── H100
    │   ├── README.MD
    │   ├── convert_checkpoint.py
    │   ├── p-llama2-7b.sh
    │   ├── power_utils.py
    │   ├── q-llama2-7b.sh
    │   ├── requirements.txt
    │   ├── run.py
    │   ├── run_power.py
    │   ├── run_precision_bench.py
    │   └── utils.py
    └── README.md
├── llama.cpp
    ├── A100
    │   └── README.MD
    ├── GH200
    │   ├── README.MD
    │   ├── build-container.sh
    │   ├── llama-cpp-gh200.def
    │   ├── llama2-7b.sh
    │   └── rc-llama2-7b.sh
    ├── H100
    │   ├── README.MD
    │   ├── llama2-70b.sh
    │   ├── llama2-7b.sh
    │   ├── llama3-70b.sh
    │   ├── llama3-8b.sh
    │   ├── mistral-7b.sh
    │   ├── mixtral8x7b.sh
    │   ├── qwen2-72b.sh
    │   └── qwen2-7b.sh
    ├── MI250
    │   ├── README.MD
    │   ├── llama2-70b.sh
    │   ├── llama2-7b.sh
    │   ├── llama3-70b.sh
    │   ├── llama3-8b.sh
    │   ├── mistral-7b.sh
    │   ├── mixtral8x7b.sh
    │   ├── qwen2-72b.sh
    │   └── qwen2-7b.sh
    ├── MI300X
    │   ├── README.md
    │   └── run-benchmark.sh
    ├── Max1550
    │   ├── README.MD
    │   ├── llama2-70b.sh
    │   ├── llama2-7b.sh
    │   ├── llama3-70b.sh
    │   ├── llama3-8b.sh
    │   ├── mistral-7b.sh
    │   ├── mixtral7x8b.sh
    │   ├── qwen2-72b.sh
    │   └── qwen2-7b.sh
    └── README.md
└── vLLM
    ├── A100
        ├── Power_measurement
        │   ├── README.MD
        │   ├── benchmark_power.py
        │   ├── benchmark_power.sh
        │   ├── power_utils.py
        │   ├── requirements-common.txt
        │   └── requirements-cuda.txt
        ├── README.MD
        └── Throughput_measurement
        │   ├── README.MD
        │   ├── benchmark_throughput.py
        │   ├── benchmark_throughput.sh
        │   ├── requirements-common.txt
        │   └── requirements-cuda.txt
    ├── GH200
        ├── README.MD
        ├── benchmark_latency.py
        ├── benchmark_power.py
        ├── build-container.sh
        ├── power_utils.py
        ├── run-container-power.sh
        ├── run-container-throughput.sh
        ├── run-power-bench.sh
        ├── run-throughput-bench.sh
        └── vllm-gh200.def
    ├── Gaudi2
        └── README.MD
    ├── H100
        ├── README.MD
        ├── benchmark_power.py
        ├── benchmark_throughput.py
        ├── power_utils.py
        ├── run-power-bench.sh
        └── run-throughput-bench.sh
    ├── MI250
        ├── README.MD
        ├── benchmark_throughput.py
        ├── run-container.sh
        └── run-throughput-bench.sh
    ├── MI300X
        ├── README.md
        ├── benchmark_throughput.py
        └── run-benchmark.sh
    ├── Max1550
        ├── README.md
        ├── benchmark_latency.py
        └── run-bench.sh
    └── README.md


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/.gitignore


--------------------------------------------------------------------------------
/Deepspeed-MII/A100/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/A100/README.md


--------------------------------------------------------------------------------
/Deepspeed-MII/A100/pipeline.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/A100/pipeline.py


--------------------------------------------------------------------------------
/Deepspeed-MII/A100/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/A100/requirements.txt


--------------------------------------------------------------------------------
/Deepspeed-MII/Gaudi2/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/README.md


--------------------------------------------------------------------------------
/Deepspeed-MII/Gaudi2/habana_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/habana_power.py


--------------------------------------------------------------------------------
/Deepspeed-MII/Gaudi2/run-power-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run-power-bench.sh


--------------------------------------------------------------------------------
/Deepspeed-MII/Gaudi2/run-throughput-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run-throughput-bench.sh


--------------------------------------------------------------------------------
/Deepspeed-MII/Gaudi2/run_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run_generation.py


--------------------------------------------------------------------------------
/Deepspeed-MII/Gaudi2/run_generation_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/Gaudi2/run_generation_power.py


--------------------------------------------------------------------------------
/Deepspeed-MII/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Deepspeed-MII/README.md


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/LICENSE


--------------------------------------------------------------------------------
/Plots/All_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/All_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_10/All_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_10/All_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_10/Figure 10.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_10/Figure 10.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_10/Perplexity_vs_Throughput_7B_vLLM_A100.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_10/Perplexity_vs_Throughput_7B_vLLM_A100.pdf


--------------------------------------------------------------------------------
/Plots/Fig_11/DS_MII_7B_scaling_A100.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_11/DS_MII_7B_scaling_A100.pdf


--------------------------------------------------------------------------------
/Plots/Fig_11/Fig_11.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_11/Fig_11.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_12/Fig 12.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_12/Fig 12.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_12/TRT_LLM_vLLM_DS_MII.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_12/TRT_LLM_vLLM_DS_MII.pdf


--------------------------------------------------------------------------------
/Plots/Fig_13/Fig_13.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_13/Fig_13.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_13/llama_cpp_7B_Batch_size_across_hardware.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_13/llama_cpp_7B_Batch_size_across_hardware.pdf


--------------------------------------------------------------------------------
/Plots/Fig_14/Fig 14.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_14/Fig 14.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_14/llama_cpp_7B_scaling_across_hardware.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_14/llama_cpp_7B_scaling_across_hardware.pdf


--------------------------------------------------------------------------------
/Plots/Fig_15/7B_Models_Framework_Comparison_Batch_Size.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_15/7B_Models_Framework_Comparison_Batch_Size.pdf


--------------------------------------------------------------------------------
/Plots/Fig_15/Fig 15.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_15/Fig 15.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_16_a/Fig 16 a.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a/Fig 16 a.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_16_a/llama_3_8b_A100_H100_power.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a/llama_3_8b_A100_H100_power.pdf


--------------------------------------------------------------------------------
/Plots/Fig_16_a/power_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a/power_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_16_a_b/Fig 16 a b.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/Fig 16 a b.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_16_a_b/Fig 16 a.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/Fig 16 a.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_16_a_b/Fig 16 b.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/Fig 16 b.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_16_a_b/llama_3_8b_power_perf_per_watt.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/llama_3_8b_power_perf_per_watt.pdf


--------------------------------------------------------------------------------
/Plots/Fig_16_a_b/power_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_a_b/power_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_16_b/Fig 16 b.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_b/Fig 16 b.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_16_b/llama_3_8b_perf_per_watt.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_b/llama_3_8b_perf_per_watt.pdf


--------------------------------------------------------------------------------
/Plots/Fig_16_b/power_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_16_b/power_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_17/Fig_17.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_17/Fig_17.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_17/llama_3_batch_size_num_GPUs_input_output_size.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_17/llama_3_batch_size_num_GPUs_input_output_size.pdf


--------------------------------------------------------------------------------
/Plots/Fig_18/Fig 18.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/Fig 18.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_18/SN40L_A100_7B.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/SN40L_A100_7B.pdf


--------------------------------------------------------------------------------
/Plots/Fig_18/SN40L_latency_throughput.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/SN40L_latency_throughput.csv


--------------------------------------------------------------------------------
/Plots/Fig_18/get_SN40L_results.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_18/get_SN40L_results.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_19/Fig 19.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_19/Fig 19.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_19/SN40L_A100_H100_70B.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_19/SN40L_A100_H100_70B.pdf


--------------------------------------------------------------------------------
/Plots/Fig_1_a/Fig_1_a.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_a/Fig_1_a.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_1_a/llama_3_batch_size_vs_input_length_vLLM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_a/llama_3_batch_size_vs_input_length_vLLM.pdf


--------------------------------------------------------------------------------
/Plots/Fig_1_b/Heatmap.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_b/Heatmap.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_1_b/Heatmap_input_vs_output.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_b/Heatmap_input_vs_output.csv


--------------------------------------------------------------------------------
/Plots/Fig_1_b/llama_3_8B_Heatmap_TensorRT_LLM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_1_b/llama_3_8B_Heatmap_TensorRT_LLM.pdf


--------------------------------------------------------------------------------
/Plots/Fig_20/A100_Gaudi2_compare.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_20/A100_Gaudi2_compare.pdf


--------------------------------------------------------------------------------
/Plots/Fig_20/Fig 20.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_20/Fig 20.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_21/Fig 21.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_21/Fig 21.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_21/TTFT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_21/TTFT.pdf


--------------------------------------------------------------------------------
/Plots/Fig_21/all_results_TTFT.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_21/all_results_TTFT.csv


--------------------------------------------------------------------------------
/Plots/Fig_22/Fig 22.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_22/Fig 22.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_22/ITL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_22/ITL.pdf


--------------------------------------------------------------------------------
/Plots/Fig_22/all_results_TTFT_ITL.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_22/all_results_TTFT_ITL.csv


--------------------------------------------------------------------------------
/Plots/Fig_23/Fig 23.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_23/Fig 23.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_23/llama_3_8b_across_HW_comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_23/llama_3_8b_across_HW_comparison.pdf


--------------------------------------------------------------------------------
/Plots/Fig_24/Fig 24.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_24/Fig 24.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_24/llama_3_8b_across_HW_token_length_compare.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_24/llama_3_8b_across_HW_token_length_compare.pdf


--------------------------------------------------------------------------------
/Plots/Fig_25/Fig 25.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_25/Fig 25.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_25/HW_peak_perf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_25/HW_peak_perf.pdf


--------------------------------------------------------------------------------
/Plots/Fig_29/All_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_29/All_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_29/Fig 29.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_29/Fig 29.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_29/Perplexity_vs_Throughput_7B_vLLM_H100.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_29/Perplexity_vs_Throughput_7B_vLLM_H100.pdf


--------------------------------------------------------------------------------
/Plots/Fig_2_a/All_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_a/All_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_2_a/Fig 2 a.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_a/Fig 2 a.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_2_a/w_o_KV_Cache_70B.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_a/w_o_KV_Cache_70B.pdf


--------------------------------------------------------------------------------
/Plots/Fig_2_b/All_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_b/All_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_2_b/Fig 2 b.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_b/Fig 2 b.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_2_b/KV_Cache_Block_size_llama_3_8B.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_2_b/KV_Cache_Block_size_llama_3_8B.pdf


--------------------------------------------------------------------------------
/Plots/Fig_3/A100_H100_quant_all_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/A100_H100_quant_all_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_3/Fig 3.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/Fig 3.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_3/LLaMA_3_8b_quant.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/LLaMA_3_8b_quant.pdf


--------------------------------------------------------------------------------
/Plots/Fig_3/qaunt_results/GH200_results_Meta-Llama-3-8B-Instruct-FP8.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/GH200_results_Meta-Llama-3-8B-Instruct-FP8.csv


--------------------------------------------------------------------------------
/Plots/Fig_3/qaunt_results/kv_cache_Meta-Llama-3-8B-Instruct-FP8.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/kv_cache_Meta-Llama-3-8B-Instruct-FP8.csv


--------------------------------------------------------------------------------
/Plots/Fig_3/qaunt_results/quant_all_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/quant_all_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_3/qaunt_results/results_Meta-Llama-3-8B-Instruct-FP8.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/results_Meta-Llama-3-8B-Instruct-FP8.csv


--------------------------------------------------------------------------------
/Plots/Fig_3/qaunt_results/throughput_results_Meta-Llama-3-8B-Instruct-FP8 (1).csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_3/qaunt_results/throughput_results_Meta-Llama-3-8B-Instruct-FP8 (1).csv


--------------------------------------------------------------------------------
/Plots/Fig_30/7B_A100_Plots_TRT_LLM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_30/7B_A100_Plots_TRT_LLM.pdf


--------------------------------------------------------------------------------
/Plots/Fig_30/Fig_30.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_30/Fig_30.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_31/Fig 31.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_31/Fig 31.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_31/vLLM_A100_H100_PVC_MI250_7B_Models.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_31/vLLM_A100_H100_PVC_MI250_7B_Models.pdf


--------------------------------------------------------------------------------
/Plots/Fig_32/Fig_32.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_32/Fig_32.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_32/llama_cpp_70B_Batch_size_across_hardware.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_32/llama_cpp_70B_Batch_size_across_hardware.pdf


--------------------------------------------------------------------------------
/Plots/Fig_33/7B_Batch_Size_Framework_Comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_33/7B_Batch_Size_Framework_Comparison.pdf


--------------------------------------------------------------------------------
/Plots/Fig_33/Fig_33.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_33/Fig_33.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_34/70B_A100_H100.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_34/70B_A100_H100.pdf


--------------------------------------------------------------------------------
/Plots/Fig_34/Fig_34.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_34/Fig_34.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_35/7B_Models_Framework_Comparison_Batch_Size_vLLM_MI250.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_35/7B_Models_Framework_Comparison_Batch_Size_vLLM_MI250.pdf


--------------------------------------------------------------------------------
/Plots/Fig_35/Fig_35.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_35/Fig_35.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_36/7B_Models_Framework_Comparison_Batch_Size_llamacpp_MI250.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_36/7B_Models_Framework_Comparison_Batch_Size_llamacpp_MI250.pdf


--------------------------------------------------------------------------------
/Plots/Fig_36/Fig_36.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_36/Fig_36.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_37/Fig_37.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_37/Fig_37.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_37/MI250_70B_Batch_size_vLLM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_37/MI250_70B_Batch_size_vLLM.pdf


--------------------------------------------------------------------------------
/Plots/Fig_38/A100_Gaudi2_compare_70b.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_38/A100_Gaudi2_compare_70b.pdf


--------------------------------------------------------------------------------
/Plots/Fig_38/Fig 38.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_38/Fig 38.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_39/Fig 39.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_39/Fig 39.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_39/mistral_7b_across_HW_comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_39/mistral_7b_across_HW_comparison.pdf


--------------------------------------------------------------------------------
/Plots/Fig_40/Fig 40.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_40/Fig 40.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_40/llama_3_8b_across_HW_token_length_compare.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_40/llama_3_8b_across_HW_token_length_compare.pdf


--------------------------------------------------------------------------------
/Plots/Fig_4_a/Fig 4 a.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_a/Fig 4 a.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_4_a/NAS.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_a/NAS.pdf


--------------------------------------------------------------------------------
/Plots/Fig_4_b/All_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_b/All_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_4_b/Fig 4b.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_b/Fig 4b.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_4_b/Speculative_decoding.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_4_b/Speculative_decoding.pdf


--------------------------------------------------------------------------------
/Plots/Fig_5_a/Fig 5a.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_a/Fig 5a.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_5_a/TP_PP_Results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_a/TP_PP_Results.csv


--------------------------------------------------------------------------------
/Plots/Fig_5_a/llama_3_TP_PP.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_a/llama_3_TP_PP.pdf


--------------------------------------------------------------------------------
/Plots/Fig_5_b/Fig 5b.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_b/Fig 5b.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_5_b/Mixtral_TP_PP_EP.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_b/Mixtral_TP_PP_EP.pdf


--------------------------------------------------------------------------------
/Plots/Fig_5_b/TP_EP_PP_results.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_5_b/TP_EP_PP_results.csv


--------------------------------------------------------------------------------
/Plots/Fig_6/7B_A100_H100.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_6/7B_A100_H100.pdf


--------------------------------------------------------------------------------
/Plots/Fig_6/Fig_6.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_6/Fig_6.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_7/70B_Plots_TRT_LLM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_7/70B_Plots_TRT_LLM.pdf


--------------------------------------------------------------------------------
/Plots/Fig_7/Fig_7.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_7/Fig_7.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_8/7B_Models_A100_H100_PVC_MI250.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_8/7B_Models_A100_H100_PVC_MI250.pdf


--------------------------------------------------------------------------------
/Plots/Fig_8/Fig_8.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_8/Fig_8.ipynb


--------------------------------------------------------------------------------
/Plots/Fig_9/70B_Models_A100_H100_MI250.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_9/70B_Models_A100_H100_MI250.pdf


--------------------------------------------------------------------------------
/Plots/Fig_9/Fig_9.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Plots/Fig_9/Fig_9.ipynb


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/README.md


--------------------------------------------------------------------------------
/Sambaflow/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Sambaflow/README.md


--------------------------------------------------------------------------------
/Sambaflow/SN40L/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/Sambaflow/SN40L/README.md


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/README.MD


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/convert_checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/convert_checkpoint.py


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/p-llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/p-llama2-7b.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/power_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/power_utils.py


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/q-llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/q-llama2-7b.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/requirements.txt


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/run.py


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/run_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/run_power.py


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/run_precision_bench.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/run_precision_bench.py


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/Benchmarking_Throughput/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/A100/Benchmarking_Throughput/utils.py


--------------------------------------------------------------------------------
/TensorRT-LLM/A100/README.MD:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/README.MD


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/build-container.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/build-container.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/convert_checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/convert_checkpoint.py


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/power_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/power_utils.py


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run-container-power.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-container-power.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run-container-precision.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-container-precision.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run-container-throughput.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-container-throughput.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run-power-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-power-bench.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run-precision-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-precision-bench.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run-throughput-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run-throughput-bench.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run.py


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run_power.py


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/run_precision.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/run_precision.py


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/trt-llm-gh200.def:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/trt-llm-gh200.def


--------------------------------------------------------------------------------
/TensorRT-LLM/GH200/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/GH200/utils.py


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/README.MD


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/convert_checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/convert_checkpoint.py


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/p-llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/p-llama2-7b.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/power_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/power_utils.py


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/q-llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/q-llama2-7b.sh


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/requirements.txt


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/run.py


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/run_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/run_power.py


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/run_precision_bench.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/run_precision_bench.py


--------------------------------------------------------------------------------
/TensorRT-LLM/H100/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/H100/utils.py


--------------------------------------------------------------------------------
/TensorRT-LLM/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/TensorRT-LLM/README.md


--------------------------------------------------------------------------------
/llama.cpp/A100/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/A100/README.MD


--------------------------------------------------------------------------------
/llama.cpp/GH200/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/README.MD


--------------------------------------------------------------------------------
/llama.cpp/GH200/build-container.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/build-container.sh


--------------------------------------------------------------------------------
/llama.cpp/GH200/llama-cpp-gh200.def:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/llama-cpp-gh200.def


--------------------------------------------------------------------------------
/llama.cpp/GH200/llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/llama2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/GH200/rc-llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/GH200/rc-llama2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/README.MD


--------------------------------------------------------------------------------
/llama.cpp/H100/llama2-70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama2-70b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/llama3-70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama3-70b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/llama3-8b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/llama3-8b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/mistral-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/mistral-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/mixtral8x7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/mixtral8x7b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/qwen2-72b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/qwen2-72b.sh


--------------------------------------------------------------------------------
/llama.cpp/H100/qwen2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/H100/qwen2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/README.MD


--------------------------------------------------------------------------------
/llama.cpp/MI250/llama2-70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama2-70b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/llama3-70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama3-70b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/llama3-8b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/llama3-8b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/mistral-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/mistral-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/mixtral8x7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/mixtral8x7b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/qwen2-72b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/qwen2-72b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI250/qwen2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI250/qwen2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/MI300X/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI300X/README.md


--------------------------------------------------------------------------------
/llama.cpp/MI300X/run-benchmark.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/MI300X/run-benchmark.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/README.MD


--------------------------------------------------------------------------------
/llama.cpp/Max1550/llama2-70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama2-70b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/llama2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/llama3-70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama3-70b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/llama3-8b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/llama3-8b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/mistral-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/mistral-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/mixtral7x8b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/mixtral7x8b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/qwen2-72b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/qwen2-72b.sh


--------------------------------------------------------------------------------
/llama.cpp/Max1550/qwen2-7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/Max1550/qwen2-7b.sh


--------------------------------------------------------------------------------
/llama.cpp/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/llama.cpp/README.md


--------------------------------------------------------------------------------
/vLLM/A100/Power_measurement/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/README.MD


--------------------------------------------------------------------------------
/vLLM/A100/Power_measurement/benchmark_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/benchmark_power.py


--------------------------------------------------------------------------------
/vLLM/A100/Power_measurement/benchmark_power.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/benchmark_power.sh


--------------------------------------------------------------------------------
/vLLM/A100/Power_measurement/power_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/power_utils.py


--------------------------------------------------------------------------------
/vLLM/A100/Power_measurement/requirements-common.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/requirements-common.txt


--------------------------------------------------------------------------------
/vLLM/A100/Power_measurement/requirements-cuda.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Power_measurement/requirements-cuda.txt


--------------------------------------------------------------------------------
/vLLM/A100/README.MD:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vLLM/A100/Throughput_measurement/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/README.MD


--------------------------------------------------------------------------------
/vLLM/A100/Throughput_measurement/benchmark_throughput.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/benchmark_throughput.py


--------------------------------------------------------------------------------
/vLLM/A100/Throughput_measurement/benchmark_throughput.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/benchmark_throughput.sh


--------------------------------------------------------------------------------
/vLLM/A100/Throughput_measurement/requirements-common.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/requirements-common.txt


--------------------------------------------------------------------------------
/vLLM/A100/Throughput_measurement/requirements-cuda.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/A100/Throughput_measurement/requirements-cuda.txt


--------------------------------------------------------------------------------
/vLLM/GH200/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/README.MD


--------------------------------------------------------------------------------
/vLLM/GH200/benchmark_latency.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/benchmark_latency.py


--------------------------------------------------------------------------------
/vLLM/GH200/benchmark_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/benchmark_power.py


--------------------------------------------------------------------------------
/vLLM/GH200/build-container.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/build-container.sh


--------------------------------------------------------------------------------
/vLLM/GH200/power_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/power_utils.py


--------------------------------------------------------------------------------
/vLLM/GH200/run-container-power.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-container-power.sh


--------------------------------------------------------------------------------
/vLLM/GH200/run-container-throughput.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-container-throughput.sh


--------------------------------------------------------------------------------
/vLLM/GH200/run-power-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-power-bench.sh


--------------------------------------------------------------------------------
/vLLM/GH200/run-throughput-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/run-throughput-bench.sh


--------------------------------------------------------------------------------
/vLLM/GH200/vllm-gh200.def:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/GH200/vllm-gh200.def


--------------------------------------------------------------------------------
/vLLM/Gaudi2/README.MD:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/vLLM/H100/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/README.MD


--------------------------------------------------------------------------------
/vLLM/H100/benchmark_power.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/benchmark_power.py


--------------------------------------------------------------------------------
/vLLM/H100/benchmark_throughput.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/benchmark_throughput.py


--------------------------------------------------------------------------------
/vLLM/H100/power_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/power_utils.py


--------------------------------------------------------------------------------
/vLLM/H100/run-power-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/run-power-bench.sh


--------------------------------------------------------------------------------
/vLLM/H100/run-throughput-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/H100/run-throughput-bench.sh


--------------------------------------------------------------------------------
/vLLM/MI250/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/README.MD


--------------------------------------------------------------------------------
/vLLM/MI250/benchmark_throughput.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/benchmark_throughput.py


--------------------------------------------------------------------------------
/vLLM/MI250/run-container.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/run-container.sh


--------------------------------------------------------------------------------
/vLLM/MI250/run-throughput-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI250/run-throughput-bench.sh


--------------------------------------------------------------------------------
/vLLM/MI300X/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI300X/README.md


--------------------------------------------------------------------------------
/vLLM/MI300X/benchmark_throughput.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI300X/benchmark_throughput.py


--------------------------------------------------------------------------------
/vLLM/MI300X/run-benchmark.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/MI300X/run-benchmark.sh


--------------------------------------------------------------------------------
/vLLM/Max1550/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/Max1550/README.md


--------------------------------------------------------------------------------
/vLLM/Max1550/benchmark_latency.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/Max1550/benchmark_latency.py


--------------------------------------------------------------------------------
/vLLM/Max1550/run-bench.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/Max1550/run-bench.sh


--------------------------------------------------------------------------------
/vLLM/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/argonne-lcf/LLM-Inference-Bench/HEAD/vLLM/README.md


--------------------------------------------------------------------------------