├── .gitignore ├── README.md ├── cli_benchmark.py ├── cli_perf_visual.py ├── cli_structure_analyzer.py ├── images ├── Qwen3-30B-A3B │ ├── graph_decode_Qwen3-30B-A3B_tp1_bs16_seqlen600_genlen128.png │ └── graph_prefill_Qwen3-30B-A3B_tp1_bs16_seqlen600_genlen128.png ├── flops_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── flops_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── grpah_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── grpah_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── latency_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── latency_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── llama2-70b │ ├── flops_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png │ ├── flops_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png │ ├── grpah_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png │ ├── grpah_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png │ ├── latency_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png │ ├── latency_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png │ └── params_llama2-70b_tp8_bs32_seqlen1024_genlen128.png └── params_llama2-70b_tp8_bs32_seqlen1024_genlen128.png ├── llm_counts ├── __init__.py ├── benchmark_analyzer.py ├── configs │ ├── configs │ │ ├── gpu_configs.json │ │ ├── gpu_perf.ini │ │ └── model_configs.json │ ├── gpu_configs.json │ ├── gpu_info.csv │ ├── gpu_perf.ini │ └── model_configs.json ├── count_flops.py ├── count_latency.py ├── count_memory.py ├── count_params.py ├── layer_graph_visualizer.py └── utils │ ├── __init__.py │ ├── config.py │ ├── constants.py │ ├── roofline_model.py │ ├── utils.py │ └── visualizer.py └── scripts ├── all2all_volume.py └── allreduce_volume.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/README.md -------------------------------------------------------------------------------- /cli_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/cli_benchmark.py -------------------------------------------------------------------------------- /cli_perf_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/cli_perf_visual.py -------------------------------------------------------------------------------- /cli_structure_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/cli_structure_analyzer.py -------------------------------------------------------------------------------- /images/Qwen3-30B-A3B/graph_decode_Qwen3-30B-A3B_tp1_bs16_seqlen600_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/Qwen3-30B-A3B/graph_decode_Qwen3-30B-A3B_tp1_bs16_seqlen600_genlen128.png -------------------------------------------------------------------------------- /images/Qwen3-30B-A3B/graph_prefill_Qwen3-30B-A3B_tp1_bs16_seqlen600_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/Qwen3-30B-A3B/graph_prefill_Qwen3-30B-A3B_tp1_bs16_seqlen600_genlen128.png -------------------------------------------------------------------------------- /images/flops_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/flops_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/flops_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/flops_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/grpah_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/grpah_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/grpah_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/grpah_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/latency_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/latency_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/latency_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/latency_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/flops_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/flops_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/flops_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/flops_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/grpah_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/grpah_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/grpah_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/grpah_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/latency_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/latency_decode_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/latency_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/latency_prefill_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/llama2-70b/params_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/llama2-70b/params_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /images/params_llama2-70b_tp8_bs32_seqlen1024_genlen128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/images/params_llama2-70b_tp8_bs32_seqlen1024_genlen128.png -------------------------------------------------------------------------------- /llm_counts/__init__.py: -------------------------------------------------------------------------------- 1 | # LLMCounts package -------------------------------------------------------------------------------- /llm_counts/benchmark_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/benchmark_analyzer.py -------------------------------------------------------------------------------- /llm_counts/configs/configs/gpu_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/configs/gpu_configs.json -------------------------------------------------------------------------------- /llm_counts/configs/configs/gpu_perf.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/configs/gpu_perf.ini -------------------------------------------------------------------------------- /llm_counts/configs/configs/model_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/configs/model_configs.json -------------------------------------------------------------------------------- /llm_counts/configs/gpu_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/gpu_configs.json -------------------------------------------------------------------------------- /llm_counts/configs/gpu_info.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/gpu_info.csv -------------------------------------------------------------------------------- /llm_counts/configs/gpu_perf.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/gpu_perf.ini -------------------------------------------------------------------------------- /llm_counts/configs/model_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/configs/model_configs.json -------------------------------------------------------------------------------- /llm_counts/count_flops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/count_flops.py -------------------------------------------------------------------------------- /llm_counts/count_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/count_latency.py -------------------------------------------------------------------------------- /llm_counts/count_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/count_memory.py -------------------------------------------------------------------------------- /llm_counts/count_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/count_params.py -------------------------------------------------------------------------------- /llm_counts/layer_graph_visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/layer_graph_visualizer.py -------------------------------------------------------------------------------- /llm_counts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llm_counts/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/utils/config.py -------------------------------------------------------------------------------- /llm_counts/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/utils/constants.py -------------------------------------------------------------------------------- /llm_counts/utils/roofline_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/utils/roofline_model.py -------------------------------------------------------------------------------- /llm_counts/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/utils/utils.py -------------------------------------------------------------------------------- /llm_counts/utils/visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/llm_counts/utils/visualizer.py -------------------------------------------------------------------------------- /scripts/all2all_volume.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/scripts/all2all_volume.py -------------------------------------------------------------------------------- /scripts/allreduce_volume.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harleyszhang/llm_counts/HEAD/scripts/allreduce_volume.py --------------------------------------------------------------------------------