├── .gitignore ├── README.md ├── gpu_usage ├── 01-ai_Yi-1.5-6B-Chat_gpu_usage.csv ├── 01-ai_Yi-1.5-6B_gpu_usage.csv ├── 01-ai_Yi-1.5-9B-Chat_gpu_usage.csv ├── 01-ai_Yi-1.5-9B_gpu_usage.csv ├── NousResearch_Hermes-2-Pro-Mistral-7B_gpu_usage.csv ├── Qwen_Qwen2-7B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-0.5B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-1.5B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-14B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-3B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-7B-Instruct-1M_gpu_usage.csv ├── Qwen_Qwen2.5-7B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-Math-1.5B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-Math-7B-Instruct_gpu_usage.csv ├── Qwen_Qwen2.5-Math-7B_gpu_usage.csv ├── Qwen_Qwen3-0.6B_gpu_usage.csv ├── Qwen_Qwen3-1.7B_gpu_usage.csv ├── Qwen_Qwen3-14B_gpu_usage.csv ├── Qwen_Qwen3-4B_gpu_usage.csv ├── Qwen_Qwen3-8B_gpu_usage.csv ├── deepseek-ai_DeepSeek-R1-0528-Qwen3-8B_gpu_usage.csv ├── deepseek-ai_DeepSeek-R1-Distill-Llama-8B_gpu_usage.csv ├── deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B_gpu_usage.csv ├── deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_gpu_usage.csv ├── deepseek-ai_deepseek-llm-7b-base_gpu_usage.csv ├── deepseek-ai_deepseek-llm-7b-chat_gpu_usage.csv ├── deepseek-ai_deepseek-math-7b-rl_gpu_usage.csv ├── google_gemma-3-12b-it_gpu_usage.csv ├── google_gemma-3-1b-it_gpu_usage.csv ├── google_gemma-3-4b-it_gpu_usage.csv ├── meta-llama_Llama-2-13b-chat-hf_gpu_usage.csv ├── meta-llama_Llama-2-13b-hf_gpu_usage.csv ├── meta-llama_Llama-2-7b-chat-hf_gpu_usage.csv ├── meta-llama_Llama-2-7b-hf_gpu_usage.csv ├── meta-llama_Llama-3.1-8B-Instruct_gpu_usage.csv ├── meta-llama_Llama-3.2-1B-Instruct_gpu_usage.csv ├── meta-llama_Llama-3.2-3B-Instruct_gpu_usage.csv ├── meta-llama_Meta-Llama-3-8B-Instruct_gpu_usage.csv ├── mistralai_Ministral-8B-Instruct-2410_gpu_usage.csv ├── mistralai_Mistral-7B-Instruct-v0.3_gpu_usage.csv └── openchat_openchat-3.6-8b-20240522_gpu_usage.csv ├── main_log.txt ├── parse.ipynb ├── results_json ├── 01-ai_Yi-1.5-6B-Chat_full-bench_2025-07-27T20-07-27.547843.json ├── 01-ai_Yi-1.5-6B_full-bench_2025-08-01T04-25-13.907532.json ├── 01-ai_Yi-1.5-9B-Chat_full-bench_2025-07-27T12-03-18.497948.json ├── 01-ai_Yi-1.5-9B_full-bench_2025-07-28T07-51-08.195989.json ├── DeepSeek-R1-Distill-Qwen-1.5B_2025-08-14T07-21-04.965022.json ├── NousResearch_Hermes-2-Pro-Mistral-7B_2025-08-08T18-11-42.473630.json ├── Qwen_Qwen2-7B-Instruct_full-bench_2025-07-26T00-11-42.488891.json ├── Qwen_Qwen2.5-0.5B-Instruct_2025-08-14T03-40-00.804497.json ├── Qwen_Qwen2.5-1.5B-Instruct_2025-08-14T01-05-38.407962.json ├── Qwen_Qwen2.5-14B-Instruct_2025-08-12T03-24-58.617841.json ├── Qwen_Qwen2.5-3B-Instruct_2025-08-09T12-13-46.725707.json ├── Qwen_Qwen2.5-7B-Instruct-1M_full-bench_2025-08-01T15-42-35.985248.json ├── Qwen_Qwen2.5-7B-Instruct_full-bench_2025-08-06T01-13-10.614187.json ├── Qwen_Qwen2.5-Math-1.5B-Instruct_2025-08-09T04-25-28.095556.json ├── Qwen_Qwen2.5-Math-7B-Instruct_full-bench_2025-08-02T12-57-32.315698.json ├── Qwen_Qwen2.5-Math-7B_full-bench_2025-08-03T16-19-38.915317.json ├── Qwen_Qwen3-0.6B_2025-08-14T15-32-26.452379.json ├── Qwen_Qwen3-1.7B_2025-08-14T11-46-29.566564.json ├── Qwen_Qwen3-14B_2025-08-13T09-10-59.887777.json ├── Qwen_Qwen3-4B_2025-08-14T21-23-53.711549.json ├── Qwen_Qwen3-8B_full-bench_2025-08-02T07-14-43.583524.json ├── deepseek-ai_DeepSeek-R1-0528-Qwen3-8B_full-bench_2025-07-26T18-09-46.798601.json ├── deepseek-ai_DeepSeek-R1-Distill-Llama-8B_full-bench_2025-08-05T12-33-17.758692.json ├── deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_2025-08-06T23-54-44.617651.json ├── deepseek-ai_deepseek-llm-7b-base_2025-08-08T01-15-27.817586.json ├── deepseek-ai_deepseek-llm-7b-chat_2025-08-07T18-04-01.194841.json ├── deepseek-math-7b-rl_2025-08-07T07-56-58.873248.json ├── google_gemma-3-12b-it_full-bench_2025-07-24T00-10-18.873426.json ├── google_gemma-3-1b-it_full-bench_2025-07-23T08-24-17.501846.json ├── google_gemma-3-4b-it_full-bench_2025-07-23T01-33-21.584095.json ├── gpu_util_equation_dark.svg ├── gpu_util_equation_light.svg ├── meta-llama_Llama-2-13b-chat-hf_full-bench_2025-07-24T17-19-25.853170.json ├── meta-llama_Llama-2-13b-hf_full-bench_2025-07-25T12-41-02.249971.json ├── meta-llama_Llama-2-7b-chat-hf_full-bench_2025-07-21T23-01-58.369828.json ├── meta-llama_Llama-2-7b-hf_full-bench_2025-07-20T01-50-06.686535.json ├── meta-llama_Llama-3.1-8B-Instruct_full-bench_2025-07-21T16-03-50.037777.json ├── meta-llama_Llama-3.2-1B-Instruct_full-bench_2025-07-20T20-31-50.105888.json ├── meta-llama_Llama-3.2-3B-Instruct_full-bench_2025-07-21T03-44-18.392701.json ├── meta-llama_Meta-Llama-3-8B-Instruct_full-bench_2025-08-06T07-54-03.388655.json ├── mistralai_Ministral-8B-Instruct-2410_full-bench_2025-07-22T20-42-12.925837.json ├── mistralai_Mistral-7B-Instruct-v0.3_full-bench_2025-08-06T16-32-17.537246.json └── openchat_openchat-3.6-8b-20240522_2025-08-08T09-06-55.893382.json ├── results_tables ├── commonsense_and_nli_rank.csv ├── knowledge_and_reading_rank.csv ├── llm_benchmarks_all_results.xlsx ├── llm_benchmarks_master.csv ├── overall_rank.csv └── reasoning_and_math_rank.csv └── run_benchmarks.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/README.md -------------------------------------------------------------------------------- /gpu_usage/01-ai_Yi-1.5-6B-Chat_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/01-ai_Yi-1.5-6B-Chat_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/01-ai_Yi-1.5-6B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/01-ai_Yi-1.5-6B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/01-ai_Yi-1.5-9B-Chat_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/01-ai_Yi-1.5-9B-Chat_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/01-ai_Yi-1.5-9B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/01-ai_Yi-1.5-9B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/NousResearch_Hermes-2-Pro-Mistral-7B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/NousResearch_Hermes-2-Pro-Mistral-7B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2-7B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2-7B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-0.5B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-0.5B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-1.5B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-1.5B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-14B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-14B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-3B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-3B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-7B-Instruct-1M_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-7B-Instruct-1M_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-7B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-7B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-Math-1.5B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-Math-1.5B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-Math-7B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-Math-7B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen2.5-Math-7B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen2.5-Math-7B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen3-0.6B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen3-0.6B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen3-1.7B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen3-1.7B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen3-14B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen3-14B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen3-4B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen3-4B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/Qwen_Qwen3-8B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/Qwen_Qwen3-8B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_DeepSeek-R1-0528-Qwen3-8B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_DeepSeek-R1-0528-Qwen3-8B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_DeepSeek-R1-Distill-Llama-8B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_DeepSeek-R1-Distill-Llama-8B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_deepseek-llm-7b-base_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_deepseek-llm-7b-base_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_deepseek-llm-7b-chat_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_deepseek-llm-7b-chat_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/deepseek-ai_deepseek-math-7b-rl_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/deepseek-ai_deepseek-math-7b-rl_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/google_gemma-3-12b-it_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/google_gemma-3-12b-it_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/google_gemma-3-1b-it_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/google_gemma-3-1b-it_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/google_gemma-3-4b-it_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/google_gemma-3-4b-it_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-2-13b-chat-hf_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-2-13b-chat-hf_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-2-13b-hf_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-2-13b-hf_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-2-7b-chat-hf_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-2-7b-chat-hf_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-2-7b-hf_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-2-7b-hf_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-3.1-8B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-3.1-8B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-3.2-1B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-3.2-1B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Llama-3.2-3B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Llama-3.2-3B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/meta-llama_Meta-Llama-3-8B-Instruct_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/meta-llama_Meta-Llama-3-8B-Instruct_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/mistralai_Ministral-8B-Instruct-2410_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/mistralai_Ministral-8B-Instruct-2410_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/mistralai_Mistral-7B-Instruct-v0.3_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/mistralai_Mistral-7B-Instruct-v0.3_gpu_usage.csv -------------------------------------------------------------------------------- /gpu_usage/openchat_openchat-3.6-8b-20240522_gpu_usage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/gpu_usage/openchat_openchat-3.6-8b-20240522_gpu_usage.csv -------------------------------------------------------------------------------- /main_log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/main_log.txt -------------------------------------------------------------------------------- /parse.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/parse.ipynb -------------------------------------------------------------------------------- /results_json/01-ai_Yi-1.5-6B-Chat_full-bench_2025-07-27T20-07-27.547843.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/01-ai_Yi-1.5-6B-Chat_full-bench_2025-07-27T20-07-27.547843.json -------------------------------------------------------------------------------- /results_json/01-ai_Yi-1.5-6B_full-bench_2025-08-01T04-25-13.907532.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/01-ai_Yi-1.5-6B_full-bench_2025-08-01T04-25-13.907532.json -------------------------------------------------------------------------------- /results_json/01-ai_Yi-1.5-9B-Chat_full-bench_2025-07-27T12-03-18.497948.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/01-ai_Yi-1.5-9B-Chat_full-bench_2025-07-27T12-03-18.497948.json -------------------------------------------------------------------------------- /results_json/01-ai_Yi-1.5-9B_full-bench_2025-07-28T07-51-08.195989.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/01-ai_Yi-1.5-9B_full-bench_2025-07-28T07-51-08.195989.json -------------------------------------------------------------------------------- /results_json/DeepSeek-R1-Distill-Qwen-1.5B_2025-08-14T07-21-04.965022.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/DeepSeek-R1-Distill-Qwen-1.5B_2025-08-14T07-21-04.965022.json -------------------------------------------------------------------------------- /results_json/NousResearch_Hermes-2-Pro-Mistral-7B_2025-08-08T18-11-42.473630.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/NousResearch_Hermes-2-Pro-Mistral-7B_2025-08-08T18-11-42.473630.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2-7B-Instruct_full-bench_2025-07-26T00-11-42.488891.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2-7B-Instruct_full-bench_2025-07-26T00-11-42.488891.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-0.5B-Instruct_2025-08-14T03-40-00.804497.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-0.5B-Instruct_2025-08-14T03-40-00.804497.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-1.5B-Instruct_2025-08-14T01-05-38.407962.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-1.5B-Instruct_2025-08-14T01-05-38.407962.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-14B-Instruct_2025-08-12T03-24-58.617841.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-14B-Instruct_2025-08-12T03-24-58.617841.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-3B-Instruct_2025-08-09T12-13-46.725707.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-3B-Instruct_2025-08-09T12-13-46.725707.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-7B-Instruct-1M_full-bench_2025-08-01T15-42-35.985248.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-7B-Instruct-1M_full-bench_2025-08-01T15-42-35.985248.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-7B-Instruct_full-bench_2025-08-06T01-13-10.614187.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-7B-Instruct_full-bench_2025-08-06T01-13-10.614187.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-Math-1.5B-Instruct_2025-08-09T04-25-28.095556.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-Math-1.5B-Instruct_2025-08-09T04-25-28.095556.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-Math-7B-Instruct_full-bench_2025-08-02T12-57-32.315698.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-Math-7B-Instruct_full-bench_2025-08-02T12-57-32.315698.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen2.5-Math-7B_full-bench_2025-08-03T16-19-38.915317.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen2.5-Math-7B_full-bench_2025-08-03T16-19-38.915317.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen3-0.6B_2025-08-14T15-32-26.452379.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen3-0.6B_2025-08-14T15-32-26.452379.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen3-1.7B_2025-08-14T11-46-29.566564.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen3-1.7B_2025-08-14T11-46-29.566564.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen3-14B_2025-08-13T09-10-59.887777.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen3-14B_2025-08-13T09-10-59.887777.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen3-4B_2025-08-14T21-23-53.711549.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen3-4B_2025-08-14T21-23-53.711549.json -------------------------------------------------------------------------------- /results_json/Qwen_Qwen3-8B_full-bench_2025-08-02T07-14-43.583524.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/Qwen_Qwen3-8B_full-bench_2025-08-02T07-14-43.583524.json -------------------------------------------------------------------------------- /results_json/deepseek-ai_DeepSeek-R1-0528-Qwen3-8B_full-bench_2025-07-26T18-09-46.798601.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/deepseek-ai_DeepSeek-R1-0528-Qwen3-8B_full-bench_2025-07-26T18-09-46.798601.json -------------------------------------------------------------------------------- /results_json/deepseek-ai_DeepSeek-R1-Distill-Llama-8B_full-bench_2025-08-05T12-33-17.758692.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/deepseek-ai_DeepSeek-R1-Distill-Llama-8B_full-bench_2025-08-05T12-33-17.758692.json -------------------------------------------------------------------------------- /results_json/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_2025-08-06T23-54-44.617651.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_2025-08-06T23-54-44.617651.json -------------------------------------------------------------------------------- /results_json/deepseek-ai_deepseek-llm-7b-base_2025-08-08T01-15-27.817586.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/deepseek-ai_deepseek-llm-7b-base_2025-08-08T01-15-27.817586.json -------------------------------------------------------------------------------- /results_json/deepseek-ai_deepseek-llm-7b-chat_2025-08-07T18-04-01.194841.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/deepseek-ai_deepseek-llm-7b-chat_2025-08-07T18-04-01.194841.json -------------------------------------------------------------------------------- /results_json/deepseek-math-7b-rl_2025-08-07T07-56-58.873248.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/deepseek-math-7b-rl_2025-08-07T07-56-58.873248.json -------------------------------------------------------------------------------- /results_json/google_gemma-3-12b-it_full-bench_2025-07-24T00-10-18.873426.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/google_gemma-3-12b-it_full-bench_2025-07-24T00-10-18.873426.json -------------------------------------------------------------------------------- /results_json/google_gemma-3-1b-it_full-bench_2025-07-23T08-24-17.501846.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/google_gemma-3-1b-it_full-bench_2025-07-23T08-24-17.501846.json -------------------------------------------------------------------------------- /results_json/google_gemma-3-4b-it_full-bench_2025-07-23T01-33-21.584095.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/google_gemma-3-4b-it_full-bench_2025-07-23T01-33-21.584095.json -------------------------------------------------------------------------------- /results_json/gpu_util_equation_dark.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/gpu_util_equation_dark.svg -------------------------------------------------------------------------------- /results_json/gpu_util_equation_light.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/gpu_util_equation_light.svg -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-2-13b-chat-hf_full-bench_2025-07-24T17-19-25.853170.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-2-13b-chat-hf_full-bench_2025-07-24T17-19-25.853170.json -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-2-13b-hf_full-bench_2025-07-25T12-41-02.249971.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-2-13b-hf_full-bench_2025-07-25T12-41-02.249971.json -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-2-7b-chat-hf_full-bench_2025-07-21T23-01-58.369828.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-2-7b-chat-hf_full-bench_2025-07-21T23-01-58.369828.json -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-2-7b-hf_full-bench_2025-07-20T01-50-06.686535.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-2-7b-hf_full-bench_2025-07-20T01-50-06.686535.json -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-3.1-8B-Instruct_full-bench_2025-07-21T16-03-50.037777.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-3.1-8B-Instruct_full-bench_2025-07-21T16-03-50.037777.json -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-3.2-1B-Instruct_full-bench_2025-07-20T20-31-50.105888.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-3.2-1B-Instruct_full-bench_2025-07-20T20-31-50.105888.json -------------------------------------------------------------------------------- /results_json/meta-llama_Llama-3.2-3B-Instruct_full-bench_2025-07-21T03-44-18.392701.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Llama-3.2-3B-Instruct_full-bench_2025-07-21T03-44-18.392701.json -------------------------------------------------------------------------------- /results_json/meta-llama_Meta-Llama-3-8B-Instruct_full-bench_2025-08-06T07-54-03.388655.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/meta-llama_Meta-Llama-3-8B-Instruct_full-bench_2025-08-06T07-54-03.388655.json -------------------------------------------------------------------------------- /results_json/mistralai_Ministral-8B-Instruct-2410_full-bench_2025-07-22T20-42-12.925837.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/mistralai_Ministral-8B-Instruct-2410_full-bench_2025-07-22T20-42-12.925837.json -------------------------------------------------------------------------------- /results_json/mistralai_Mistral-7B-Instruct-v0.3_full-bench_2025-08-06T16-32-17.537246.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/mistralai_Mistral-7B-Instruct-v0.3_full-bench_2025-08-06T16-32-17.537246.json -------------------------------------------------------------------------------- /results_json/openchat_openchat-3.6-8b-20240522_2025-08-08T09-06-55.893382.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_json/openchat_openchat-3.6-8b-20240522_2025-08-08T09-06-55.893382.json -------------------------------------------------------------------------------- /results_tables/commonsense_and_nli_rank.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_tables/commonsense_and_nli_rank.csv -------------------------------------------------------------------------------- /results_tables/knowledge_and_reading_rank.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_tables/knowledge_and_reading_rank.csv -------------------------------------------------------------------------------- /results_tables/llm_benchmarks_all_results.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_tables/llm_benchmarks_all_results.xlsx -------------------------------------------------------------------------------- /results_tables/llm_benchmarks_master.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_tables/llm_benchmarks_master.csv -------------------------------------------------------------------------------- /results_tables/overall_rank.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_tables/overall_rank.csv -------------------------------------------------------------------------------- /results_tables/reasoning_and_math_rank.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/results_tables/reasoning_and_math_rank.csv -------------------------------------------------------------------------------- /run_benchmarks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayminban/41-llms-evaluated-on-19-benchmarks/HEAD/run_benchmarks.sh --------------------------------------------------------------------------------