├── .dockerignore ├── .gitattributes ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── COLAB_SETUP.md ├── Dockerfile ├── LICENSE ├── README.md ├── REPOSITORY_UPDATES.md ├── benchmarks_output └── sample_results.json ├── cache └── models │ ├── .locks │ └── models--microsoft--Phi-3-mini-4k-instruct │ │ ├── 178968dec606c790aa335e9142f6afec37288470.lock │ │ ├── 67aa82cddb4d66391ddf31ff99f059239bd2d1e7.lock │ │ ├── 88ec145f4e7684c009bc6d55df24bb82c7d3c379.lock │ │ ├── 9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347.lock │ │ ├── b9b031fadda61a035b2e8ceb4362cbf604002b21.lock │ │ └── c6a944b4d49ce5d79030250ed6bdcbb1a65dfda1.lock │ └── models--microsoft--Phi-3-mini-4k-instruct │ ├── blobs │ ├── 178968dec606c790aa335e9142f6afec37288470 │ ├── 67aa82cddb4d66391ddf31ff99f059239bd2d1e7 │ ├── 88ec145f4e7684c009bc6d55df24bb82c7d3c379 │ ├── 9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 │ ├── b9b031fadda61a035b2e8ceb4362cbf604002b21 │ └── c6a944b4d49ce5d79030250ed6bdcbb1a65dfda1 │ ├── refs │ └── main │ └── snapshots │ └── 0a67737cc96d2554230f90338b163bc6380a2a85 │ ├── added_tokens.json │ ├── config.json │ ├── special_tokens_map.json │ ├── tokenizer.json │ ├── tokenizer.model │ └── tokenizer_config.json ├── configs ├── benchmark_config.yaml └── colab_config.yaml ├── dashboard.log ├── docs ├── .Rhistory └── api_docs.md ├── examples ├── advanced_usage.py └── basic_usage.py ├── gemma_benchmark ├── __init__.py ├── auth.py ├── core │ ├── __init__.py │ ├── benchmark.py │ ├── interfaces.py │ └── model_loader.py ├── scripts │ ├── download_data.py │ └── run_benchmark.py ├── tasks │ ├── __init__.py │ ├── arc.py │ ├── efficiency.py │ ├── gsm8k.py │ ├── humaneval.py │ ├── mmlu.py │ └── truthfulqa.py ├── utils │ ├── __init__.py │ ├── config_validation.py │ ├── data_downloader.py │ └── metrics.py └── visualization │ ├── __init__.py │ ├── charts.py │ └── leaderboard.py ├── pyproject.toml ├── requirements.txt ├── src ├── config │ ├── advanced_custom_benchmark_config.yaml │ ├── benchmark_config.yaml │ └── default_benchmark_config.yaml ├── generate_default_config.py └── scripts │ ├── download_data.py │ └── run_benchmark.py ├── test_colab_benchmark.py ├── test_results ├── demo_visualization.png └── sample_results.json ├── tests ├── __init__.py ├── test_core.py └── test_tasks.py └── visualize └── dashboard.py /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/.dockerignore -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/.github/workflows/tests.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /COLAB_SETUP.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/COLAB_SETUP.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/README.md -------------------------------------------------------------------------------- /REPOSITORY_UPDATES.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/REPOSITORY_UPDATES.md -------------------------------------------------------------------------------- /benchmarks_output/sample_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/benchmarks_output/sample_results.json -------------------------------------------------------------------------------- /cache/models/.locks/models--microsoft--Phi-3-mini-4k-instruct/178968dec606c790aa335e9142f6afec37288470.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache/models/.locks/models--microsoft--Phi-3-mini-4k-instruct/67aa82cddb4d66391ddf31ff99f059239bd2d1e7.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache/models/.locks/models--microsoft--Phi-3-mini-4k-instruct/88ec145f4e7684c009bc6d55df24bb82c7d3c379.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache/models/.locks/models--microsoft--Phi-3-mini-4k-instruct/9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache/models/.locks/models--microsoft--Phi-3-mini-4k-instruct/b9b031fadda61a035b2e8ceb4362cbf604002b21.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache/models/.locks/models--microsoft--Phi-3-mini-4k-instruct/c6a944b4d49ce5d79030250ed6bdcbb1a65dfda1.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/178968dec606c790aa335e9142f6afec37288470: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/178968dec606c790aa335e9142f6afec37288470 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/67aa82cddb4d66391ddf31ff99f059239bd2d1e7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/67aa82cddb4d66391ddf31ff99f059239bd2d1e7 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/88ec145f4e7684c009bc6d55df24bb82c7d3c379: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/88ec145f4e7684c009bc6d55df24bb82c7d3c379 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/b9b031fadda61a035b2e8ceb4362cbf604002b21: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/b9b031fadda61a035b2e8ceb4362cbf604002b21 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/c6a944b4d49ce5d79030250ed6bdcbb1a65dfda1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/blobs/c6a944b4d49ce5d79030250ed6bdcbb1a65dfda1 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/refs/main: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/cache/models/models--microsoft--Phi-3-mini-4k-instruct/refs/main -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/added_tokens.json: -------------------------------------------------------------------------------- 1 | ../../blobs/178968dec606c790aa335e9142f6afec37288470 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/config.json: -------------------------------------------------------------------------------- 1 | ../../blobs/b9b031fadda61a035b2e8ceb4362cbf604002b21 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | ../../blobs/c6a944b4d49ce5d79030250ed6bdcbb1a65dfda1 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/tokenizer.json: -------------------------------------------------------------------------------- 1 | ../../blobs/88ec145f4e7684c009bc6d55df24bb82c7d3c379 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/tokenizer.model: -------------------------------------------------------------------------------- 1 | ../../blobs/9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -------------------------------------------------------------------------------- /cache/models/models--microsoft--Phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | ../../blobs/67aa82cddb4d66391ddf31ff99f059239bd2d1e7 -------------------------------------------------------------------------------- /configs/benchmark_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/configs/benchmark_config.yaml -------------------------------------------------------------------------------- /configs/colab_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/configs/colab_config.yaml -------------------------------------------------------------------------------- /dashboard.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/dashboard.log -------------------------------------------------------------------------------- /docs/.Rhistory: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/api_docs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/docs/api_docs.md -------------------------------------------------------------------------------- /examples/advanced_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/examples/advanced_usage.py -------------------------------------------------------------------------------- /examples/basic_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/examples/basic_usage.py -------------------------------------------------------------------------------- /gemma_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gemma_benchmark/auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/auth.py -------------------------------------------------------------------------------- /gemma_benchmark/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gemma_benchmark/core/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/core/benchmark.py -------------------------------------------------------------------------------- /gemma_benchmark/core/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/core/interfaces.py -------------------------------------------------------------------------------- /gemma_benchmark/core/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/core/model_loader.py -------------------------------------------------------------------------------- /gemma_benchmark/scripts/download_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/scripts/download_data.py -------------------------------------------------------------------------------- /gemma_benchmark/scripts/run_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/scripts/run_benchmark.py -------------------------------------------------------------------------------- /gemma_benchmark/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gemma_benchmark/tasks/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/tasks/arc.py -------------------------------------------------------------------------------- /gemma_benchmark/tasks/efficiency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/tasks/efficiency.py -------------------------------------------------------------------------------- /gemma_benchmark/tasks/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/tasks/gsm8k.py -------------------------------------------------------------------------------- /gemma_benchmark/tasks/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/tasks/humaneval.py -------------------------------------------------------------------------------- /gemma_benchmark/tasks/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/tasks/mmlu.py -------------------------------------------------------------------------------- /gemma_benchmark/tasks/truthfulqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/tasks/truthfulqa.py -------------------------------------------------------------------------------- /gemma_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gemma_benchmark/utils/config_validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/utils/config_validation.py -------------------------------------------------------------------------------- /gemma_benchmark/utils/data_downloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/utils/data_downloader.py -------------------------------------------------------------------------------- /gemma_benchmark/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/utils/metrics.py -------------------------------------------------------------------------------- /gemma_benchmark/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gemma_benchmark/visualization/charts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/visualization/charts.py -------------------------------------------------------------------------------- /gemma_benchmark/visualization/leaderboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/gemma_benchmark/visualization/leaderboard.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/requirements.txt -------------------------------------------------------------------------------- /src/config/advanced_custom_benchmark_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/src/config/advanced_custom_benchmark_config.yaml -------------------------------------------------------------------------------- /src/config/benchmark_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/src/config/benchmark_config.yaml -------------------------------------------------------------------------------- /src/config/default_benchmark_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/src/config/default_benchmark_config.yaml -------------------------------------------------------------------------------- /src/generate_default_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/src/generate_default_config.py -------------------------------------------------------------------------------- /src/scripts/download_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/src/scripts/download_data.py -------------------------------------------------------------------------------- /src/scripts/run_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/src/scripts/run_benchmark.py -------------------------------------------------------------------------------- /test_colab_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/test_colab_benchmark.py -------------------------------------------------------------------------------- /test_results/demo_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/test_results/demo_visualization.png -------------------------------------------------------------------------------- /test_results/sample_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/test_results/sample_results.json -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test suite for gemma_benchmark package.""" 2 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/tests/test_core.py -------------------------------------------------------------------------------- /tests/test_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/tests/test_tasks.py -------------------------------------------------------------------------------- /visualize/dashboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilcheng/gemma-benchmark/HEAD/visualize/dashboard.py --------------------------------------------------------------------------------