├── .github └── workflows │ └── actions.yml ├── .gitignore ├── LICENSE ├── README.md ├── data ├── .gitignore ├── generations │ ├── README.md │ ├── claude-3-5-sonnet-20241022.jsonl │ ├── claude-3-haiku-20240307.jsonl │ ├── claude-3-opus-20240229.jsonl │ ├── gemini-1.5-flash-002.jsonl │ ├── gemini-1.5-flash-8b-001.jsonl │ ├── gemini-1.5-pro-002.jsonl │ ├── gemini-2.0-flash-exp.jsonl │ ├── gemini_exp_1206.jsonl │ ├── gpt-4o-2024-11-20.jsonl │ ├── gpt-4o-mini.jsonl │ ├── grok-2-vision-1212.jsonl │ ├── grok-vision-beta.jsonl │ ├── known-bad.jsonl │ ├── known-good.jsonl │ ├── mistralai-pixtral-12b-2409.jsonl │ ├── mistralai-pixtral-large-instruct-2411.jsonl │ └── reka-flash-20241127.jsonl ├── results │ └── reka-core-20240415-evaluator │ │ ├── claude-3-5-sonnet-20241022.jsonl │ │ ├── claude-3-5-sonnet-20241022_summary.jsonl │ │ ├── claude-3-haiku-20240307.jsonl │ │ ├── claude-3-haiku-20240307_summary.jsonl │ │ ├── claude-3-opus-20240229.jsonl │ │ ├── claude-3-opus-20240229_summary.jsonl │ │ ├── gemini-1.5-flash-002.jsonl │ │ ├── gemini-1.5-flash-002_summary.jsonl │ │ ├── gemini-1.5-flash-8b-001.jsonl │ │ ├── gemini-1.5-flash-8b-001_summary.jsonl │ │ ├── gemini-1.5-pro-002.jsonl │ │ ├── gemini-1.5-pro-002_summary.jsonl │ │ ├── gemini-2.0-flash-exp.jsonl │ │ ├── gemini-2.0-flash-exp_summary.jsonl │ │ ├── gpt-4o-2024-11-20.jsonl │ │ ├── gpt-4o-2024-11-20_summary.jsonl │ │ ├── gpt4o-mini.jsonl │ │ ├── gpt4o-mini_summary.jsonl │ │ ├── grok-2-vision-1212.jsonl │ │ ├── grok-2-vision-1212_summary.jsonl │ │ ├── grok-vision-beta.jsonl │ │ ├── grok-vision-beta_summary.jsonl │ │ ├── mistralai-pixtral-12b-2409.jsonl │ │ ├── mistralai-pixtral-12b-2409_summary.jsonl │ │ ├── mistralai-pixtral-large-instruct-2411.jsonl │ │ ├── mistralai-pixtral-large-instruct-2411_summary.jsonl │ │ ├── reka-flash-20241127.jsonl │ │ └── reka-flash-20241127_summary.jsonl └── vibe-eval.v1.jsonl ├── evaluate.py ├── figure.png ├── models ├── base_model.py ├── claude_models.py ├── gemini_models.py ├── generate.py ├── openai_models.py ├── pixtral_models.py ├── pixtral_server.py ├── reka_models.py ├── utils.py └── xai_models.py ├── requirements.txt ├── tests ├── test_data.py ├── test_evaluate.py └── test_requirements.txt ├── visualizer.jpeg └── visualizer └── index.html /.github/workflows/actions.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/.github/workflows/actions.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/README.md -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | images 2 | *.tar.gz 3 | -------------------------------------------------------------------------------- /data/generations/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/README.md -------------------------------------------------------------------------------- /data/generations/claude-3-5-sonnet-20241022.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/claude-3-5-sonnet-20241022.jsonl -------------------------------------------------------------------------------- /data/generations/claude-3-haiku-20240307.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/claude-3-haiku-20240307.jsonl -------------------------------------------------------------------------------- /data/generations/claude-3-opus-20240229.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/claude-3-opus-20240229.jsonl -------------------------------------------------------------------------------- /data/generations/gemini-1.5-flash-002.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/gemini-1.5-flash-002.jsonl -------------------------------------------------------------------------------- /data/generations/gemini-1.5-flash-8b-001.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/gemini-1.5-flash-8b-001.jsonl -------------------------------------------------------------------------------- /data/generations/gemini-1.5-pro-002.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/gemini-1.5-pro-002.jsonl -------------------------------------------------------------------------------- /data/generations/gemini-2.0-flash-exp.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/gemini-2.0-flash-exp.jsonl -------------------------------------------------------------------------------- /data/generations/gemini_exp_1206.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/generations/gpt-4o-2024-11-20.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/gpt-4o-2024-11-20.jsonl -------------------------------------------------------------------------------- /data/generations/gpt-4o-mini.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/gpt-4o-mini.jsonl -------------------------------------------------------------------------------- /data/generations/grok-2-vision-1212.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/grok-2-vision-1212.jsonl -------------------------------------------------------------------------------- /data/generations/grok-vision-beta.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/grok-vision-beta.jsonl -------------------------------------------------------------------------------- /data/generations/known-bad.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/known-bad.jsonl -------------------------------------------------------------------------------- /data/generations/known-good.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/known-good.jsonl -------------------------------------------------------------------------------- /data/generations/mistralai-pixtral-12b-2409.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/mistralai-pixtral-12b-2409.jsonl -------------------------------------------------------------------------------- /data/generations/mistralai-pixtral-large-instruct-2411.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/mistralai-pixtral-large-instruct-2411.jsonl -------------------------------------------------------------------------------- /data/generations/reka-flash-20241127.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/generations/reka-flash-20241127.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/claude-3-5-sonnet-20241022.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/claude-3-5-sonnet-20241022.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/claude-3-5-sonnet-20241022_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/claude-3-5-sonnet-20241022_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/claude-3-haiku-20240307.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/claude-3-haiku-20240307.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/claude-3-haiku-20240307_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/claude-3-haiku-20240307_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/claude-3-opus-20240229.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/claude-3-opus-20240229.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/claude-3-opus-20240229_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/claude-3-opus-20240229_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-1.5-flash-002.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-1.5-flash-002.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-1.5-flash-002_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-1.5-flash-002_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-1.5-flash-8b-001.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-1.5-flash-8b-001.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-1.5-flash-8b-001_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-1.5-flash-8b-001_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-1.5-pro-002.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-1.5-pro-002.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-1.5-pro-002_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-1.5-pro-002_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-2.0-flash-exp.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-2.0-flash-exp.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gemini-2.0-flash-exp_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gemini-2.0-flash-exp_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gpt-4o-2024-11-20.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gpt-4o-2024-11-20.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gpt-4o-2024-11-20_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gpt-4o-2024-11-20_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gpt4o-mini.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gpt4o-mini.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/gpt4o-mini_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/gpt4o-mini_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/grok-2-vision-1212.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/grok-2-vision-1212.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/grok-2-vision-1212_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/grok-2-vision-1212_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/grok-vision-beta.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/grok-vision-beta_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/grok-vision-beta_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/mistralai-pixtral-12b-2409.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/mistralai-pixtral-12b-2409.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/mistralai-pixtral-12b-2409_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/mistralai-pixtral-12b-2409_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/mistralai-pixtral-large-instruct-2411.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/mistralai-pixtral-large-instruct-2411.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/mistralai-pixtral-large-instruct-2411_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/mistralai-pixtral-large-instruct-2411_summary.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/reka-flash-20241127.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/reka-flash-20241127.jsonl -------------------------------------------------------------------------------- /data/results/reka-core-20240415-evaluator/reka-flash-20241127_summary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/results/reka-core-20240415-evaluator/reka-flash-20241127_summary.jsonl -------------------------------------------------------------------------------- /data/vibe-eval.v1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/data/vibe-eval.v1.jsonl -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/evaluate.py -------------------------------------------------------------------------------- /figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/figure.png -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/base_model.py -------------------------------------------------------------------------------- /models/claude_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/claude_models.py -------------------------------------------------------------------------------- /models/gemini_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/gemini_models.py -------------------------------------------------------------------------------- /models/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/generate.py -------------------------------------------------------------------------------- /models/openai_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/openai_models.py -------------------------------------------------------------------------------- /models/pixtral_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/pixtral_models.py -------------------------------------------------------------------------------- /models/pixtral_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/pixtral_server.py -------------------------------------------------------------------------------- /models/reka_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/reka_models.py -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/utils.py -------------------------------------------------------------------------------- /models/xai_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/models/xai_models.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/requirements.txt -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/tests/test_evaluate.py -------------------------------------------------------------------------------- /tests/test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | -------------------------------------------------------------------------------- /visualizer.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/visualizer.jpeg -------------------------------------------------------------------------------- /visualizer/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reka-ai/reka-vibe-eval/HEAD/visualizer/index.html --------------------------------------------------------------------------------