├── .gitignore ├── README.md ├── data ├── causal_reasoning_images │ ├── 1.png │ ├── 10.png │ ├── 11.png │ ├── 12.png │ ├── 13.png │ ├── 14.png │ ├── 15.png │ ├── 16.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ └── 9.png ├── data_total.json ├── logical_reasoning_images │ ├── 1.jpg │ ├── 10.png │ ├── 10_ref.png │ ├── 11.png │ ├── 11_ref.png │ ├── 12.png │ ├── 12_ref.png │ ├── 13.png │ ├── 13_ref.png │ ├── 14.png │ ├── 14_ref.png │ ├── 15.png │ ├── 15_ref.png │ ├── 16.png │ ├── 16_ref.png │ ├── 1_ref.png │ ├── 2.png │ ├── 3.jpg │ ├── 3_ref.png │ ├── 4.png │ ├── 4_ref.jpg │ ├── 5.png │ ├── 6.png │ ├── 6_ref.png │ ├── 7.png │ ├── 8.png │ ├── 8_ref.png │ ├── 9.png │ └── 9_ref.png ├── spatial_reasoning_images │ ├── 1.png │ ├── 10.png │ ├── 11.png │ ├── 12.png │ ├── 13.png │ ├── 14.png │ ├── 15.png │ ├── 16.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ └── 9.png └── temporal_reasoning_images │ ├── 1.png │ ├── 10.png │ ├── 11.png │ ├── 12.png │ ├── 13.png │ ├── 14.png │ ├── 15.png │ ├── 16.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ └── 9.png ├── gpt_eval.py ├── images ├── bench1.png ├── judge.png ├── metric_score.png ├── metric_score2.png ├── output.png ├── results.png └── statis.png ├── outputs ├── gemini2 │ └── images │ │ └── logical_reasoning │ │ └── logical_reasoning_1.png └── gpt-4o-native │ └── images │ └── temporal_reasoning │ └── temporal_reasoning_1.png └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | outputs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/README.md -------------------------------------------------------------------------------- /data/causal_reasoning_images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/1.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/10.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/11.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/12.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/13.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/14.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/15.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/16.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/2.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/3.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/4.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/5.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/6.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/7.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/8.png -------------------------------------------------------------------------------- /data/causal_reasoning_images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/causal_reasoning_images/9.png -------------------------------------------------------------------------------- /data/data_total.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/data_total.json -------------------------------------------------------------------------------- /data/logical_reasoning_images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/1.jpg -------------------------------------------------------------------------------- /data/logical_reasoning_images/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/10.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/10_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/10_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/11.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/11_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/11_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/12.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/12_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/12_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/13.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/13_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/13_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/14.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/14_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/14_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/15.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/15_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/15_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/16.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/16_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/16_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/1_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/1_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/2.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/3.jpg -------------------------------------------------------------------------------- /data/logical_reasoning_images/3_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/3_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/4.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/4_ref.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/4_ref.jpg -------------------------------------------------------------------------------- /data/logical_reasoning_images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/5.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/6.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/6_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/6_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/7.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/8.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/8_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/8_ref.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/9.png -------------------------------------------------------------------------------- /data/logical_reasoning_images/9_ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/logical_reasoning_images/9_ref.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/1.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/10.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/11.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/12.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/13.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/14.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/15.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/16.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/2.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/3.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/4.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/5.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/6.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/7.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/8.png -------------------------------------------------------------------------------- /data/spatial_reasoning_images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/spatial_reasoning_images/9.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/1.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/10.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/11.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/12.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/13.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/14.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/15.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/16.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/2.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/3.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/4.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/5.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/6.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/7.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/8.png -------------------------------------------------------------------------------- /data/temporal_reasoning_images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/data/temporal_reasoning_images/9.png -------------------------------------------------------------------------------- /gpt_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/gpt_eval.py -------------------------------------------------------------------------------- /images/bench1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/bench1.png -------------------------------------------------------------------------------- /images/judge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/judge.png -------------------------------------------------------------------------------- /images/metric_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/metric_score.png -------------------------------------------------------------------------------- /images/metric_score2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/metric_score2.png -------------------------------------------------------------------------------- /images/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/output.png -------------------------------------------------------------------------------- /images/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/results.png -------------------------------------------------------------------------------- /images/statis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/images/statis.png -------------------------------------------------------------------------------- /outputs/gemini2/images/logical_reasoning/logical_reasoning_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/outputs/gemini2/images/logical_reasoning/logical_reasoning_1.png -------------------------------------------------------------------------------- /outputs/gpt-4o-native/images/temporal_reasoning/temporal_reasoning_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/outputs/gpt-4o-native/images/temporal_reasoning/temporal_reasoning_1.png -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PhoenixZ810/RISEBench/HEAD/utils.py --------------------------------------------------------------------------------