├── games
    ├── maze
    │   ├── __init__.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   └── file_utils.py
    │   ├── generators
    │   │   ├── __init__.py
    │   │   ├── video_gen.py
    │   │   ├── image_gen.py
    │   │   ├── state_gen.py
    │   │   ├── data_gen.py
    │   │   └── maze_gen.py
    │   ├── README.md
    │   ├── __main__.py
    │   ├── constants.py
    │   ├── templates
    │   │   ├── __init__.py
    │   │   ├── turn_count.py
    │   │   ├── player_position.py
    │   │   ├── goal_position.py
    │   │   ├── base_template.py
    │   │   ├── find_path_to_goal.py
    │   │   ├── position_after_moving.py
    │   │   └── available_directions.py
    │   ├── config.py
    │   ├── test_skin.py
    │   ├── main.py
    │   └── default_textures.py
    ├── __init__.py
    ├── trapfield
    │   ├── __init__.py
    │   ├── config.py
    │   └── constants.py
    ├── maze3d
    │   ├── __init__.py
    │   └── color_handler.py
    ├── pathfinder
    │   ├── __init__.py
    │   ├── constants.py
    │   ├── board.py
    │   └── texture_handler.py
    └── sokoban
    │   ├── __init__.py
    │   └── config.py
├── AutoEnv
    ├── base
    │   ├── __init__.py
    │   ├── engine
    │   │   ├── __init__.py
    │   │   └── cost_monitor.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   └── image.py
    │   └── pipeline
    │   │   ├── __init__.py
    │   │   ├── base_node.py
    │   │   └── base_pipeline.py
    ├── config
    │   └── env_skin_gen.yaml
    ├── autoenv
    │   └── pipeline
    │   │   ├── __init__.py
    │   │   └── visual
    │   │       ├── __init__.py
    │   │       ├── prompt.py
    │   │       └── pipeline.py
    └── LICENSE
├── skins
    ├── maze
    │   ├── 1
    │   │   ├── wall.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   └── description.json
    │   ├── 2
    │   │   ├── wall.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   └── description.json
    │   ├── 3
    │   │   ├── wall.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   └── description.json
    │   ├── 4
    │   │   ├── wall.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   └── description.json
    │   └── 5
    │   │   ├── wall.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   └── description.json
    ├── sokoban
    │   ├── 1
    │   │   ├── box.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   ├── wall.png
    │   │   └── description.json
    │   ├── 2
    │   │   ├── box.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   ├── wall.png
    │   │   └── description.json
    │   ├── 3
    │   │   ├── box.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   ├── wall.png
    │   │   └── description.json
    │   ├── 4
    │   │   ├── box.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   ├── target.png
    │   │   ├── wall.png
    │   │   └── description.json
    │   └── 5
    │   │   ├── box.jpg
    │   │   ├── floor.jpg
    │   │   ├── player.jpg
    │   │   ├── target.jpg
    │   │   ├── wall.jpg
    │   │   └── description.json
    ├── pathfinder
    │   ├── 1
    │   │   ├── end.png
    │   │   ├── road.png
    │   │   ├── start.png
    │   │   └── description.json
    │   ├── 2
    │   │   ├── end.png
    │   │   ├── road.png
    │   │   ├── start.png
    │   │   └── description.json
    │   ├── 3
    │   │   ├── end.png
    │   │   ├── road.png
    │   │   ├── start.png
    │   │   └── description.json
    │   └── 4
    │   │   ├── end.png
    │   │   ├── road.png
    │   │   ├── start.png
    │   │   └── description.json
    ├── trapfield
    │   ├── 1
    │   │   ├── goal.png
    │   │   ├── trap.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   └── description.json
    │   ├── 2
    │   │   ├── goal.png
    │   │   ├── trap.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   └── description.json
    │   ├── 3
    │   │   ├── goal.png
    │   │   ├── trap.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   └── description.json
    │   └── 4
    │   │   ├── goal.png
    │   │   ├── trap.png
    │   │   ├── floor.png
    │   │   ├── player.png
    │   │   └── description.json
    └── maze3d
    │   ├── 1
    │       ├── colors.json
    │       └── description.json
    │   ├── 2
    │       ├── colors.json
    │       └── description.json
    │   ├── 3
    │       ├── colors.json
    │       └── description.json
    │   └── 4
    │       ├── colors.json
    │       └── description.json
├── scripts
    ├── vlm_evaluate.sh
    ├── generate_by_skins.sh
    ├── generate_videos.sh
    ├── Wan2.2-TI2V-5B_lora.py
    ├── start_sglang_server.sh
    └── videomodel_evaluate.sh
├── core
    ├── schema
    │   ├── __init__.py
    │   ├── grid.py
    │   ├── render.py
    │   ├── entity.py
    │   ├── position.py
    │   └── state.py
    ├── __init__.py
    ├── constants.py
    ├── game_adapter.py
    └── texture_handler.py
├── utils
    ├── __init__.py
    └── video_metadata.py
├── generation
    └── __init__.py
├── evaluation
    ├── vlm_eval
    │   ├── executors
    │   │   ├── __init__.py
    │   │   ├── maze_executor.py
    │   │   └── trapfield_executor.py
    │   ├── __init__.py
    │   ├── game_executor.py
    │   ├── prompts
    │   │   ├── trapfield_prompt.py
    │   │   ├── maze_prompt.py
    │   │   ├── sokoban_prompt.py
    │   │   ├── pathfinder_prompt.py
    │   │   ├── maze3d_prompt.py
    │   │   └── __init__.py
    │   ├── vlm_client.py
    │   ├── MODEL_CONFIG.md
    │   ├── action_metrics.py
    │   ├── run_vlm_eval.py
    │   ├── recalculate_avg_step.py
    │   └── action_utils.py
    ├── videomodel_eval
    │   ├── __init__.py
    │   └── evaluator.py
    └── __init__.py
├── config
    ├── config_maze.yaml
    ├── vlm
    │   ├── maze3d_eval.yaml
    │   ├── sokoban_eval.yaml
    │   ├── trapfield_eval.yaml
    │   ├── maze_eval.yaml
    │   └── pathfinder_eval.yaml
    ├── config_trapfield.yaml
    ├── config_3d_maze.yaml
    ├── config_pathfinder.yaml
    └── config.yaml
├── .env.example
├── LICENSE
├── requirements.txt
├── .gitignore
├── prompts
    ├── videomodel_pathfinder_prompt.py
    ├── videomodel_maze_prompt.py
    ├── videomodel_trapfield_prompt.py
    ├── videomodel_sokoban_prompt.py
    ├── videomodel_maze3d_prompt.py
    ├── __init__.py
    └── METADATA_USAGE.md
└── dataset_init.py


/games/maze/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/games/maze/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/games/maze/generators/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/AutoEnv/base/__init__.py:
--------------------------------------------------------------------------------
1 | # Base package initializer
2 | 


--------------------------------------------------------------------------------
/AutoEnv/base/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Engine subpackage
2 | 


--------------------------------------------------------------------------------
/AutoEnv/base/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Utils subpackage
2 | 


--------------------------------------------------------------------------------
/AutoEnv/base/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | # Pipeline subpackage
2 | 


--------------------------------------------------------------------------------
/games/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Game modules using unified core.
3 | """
4 | 
5 | 


--------------------------------------------------------------------------------
/games/maze/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/games/maze/README.md


--------------------------------------------------------------------------------
/skins/maze/1/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/wall.png


--------------------------------------------------------------------------------
/skins/maze/2/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/wall.png


--------------------------------------------------------------------------------
/skins/maze/3/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/wall.png


--------------------------------------------------------------------------------
/skins/maze/4/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/wall.png


--------------------------------------------------------------------------------
/skins/maze/5/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/wall.png


--------------------------------------------------------------------------------
/skins/maze/1/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/floor.png


--------------------------------------------------------------------------------
/skins/maze/1/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/player.png


--------------------------------------------------------------------------------
/skins/maze/1/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/target.png


--------------------------------------------------------------------------------
/skins/maze/2/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/floor.png


--------------------------------------------------------------------------------
/skins/maze/2/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/player.png


--------------------------------------------------------------------------------
/skins/maze/2/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/target.png


--------------------------------------------------------------------------------
/skins/maze/3/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/floor.png


--------------------------------------------------------------------------------
/skins/maze/3/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/player.png


--------------------------------------------------------------------------------
/skins/maze/3/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/target.png


--------------------------------------------------------------------------------
/skins/maze/4/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/floor.png


--------------------------------------------------------------------------------
/skins/maze/4/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/player.png


--------------------------------------------------------------------------------
/skins/maze/4/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/target.png


--------------------------------------------------------------------------------
/skins/maze/5/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/floor.png


--------------------------------------------------------------------------------
/skins/maze/5/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/player.png


--------------------------------------------------------------------------------
/skins/maze/5/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/target.png


--------------------------------------------------------------------------------
/skins/sokoban/1/box.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/box.png


--------------------------------------------------------------------------------
/skins/sokoban/2/box.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/box.png


--------------------------------------------------------------------------------
/skins/sokoban/3/box.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/box.png


--------------------------------------------------------------------------------
/skins/sokoban/4/box.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/box.png


--------------------------------------------------------------------------------
/skins/sokoban/5/box.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/box.jpg


--------------------------------------------------------------------------------
/skins/pathfinder/1/end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/1/end.png


--------------------------------------------------------------------------------
/skins/pathfinder/2/end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/2/end.png


--------------------------------------------------------------------------------
/skins/pathfinder/3/end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/3/end.png


--------------------------------------------------------------------------------
/skins/pathfinder/4/end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/4/end.png


--------------------------------------------------------------------------------
/skins/sokoban/1/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/floor.png


--------------------------------------------------------------------------------
/skins/sokoban/1/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/player.png


--------------------------------------------------------------------------------
/skins/sokoban/1/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/target.png


--------------------------------------------------------------------------------
/skins/sokoban/1/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/wall.png


--------------------------------------------------------------------------------
/skins/sokoban/2/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/floor.png


--------------------------------------------------------------------------------
/skins/sokoban/2/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/player.png


--------------------------------------------------------------------------------
/skins/sokoban/2/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/target.png


--------------------------------------------------------------------------------
/skins/sokoban/2/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/wall.png


--------------------------------------------------------------------------------
/skins/sokoban/3/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/floor.png


--------------------------------------------------------------------------------
/skins/sokoban/3/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/player.png


--------------------------------------------------------------------------------
/skins/sokoban/3/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/target.png


--------------------------------------------------------------------------------
/skins/sokoban/3/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/wall.png


--------------------------------------------------------------------------------
/skins/sokoban/4/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/floor.png


--------------------------------------------------------------------------------
/skins/sokoban/4/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/player.png


--------------------------------------------------------------------------------
/skins/sokoban/4/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/target.png


--------------------------------------------------------------------------------
/skins/sokoban/4/wall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/wall.png


--------------------------------------------------------------------------------
/skins/sokoban/5/floor.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/floor.jpg


--------------------------------------------------------------------------------
/skins/sokoban/5/player.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/player.jpg


--------------------------------------------------------------------------------
/skins/sokoban/5/target.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/target.jpg


--------------------------------------------------------------------------------
/skins/sokoban/5/wall.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/wall.jpg


--------------------------------------------------------------------------------
/skins/trapfield/1/goal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/goal.png


--------------------------------------------------------------------------------
/skins/trapfield/1/trap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/trap.png


--------------------------------------------------------------------------------
/skins/trapfield/2/goal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/goal.png


--------------------------------------------------------------------------------
/skins/trapfield/2/trap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/trap.png


--------------------------------------------------------------------------------
/skins/trapfield/3/goal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/goal.png


--------------------------------------------------------------------------------
/skins/trapfield/3/trap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/trap.png


--------------------------------------------------------------------------------
/skins/trapfield/4/goal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/goal.png


--------------------------------------------------------------------------------
/skins/trapfield/4/trap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/trap.png


--------------------------------------------------------------------------------
/skins/pathfinder/1/road.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/1/road.png


--------------------------------------------------------------------------------
/skins/pathfinder/1/start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/1/start.png


--------------------------------------------------------------------------------
/skins/pathfinder/2/road.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/2/road.png


--------------------------------------------------------------------------------
/skins/pathfinder/2/start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/2/start.png


--------------------------------------------------------------------------------
/skins/pathfinder/3/road.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/3/road.png


--------------------------------------------------------------------------------
/skins/pathfinder/3/start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/3/start.png


--------------------------------------------------------------------------------
/skins/pathfinder/4/road.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/4/road.png


--------------------------------------------------------------------------------
/skins/pathfinder/4/start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/4/start.png


--------------------------------------------------------------------------------
/skins/trapfield/1/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/floor.png


--------------------------------------------------------------------------------
/skins/trapfield/1/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/player.png


--------------------------------------------------------------------------------
/skins/trapfield/2/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/floor.png


--------------------------------------------------------------------------------
/skins/trapfield/2/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/player.png


--------------------------------------------------------------------------------
/skins/trapfield/3/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/floor.png


--------------------------------------------------------------------------------
/skins/trapfield/3/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/player.png


--------------------------------------------------------------------------------
/skins/trapfield/4/floor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/floor.png


--------------------------------------------------------------------------------
/skins/trapfield/4/player.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/player.png


--------------------------------------------------------------------------------
/games/maze/__main__.py:
--------------------------------------------------------------------------------
1 | from .main import main
2 | 
3 | if __name__ == "__main__":
4 |     raise SystemExit(main())
5 | 


--------------------------------------------------------------------------------
/games/trapfield/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | TrapField Game - trap field game
3 | Similar to the maze game, but with no outer boundary walls; the interior walls are replaced by traps
4 | """
5 | 
6 | __version__ = "1.0.0"
7 | 
8 | 


--------------------------------------------------------------------------------
/scripts/vlm_evaluate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the VLM evaluation. Usage: ./scripts/vlm_evaluate.sh [config_file]
3 | CONFIG_FILE=${1:-config/vlm/maze_eval.yaml}
4 | 
5 | python -m evaluation.vlm_eval.run_vlm_eval "$CONFIG_FILE"
6 | 
7 | 


--------------------------------------------------------------------------------
/games/maze3d/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Maze3D game module
3 | """
4 | 
5 | from .main import QAGenerator, PuzzleGenerator
6 | 
7 | __all__ = ['QAGenerator', 'PuzzleGenerator']
8 | 
9 | 


--------------------------------------------------------------------------------
/skins/maze3d/1/colors.json:
--------------------------------------------------------------------------------
1 | {
2 |   "start_pos": "#4444FF",
3 |   "goal_pos": "#FF4444",
4 |   "default_cube": "#888888",
5 |   "ball": "#FFD700",
6 |   "ball_edge": "#FF8C00"
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/skins/maze3d/2/colors.json:
--------------------------------------------------------------------------------
1 | {
2 |   "start_pos": "#750725",
3 |   "goal_pos": "#0DC27D",
4 |   "default_cube": "#34495E",
5 |   "ball": "#9C0BD0",
6 |   "ball_edge": "#C40E75"
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/skins/maze3d/3/colors.json:
--------------------------------------------------------------------------------
1 | {
2 |   "start_pos": "#00FF00",
3 |   "goal_pos": "#FF00FF",
4 |   "default_cube": "#CCCCCC",
5 |   "ball": "#00FFFF",
6 |   "ball_edge": "#0088FF"
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/skins/maze3d/4/colors.json:
--------------------------------------------------------------------------------
1 | {
2 |   "start_pos": "#FF6B6B",
3 |   "goal_pos": "#4E5DCD",
4 |   "default_cube": "#95E1D3",
5 |   "ball": "#34F66B",
6 |   "ball_edge": "#0AABA3"
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/skins/pathfinder/1/description.json:
--------------------------------------------------------------------------------
1 | {
2 |   "game_type": "pathfinder",
3 |   "skin_id": "1",
4 |   "visual_description": {
5 |     "start": "green circle",
6 |     "end": "red circle",
7 |     "road": "white"
8 |   }
9 | }


--------------------------------------------------------------------------------
/skins/pathfinder/3/description.json:
--------------------------------------------------------------------------------
1 | {
2 |   "game_type": "pathfinder",
3 |   "skin_id": "3",
4 |   "visual_description": {
5 |     "start": "blue ice ball",
6 |     "end": "blue circle",
7 |     "road": "icy"
8 |   }
9 | }


--------------------------------------------------------------------------------
/scripts/generate_by_skins.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Batch-generate levels according to skins
3 | # Usage: ./scripts/generate_by_skins.sh [config_file]
4 | 
5 | CONFIG_FILE=${1:-config/config.yaml}
6 | 
7 | python generation/batch_generate.py "$CONFIG_FILE"
8 | 
9 | 


--------------------------------------------------------------------------------
/skins/pathfinder/2/description.json:
--------------------------------------------------------------------------------
1 | {
2 |   "game_type": "pathfinder",
3 |   "skin_id": "2",
4 |   "visual_description": {
5 |     "start": "white golf ball",
6 |     "end": "golf hole",
7 |     "road": "green grass"
8 |   }
9 | }


--------------------------------------------------------------------------------
/skins/maze/1/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze",
 3 |   "skin_id": "1",
 4 |   "visual_description": {
 5 |     "player": "red circle",
 6 |     "goal": "green square",
 7 |     "wall": "light blue square",
 8 |     "floor": "white square"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/maze/2/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze",
 3 |   "skin_id": "2",
 4 |   "visual_description": {
 5 |     "player": "white rabbit",
 6 |     "goal": "orange carrots",
 7 |     "wall": "gray rock",
 8 |     "floor": "green grass tiles"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/maze/5/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze",
 3 |   "skin_id": "5",
 4 |   "visual_description": {
 5 |     "player": "green circle",
 6 |     "goal": "red circle",
 7 |     "wall": "blue potion bottle",
 8 |     "floor": "white square"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/pathfinder/4/description.json:
--------------------------------------------------------------------------------
1 | {
2 |   "game_type": "pathfinder",
3 |   "skin_id": "4",
4 |   "visual_description": {
5 |     "start": "white boat with yellow dots",
6 |     "end": "white flag with yellow dots",
7 |     "road": "blue water tiles"
8 |   }
9 | }


--------------------------------------------------------------------------------
/skins/trapfield/1/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "trapfield",
 3 |   "skin_id": "1",
 4 |   "visual_description": {
 5 |     "player": "blue circle",
 6 |     "goal": "green circle",
 7 |     "trap": "red x",
 8 |     "floor": "white square"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/games/pathfinder/__init__.py:
--------------------------------------------------------------------------------
1 | """PathFinder Game - A path finding puzzle game with Bezier curve roads."""
2 | 
3 | from .board import PathFinderBoard
4 | from .generator import generate_pathfinder_board
5 | 
6 | __all__ = ['PathFinderBoard', 'generate_pathfinder_board']
7 | 
8 | 


--------------------------------------------------------------------------------
/skins/trapfield/4/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "trapfield",
 3 |   "skin_id": "4",
 4 |   "visual_description": {
 5 |     "player": "gray robot",
 6 |     "goal": "golden star",
 7 |     "trap": "blue crystal block",
 8 |     "floor": "silver metal plate"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/maze/3/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze",
 3 |   "skin_id": "3",
 4 |   "visual_description": {
 5 |     "player": "blue robot head",
 6 |     "goal": "green and yellow tile",
 7 |     "wall": "black brick wall",
 8 |     "floor": "gray decorative tile"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/maze/4/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze",
 3 |   "skin_id": "4",
 4 |   "visual_description": {
 5 |     "player": "anime schoolgirl character",
 6 |     "goal": "green square",
 7 |     "wall": "gray stone wall",
 8 |     "floor": "wooden floor tiles"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/trapfield/3/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "trapfield",
 3 |   "skin_id": "3",
 4 |   "visual_description": {
 5 |     "player": "blue adventurer",
 6 |     "goal": "golden eagle emblem",
 7 |     "trap": "fiery explosion",
 8 |     "floor": "gray stone bricks"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/trapfield/2/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "trapfield",
 3 |   "skin_id": "2",
 4 |   "visual_description": {
 5 |     "player": "blue parka explorer and penguin",
 6 |     "goal": "red flag",
 7 |     "trap": "blue water pool",
 8 |     "floor": "blue ice crystals"
 9 |   }
10 | }


--------------------------------------------------------------------------------
/skins/maze3d/1/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze3d",
 3 |   "skin_id": "1",
 4 |   "visual_description": {
 5 |     "start_cube": "blue cube",
 6 |     "goal_cube": "red cube",
 7 |     "default_cube": "gray cube",
 8 |     "ball": "golden ball with orange edge"
 9 |   }
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/skins/sokoban/1/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "sokoban",
 3 |   "skin_id": "1",
 4 |   "visual_description": {
 5 |     "player": "blue circle",
 6 |     "goal": "pink square",
 7 |     "box": "yellow square",
 8 |     "wall": "gray square",
 9 |     "floor": "white square"
10 |   }
11 | }


--------------------------------------------------------------------------------
/AutoEnv/config/env_skin_gen.yaml:
--------------------------------------------------------------------------------
 1 | # Image generation model (required)
 2 | image_model: "gemini-2.5-flash-image"
 3 | 
 4 | # Maze mode configuration
 5 | maze_type: "maze"
 6 | theme: "ancient stone with moss and cracks"
 7 | 
 8 | # Output directory
 9 | envs_root_path: "workspace/envs"
10 | 
11 | 


--------------------------------------------------------------------------------
/skins/maze3d/3/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze3d",
 3 |   "skin_id": "3",
 4 |   "visual_description": {
 5 |     "start_cube": "bright green cube",
 6 |     "goal_cube": "magenta cube",
 7 |     "default_cube": "light gray cube",
 8 |     "ball": "cyan ball with blue edge"
 9 |   }
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/skins/sokoban/5/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "sokoban",
 3 |   "skin_id": "5",
 4 |   "visual_description": {
 5 |     "player": "brown-haired character",
 6 |     "goal": "green x",
 7 |     "box": "wooden crate",
 8 |     "wall": "orange floor tiles",
 9 |     "floor": "beige sand texture"
10 |   }
11 | }


--------------------------------------------------------------------------------
/skins/maze3d/4/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze3d",
 3 |   "skin_id": "4",
 4 |   "visual_description": {
 5 |     "start_cube": "coral red cube",
 6 |     "goal_cube": "royal blue cube",
 7 |     "default_cube": "mint green cube",
 8 |     "ball": "bright green ball with teal edge"
 9 |   }
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/skins/maze3d/2/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "maze3d",
 3 |   "skin_id": "2",
 4 |   "visual_description": {
 5 |     "start_cube": "dark maroon cube",
 6 |     "goal_cube": "teal green cube",
 7 |     "default_cube": "dark slate blue cube",
 8 |     "ball": "purple ball with magenta edge"
 9 |   }
10 | }
11 | 
12 | 


--------------------------------------------------------------------------------
/skins/sokoban/4/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "sokoban",
 3 |   "skin_id": "4",
 4 |   "visual_description": {
 5 |     "player": "purple wizard",
 6 |     "goal": "blue magic circle",
 7 |     "box": "purple wooden crate",
 8 |     "wall": "purple rune stones",
 9 |     "floor": "gray stone bricks"
10 |   }
11 | }


--------------------------------------------------------------------------------
/skins/sokoban/2/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "sokoban",
 3 |   "skin_id": "2",
 4 |   "visual_description": {
 5 |     "player": "blue worker with red pants",
 6 |     "goal": "golden square pattern",
 7 |     "box": "wooden crate",
 8 |     "wall": "rusty metal door",
 9 |     "floor": "gray stone tiles"
10 |   }
11 | }


--------------------------------------------------------------------------------
/skins/sokoban/3/description.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "game_type": "sokoban",
 3 |   "skin_id": "3",
 4 |   "visual_description": {
 5 |     "player": "brown-haired boy with a box",
 6 |     "goal": "yellow and green checkered circle",
 7 |     "box": "wooden crate",
 8 |     "wall": "brown brick wall",
 9 |     "floor": "gray stone floor"
10 |   }
11 | }


--------------------------------------------------------------------------------
/core/schema/__init__.py:
--------------------------------------------------------------------------------
 1 | from .position import Position, BBox
 2 | from .entity import Entity
 3 | from .grid import Grid
 4 | from .render import RenderConfig
 5 | from .state import UnifiedState
 6 | 
 7 | __all__ = [
 8 |     'Position',
 9 |     'BBox',
10 |     'Entity',
11 |     'Grid',
12 |     'RenderConfig',
13 |     'UnifiedState',
14 | ]
15 | 
16 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | VR-Bench utility module
 3 | """
 4 | 
 5 | from .video_processor import (
 6 |     VideoProcessor,
 7 |     normalize_video,
 8 |     resize_video_to_frames,
 9 |     get_video_info,
10 | )
11 | 
12 | __all__ = [
13 |     'VideoProcessor',
14 |     'normalize_video',
15 |     'resize_video_to_frames',
16 |     'get_video_info',
17 | ]
18 | 
19 | 


--------------------------------------------------------------------------------
/generation/__init__.py:
--------------------------------------------------------------------------------
 1 | """Data generation system"""
 2 | 
 3 | from generation.path_finder import (
 4 |     find_optimal_paths,
 5 |     find_maze_paths,
 6 |     find_trapfield_paths,
 7 |     find_pathfinder_paths
 8 | )
 9 | 
10 | __all__ = [
11 |     'find_optimal_paths',
12 |     'find_maze_paths',
13 |     'find_trapfield_paths',
14 |     'find_pathfinder_paths'
15 | ]
16 | 


--------------------------------------------------------------------------------
/AutoEnv/base/utils/image.py:
--------------------------------------------------------------------------------
 1 | """Image utilities."""
 2 | 
 3 | import base64
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | def save_base64_image(img_b64: str, path: Path) -> None:
 8 |     """Save a base64-encoded image to file."""
 9 |     path.parent.mkdir(parents=True, exist_ok=True)
10 |     img_bytes = base64.b64decode(img_b64)
11 |     path.write_bytes(img_bytes)
12 | 
13 | 


--------------------------------------------------------------------------------
/games/trapfield/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | TrapField game configuration
 3 | """
 4 | 
 5 | from .constants import EMPTY_CELL, TRAP_CELL, PLAYER_CELL, GOAL_CELL
 6 | 
 7 | # Required texture files
 8 | REQUIRED_TEXTURES = ['floor', 'trap', 'player', 'goal']
 9 | 
10 | # Mapping from cell type to layer
11 | CELL_LAYER_MAP = {
12 |     EMPTY_CELL: 'floor',
13 |     TRAP_CELL: 'trap',
14 |     PLAYER_CELL: 'player',
15 |     GOAL_CELL: 'goal'
16 | }
17 | 
18 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/executors/__init__.py:
--------------------------------------------------------------------------------
1 | from .maze_executor import MazeExecutor
2 | from .sokoban_executor import SokobanExecutor
3 | from .trapfield_executor import TrapFieldExecutor
4 | from .pathfinder_executor import PathfinderExecutor
5 | from .maze3d_executor import Maze3DExecutor
6 | 
7 | __all__ = ['MazeExecutor', 'SokobanExecutor', 'TrapFieldExecutor', 'PathfinderExecutor', 'Maze3DExecutor']
8 | 
9 | 


--------------------------------------------------------------------------------
/games/maze/constants.py:
--------------------------------------------------------------------------------
 1 | EMPTY_CELL = 0  # cell-type codes (values 0-3)
 2 | WALL_CELL = 1
 3 | PLAYER_CELL = 2
 4 | GOAL_CELL = 3
 5 | 
 6 | CELL_SIZE = 30  # presumably pixels per cell -- TODO confirm against the renderer
 7 | 
 8 | IMAGES_DIR = "images"
 9 | STATES_DIR = "states"
10 | VIDEOS_DIR = "video"  # NOTE: singular "video", unlike the plural names above
11 | DATA_PATH = "data.json"
12 | 
13 | ALLOWED_SIZES = [9, 11, 13]  # NOTE(review): config/config_maze.yaml requests maze_size 7/11/15 -- confirm whether this list is still enforced
14 | SIZE_LABELS = ["Small", "Medium", "Large"]
15 | 
16 | PLOT_LEVELS = {  # keys are the same sizes listed in ALLOWED_SIZES
17 |     9: "Easy",
18 |     11: "Medium",
19 |     13: "Hard",
20 | }
21 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/__init__.py:
--------------------------------------------------------------------------------
 1 | from .vlm_client import VLMClient
 2 | from .vlm_evaluator import VLMEvaluator
 3 | from .game_executor import GameExecutor
 4 | from .action_utils import parse_actions
 5 | from .action_metrics import calculate_sr, calculate_pr, calculate_mr
 6 | 
 7 | __all__ = [
 8 |     'VLMClient',
 9 |     'VLMEvaluator',
10 |     'GameExecutor',
11 |     'parse_actions',
12 |     'calculate_sr',
13 |     'calculate_pr',
14 |     'calculate_mr',
15 | ]
16 | 
17 | 


--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Core module for game rendering and texture handling.
 3 | Shared by all games (Sokoban, Maze, etc.)
 4 | """
 5 | 
 6 | from .constants import *
 7 | from .texture_handler import BaseTextureHandler
 8 | from .renderer import BaseRenderer
 9 | 
10 | __all__ = [
11 |     'BaseTextureHandler',
12 |     'BaseRenderer',
13 |     'EMPTY',
14 |     'WALL',
15 |     'PLAYER',
16 |     'TARGET',
17 |     'BOX',
18 |     'BOX_ON_TARGET',
19 |     'PLAYER_ON_TARGET',
20 | ]
21 | 
22 | 


--------------------------------------------------------------------------------
/games/maze/templates/__init__.py:
--------------------------------------------------------------------------------
 1 | from .available_directions import AvailableDirections
 2 | from .player_position import PlayerPosition
 3 | from .goal_position import GoalPosition
 4 | from .position_after_moving import PositionAfterMoving
 5 | from .find_path_to_goal import FindPathToGoal
 6 | from .turn_count import TurnCount
 7 | 
 8 | __all__ = [
 9 |     "AvailableDirections",
10 |     "PlayerPosition",
11 |     "GoalPosition",
12 |     "PositionAfterMoving",
13 |     "FindPathToGoal",
14 |     "TurnCount",
15 | ]
16 | 


--------------------------------------------------------------------------------
/AutoEnv/autoenv/pipeline/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AutoEnv Pipeline Module
 3 | Maze mode visual pipeline exports.
 4 | """
 5 | 
 6 | from autoenv.pipeline.visual import (
 7 |     AnalyzerNode,
 8 |     AssetGeneratorNode,
 9 |     AutoEnvContext,
10 |     BackgroundRemovalNode,
11 |     StrategistNode,
12 |     VisualPipeline,
13 | )
14 | 
15 | __all__ = [
16 |     "VisualPipeline",
17 |     "AutoEnvContext",
18 |     "AnalyzerNode",
19 |     "StrategistNode",
20 |     "AssetGeneratorNode",
21 |     "BackgroundRemovalNode",
22 | ]
23 | 


--------------------------------------------------------------------------------
/config/config_maze.yaml:
--------------------------------------------------------------------------------
 1 | # Maze 数据集生成配置
 2 | 
 3 | # 游戏类型
 4 | game_type: "maze"
 5 | 
 6 | # 皮肤文件夹根目录
 7 | skins_root: "skins/maze"
 8 | # 输出根目录
 9 | output_root: "dataset_output/generated_levels_maze"
10 | 
11 | # Maze 难度配置
12 | difficulties:
13 |   easy:
14 |     maze_size: 7
15 |     count: 120
16 | 
17 |   medium:
18 |     maze_size: 11
19 |     count: 120
20 | 
21 |   hard:
22 |     maze_size: 15
23 |     count: 120
24 | 
25 | # 生成配置
26 | generation:
27 |   fps: 24  
28 |   max_duplicate_retries: 100
29 | 
30 | # 并行配置
31 | parallel:
32 |   max_workers: 4
33 | 
34 | 


--------------------------------------------------------------------------------
/games/maze/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Maze game configuration.
 3 | """
 4 | 
 5 | from core.constants import *  # NOTE(review): the *_CELL names used below must come from this star import -- confirm core.constants defines them (games/maze/constants.py does)
 6 | 
 7 | # Required textures for Maze
 8 | REQUIRED_TEXTURES = ['floor', 'wall', 'player', 'target']
 9 | 
10 | # Cell type to layer and texture mapping
11 | # Format: cell_value -> (layer, texture_name)
12 | # Layer 1: floor (handled separately)
13 | # Layer 2: walls and targets (goals)
14 | # Layer 3: player
15 | CELL_LAYER_MAP = {
16 |     EMPTY_CELL: (0, None),  # layer 0, no texture name
17 |     WALL_CELL: (2, 'wall'),
18 |     PLAYER_CELL: (3, 'player'),
19 |     GOAL_CELL: (2, 'target')
20 | }
21 | 
22 | 


--------------------------------------------------------------------------------
/scripts/generate_videos.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Generate videos of all optimal paths for a dataset
 3 | # Usage: ./scripts/generate_videos.sh [dataset_dir] [workers] [skin_dir]
 4 | # Example: ./scripts/generate_videos.sh dataset/maze/1 8 skins/maze/1
 5 | 
 6 | DATASET_DIR=${1:-dataset/maze/1/easy}
 7 | WORKERS=${2:-8}
 8 | SKIN=${3:-skins/maze/1}  # repo-relative default; was a machine-specific absolute path
 9 | if [ ! -d "$DATASET_DIR" ]; then
10 |     echo "错误: 数据集目录不存在: $DATASET_DIR"
11 |     exit 1
12 | fi
13 | 
14 | python generation/generate_videos.py \
15 |     "$DATASET_DIR" \
16 |     --workers "$WORKERS" \
17 |     --skin "$SKIN" \
18 |     2>&1 | grep -v "^Processing" | grep -v "^Frame"
19 | 
20 | 


--------------------------------------------------------------------------------
/AutoEnv/autoenv/pipeline/visual/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Visual Pipeline Module
 3 | Maze mode skin generation pipeline.
 4 | """
 5 | 
 6 | from autoenv.pipeline.visual.nodes import (
 7 |     AnalyzerNode,
 8 |     AssetGeneratorNode,
 9 |     AutoEnvContext,
10 |     BackgroundRemovalNode,
11 |     StrategistNode,
12 | )
13 | from autoenv.pipeline.visual.pipeline import VisualPipeline
14 | 
15 | __all__ = [
16 |     "AnalyzerNode",
17 |     "AssetGeneratorNode",
18 |     "AutoEnvContext",
19 |     "BackgroundRemovalNode",
20 |     "StrategistNode",
21 |     "VisualPipeline",
22 | ]
23 | 


--------------------------------------------------------------------------------
/games/sokoban/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Sokoban game module.
 3 | 
 4 | This module provides Sokoban game logic, rendering, and utilities.
 5 | """
 6 | 
 7 | from .board import SokobanBoard, generate_random_board
 8 | from .textured_board import TexturedSokobanBoard, generate_textured_random_board, get_shared_texture_handler
 9 | from .renderer import SokobanRenderer, get_shared_renderer
10 | 
11 | __all__ = [
12 |     'SokobanBoard',
13 |     'generate_random_board',
14 |     'TexturedSokobanBoard',
15 |     'generate_textured_random_board',
16 |     'get_shared_texture_handler',
17 |     'SokobanRenderer',
18 |     'get_shared_renderer',
19 | ]
20 | 
21 | 


--------------------------------------------------------------------------------
/config/vlm/maze3d_eval.yaml:
--------------------------------------------------------------------------------
 1 | game: maze3d
 2 | dataset: dataset/maze3d_new/1
 3 | output: vlm_eval_results/maze3d
 4 | 
 5 | models:
 6 |   # API 模型示例
 7 |   - name: Qwen/Qwen2.5-VL-7B-Instruct
 8 |     type: api
 9 |     base_url: http://localhost:8123/v1
10 |     max_tokens: 10000
11 |     temperature: 1.0
12 | 
13 |   - name: gpt-5
14 |     type: api
15 |     base_url: https://newapi.deepwisdom.ai/v1
16 |     max_tokens: 60000
17 |     temperature: 1.0
18 |   - name: gemini-2.5-pro
19 |     type: api
20 |     base_url: https://newapi.deepwisdom.ai/v1
21 |     max_tokens: 60000
22 |     temperature: 1.0
23 | 
24 | workers: 10
25 | max_levels: -1
26 | 
27 | 


--------------------------------------------------------------------------------
/config/vlm/sokoban_eval.yaml:
--------------------------------------------------------------------------------
 1 | game: sokoban
 2 | dataset: dataset/sokoban/1
 3 | output: vlm_eval_results/sokoban
 4 | 
 5 | models:
 6 |   - name: gpt-5
 7 |     type: api
 8 |     base_url: https://newapi.deepwisdom.ai/v1
 9 |     max_tokens: 60000
10 |     temperature: 1.0
11 |   - name: gemini-2.5-pro
12 |     type: api
13 |     base_url: https://newapi.deepwisdom.ai/v1
14 |     max_tokens: 60000
15 |     temperature: 1.0
16 |   - name: Qwen/Qwen2.5-VL-7B-Instruct
17 |     type: api
18 |     base_url: http://localhost:8123/v1
19 |     max_tokens: 10000
20 |     temperature: 1.0
21 | 
22 | workers: 10
23 | max_levels: -1
24 | assets_folder: skins/sokoban/1
25 | 
26 | 


--------------------------------------------------------------------------------
/config/vlm/trapfield_eval.yaml:
--------------------------------------------------------------------------------
 1 | game: trapfield
 2 | dataset: dataset/trapfield/1
 3 | output: vlm_eval_results/trapfield
 4 | 
 5 | models:
 6 |   - name: gpt-5
 7 |     type: api
 8 |     base_url: https://newapi.deepwisdom.ai/v1
 9 |     max_tokens: 60000
10 |     temperature: 1.0
11 |   - name: gemini-2.5-pro
12 |     type: api
13 |     base_url: https://newapi.deepwisdom.ai/v1
14 |     max_tokens: 60000
15 |     temperature: 1.0
16 |   - name: Qwen/Qwen2.5-VL-7B-Instruct
17 |     type: api
18 |     base_url: http://localhost:8123/v1
19 |     max_tokens: 10000
20 |     temperature: 1.0
21 | 
22 | workers: 10
23 | max_levels: -1
24 | assets_folder: skins/trapfield/1
25 | 
26 | 


--------------------------------------------------------------------------------
/games/trapfield/constants.py:
--------------------------------------------------------------------------------
 1 | """
 2 | TrapField game constant definitions
 3 | """
 4 | 
 5 | # Cell types
 6 | EMPTY_CELL = 0      # Empty floor (walkable)
 7 | TRAP_CELL = 1       # Trap (stepping on it ends the game)
 8 | PLAYER_CELL = 2     # Player start
 9 | GOAL_CELL = 3       # Goal (destination)
10 | 
11 | # Rendering configuration
12 | CELL_SIZE = 64      # Pixel size of each cell
13 | 
14 | # Difficulty configuration
15 | DIFFICULTY_CONFIG = {
16 |     'easy': {
17 |         'grid_size': 7,      # 7x7 grid
18 |         'trap_density': 0.2, # 20% trap density
19 |     },
20 |     'medium': {
21 |         'grid_size': 11,     # 11x11 grid
22 |         'trap_density': 0.3, # 30% trap density
23 |     },
24 |     'hard': {
25 |         'grid_size': 15,     # 15x15 grid
26 |         'trap_density': 0.35,# 35% trap density
27 |     }
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/games/maze/generators/video_gen.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Iterable, Optional, Sequence, Tuple
 4 | 
 5 | from ..renderer import get_shared_renderer
 6 | 
 7 | Coordinate = Tuple[int, int]
 8 | 
 9 | 
10 | class PillowNotInstalledError(RuntimeError):
11 |     pass
12 | 
13 | 
14 | def create_solution_video(
15 |     maze: Sequence[Sequence[int]],
16 |     path: Iterable[Coordinate],
17 |     cell_size: int,
18 |     save_path: str,
19 |     frame_duration_ms: int = 300,
20 |     assets_folder: Optional[str] = None,
21 | ) -> None:
22 |     renderer = get_shared_renderer(assets_folder)
23 |     renderer.render_video(maze, path, save_path, frame_duration_ms)
24 | 


--------------------------------------------------------------------------------
/evaluation/videomodel_eval/__init__.py:
--------------------------------------------------------------------------------
 1 | from .evaluator import TrajectoryEvaluator
 2 | from .extractor import CSRTTracker
 3 | from .metrics import (
 4 |     PrecisionRateMetric,
 5 |     StepMetric,
 6 |     ExactMatchMetric,
 7 |     normalize_trajectory,
 8 |     resample_by_length,
 9 |     compute_path_length
10 | )
11 | from .utils import get_video_info, draw_trajectory_comparison
12 | 
13 | __all__ = [
14 |     'TrajectoryEvaluator',
15 |     'CSRTTracker',
16 |     'PrecisionRateMetric',
17 |     'StepMetric',
18 |     'ExactMatchMetric',
19 |     'normalize_trajectory',
20 |     'resample_by_length',
21 |     'compute_path_length',
22 |     'get_video_info',
23 |     'draw_trajectory_comparison',
24 | ]
25 | 
26 | 


--------------------------------------------------------------------------------
/config/vlm/maze_eval.yaml:
--------------------------------------------------------------------------------
 1 | game: maze
 2 | dataset: dataset/maze/1
 3 | output: vlm_eval_results/maze
 4 | 
 5 | models:
 6 |   # 使用 SGLang 服务（先运行 bash scripts/start_sglang_server.sh）
 7 |   - name: Qwen/Qwen2.5-VL-7B-Instruct
 8 |     type: api
 9 |     base_url: http://localhost:8123/v1
10 |     max_tokens: 10000
11 |     temperature: 1.0
12 | 
13 |   # API 模型示例
14 |   - name: gpt-5
15 |     type: api
16 |     base_url: https://newapi.deepwisdom.ai/v1
17 |     max_tokens: 60000
18 |     temperature: 1.0
19 |   - name: gemini-2.5-pro
20 |     type: api
21 |     base_url: https://newapi.deepwisdom.ai/v1
22 |     max_tokens: 60000
23 |     temperature: 1.0
24 | 
25 | workers: 10
26 | max_levels: -1
27 | assets_folder: skins/maze/1
28 | 
29 | 


--------------------------------------------------------------------------------
/games/maze/generators/image_gen.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Optional, Sequence
 4 | 
 5 | from ..renderer import get_shared_renderer
 6 | 
 7 | 
 8 | class PillowNotInstalledError(RuntimeError):
 9 |     pass
10 | 
11 | 
12 | def draw_maze(maze: Sequence[Sequence[int]], cell_size: int, save_path: str,
13 |               assets_folder: Optional[str] = None) -> None:
14 |     try:
15 |         from PIL import Image
16 |     except ImportError as exc:
17 |         raise PillowNotInstalledError(
18 |             "Pillow is required to render maze images. Install it with 'pip install pillow'."
19 |         ) from exc
20 | 
21 |     renderer = get_shared_renderer(assets_folder)
22 |     renderer.render_maze(maze, save_path)
23 | 


--------------------------------------------------------------------------------
/config/vlm/pathfinder_eval.yaml:
--------------------------------------------------------------------------------
 1 | game: pathfinder
 2 | dataset: dataset/irregular_maze/1
 3 | output: vlm_eval_results/irregular_maze
 4 | 
 5 | models:
 6 |   # 使用 SGLang 服务（先运行 bash scripts/start_sglang_server.sh）
 7 |   # - name: Qwen/Qwen2.5-VL-7B-Instruct
 8 |   #   type: api
 9 |   #   base_url: http://localhost:8123/v1
10 |   #   max_tokens: 10000
11 |   #   temperature: 1.0
12 | 
13 |   # API 模型示例
14 |   - name: gpt-5
15 |     type: api
16 |     base_url: https://newapi.deepwisdom.ai/v1
17 |     max_tokens: 60000
18 |     temperature: 1.0
19 |   - name: gemini-2.5-pro
20 |     type: api
21 |     base_url: https://newapi.deepwisdom.ai/v1
22 |     max_tokens: 60000
23 |     temperature: 1.0
24 | 
25 | workers: 10
26 | max_levels: -1
27 | assets_folder: skins/pathfinder/1
28 | 
29 | 


--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | """Evaluation system - includes video-model evaluation and VLM evaluation"""
 2 | from . import vlm_eval
 3 | from . import videomodel_eval
 4 | 
 5 | # Re-exported from videomodel_eval
 6 | from evaluation.videomodel_eval.extractor import CSRTTracker
 7 | from evaluation.videomodel_eval.evaluator import TrajectoryEvaluator
 8 | from evaluation.videomodel_eval.metrics import (
 9 |     PrecisionRateMetric,
10 |     StepMetric,
11 |     ExactMatchMetric,
12 |     normalize_trajectory,
13 |     resample_by_length,
14 |     compute_path_length
15 | )
16 | 
17 | __all__ = [
18 |     'vlm_eval',
19 |     'videomodel_eval',
20 |     'CSRTTracker',
21 |     'TrajectoryEvaluator',
22 |     'PrecisionRateMetric',
23 |     'StepMetric',
24 |     'ExactMatchMetric',
25 |     'normalize_trajectory',
26 |     'resample_by_length',
27 |     'compute_path_length'
28 | ]


--------------------------------------------------------------------------------
/core/schema/grid.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import List, Dict
 3 | 
 4 | 
 5 | @dataclass
 6 | class Grid:
 7 |     data: List[List[int]]
 8 |     height: int
 9 |     width: int
10 |     
11 |     def to_dict(self) -> Dict:
12 |         return {
13 |             "data": self.data,
14 |             "height": self.height,
15 |             "width": self.width
16 |         }
17 |     
18 |     @classmethod
19 |     def from_dict(cls, data: Dict) -> 'Grid':
20 |         return cls(
21 |             data=data["data"],
22 |             height=data["height"],
23 |             width=data["width"]
24 |         )
25 |     
26 |     @classmethod
27 |     def from_2d_list(cls, grid: List[List[int]]) -> 'Grid':
28 |         return cls(
29 |             data=grid,
30 |             height=len(grid),
31 |             width=len(grid[0]) if grid else 0
32 |         )
33 | 
34 | 


--------------------------------------------------------------------------------
/config/config_trapfield.yaml:
--------------------------------------------------------------------------------
 1 | # TrapField 数据集生成配置
 2 | 
 3 | # 游戏类型
 4 | game_type: "trapfield"
 5 | 
 6 | # 皮肤文件夹根目录
 7 | skins_root: "skins/trapfield"
 8 | 
 9 | # 输出根目录
10 | output_root: "generated_levels_trapfield"
11 | 
12 | # TrapField 难度配置
13 | difficulties:
14 |   easy:
15 |     grid_size: 5          # 5x5 网格
16 |     trap_density: 0.2     # 20% 陷阱密度
17 |     max_attempts: 50      # 最大生成尝试次数
18 |     count: 120            # 生成数量
19 | 
20 |   medium:
21 |     grid_size: 7         # 7x7 网格
22 |     trap_density: 0.3     # 30% 陷阱密度
23 |     max_attempts: 50
24 |     count: 120
25 | 
26 |   hard:
27 |     grid_size: 11         # 11x11 网格
28 |     trap_density: 0.35    # 35% 陷阱密度
29 |     max_attempts: 50
30 |     count: 120
31 | 
32 | # 生成配置
33 | generation:
34 |   fps: 24  # 固定24fps（连续移动动画）
35 |   max_duplicate_retries: 100
36 | 
37 | # 并行配置
38 | parallel:
39 |   max_workers: 4
40 | 
41 | 


--------------------------------------------------------------------------------
/config/config_3d_maze.yaml:
--------------------------------------------------------------------------------
 1 | # 3D Maze 游戏数据集生成配置
 2 | 
 3 | # 游戏类型
 4 | game_type: "maze3d"
 5 | 
 6 | # 输出根目录
 7 | output_root: "generated_levels_maze3d"
 8 | 
 9 | # 皮肤目录（包含多个皮肤子目录，每个子目录包含 colors.json 和 description.json）
10 | skins_root: "skins/maze3d"
11 | 
12 | # 难度配置
13 | difficulties:
14 |   # 路径查找问题 - 不同难度
15 |   easy:
16 |     qa_type: "path_finding"
17 |     grid_size: [6, 6, 5]  # [宽度, 深度, 高度]
18 |     max_attempts: 100
19 |     count: 120
20 | 
21 |   medium:
22 |     qa_type: "path_finding"
23 |     grid_size: [8, 8, 7]
24 |     max_attempts: 100
25 |     count: 120
26 | 
27 |   hard:
28 |     qa_type: "path_finding"
29 |     grid_size: [10, 10, 9]
30 |     max_attempts: 100
31 |     count: 120
32 | 
33 | 
34 | # 生成配置
35 | generation:
36 |   max_retries: 100
37 |   timeout_seconds: 30
38 |   fps: 24  
39 |   generate_video: true  # 是否生成视频
40 | 
41 | # 并行配置
42 | parallel:
43 |   max_workers: 4
44 | 
45 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | # VR-Bench Environment Configuration
 2 | # Copy this file to .env and fill in your values
 3 | 
 4 | # Dataset paths
 5 | DATASET_ROOT=/path/to/dataset_VR
 6 | OUTPUT_ROOT=/path/to/output
 7 | 
 8 | # Model cache
 9 | HF_CACHE_DIR=/path/to/huggingface_model
10 | TRANSFORMERS_CACHE=/path/to/huggingface_model
11 | 
12 | # CUDA configuration
13 | CUDA_HOME=/usr/local/cuda
14 | CUDA_VISIBLE_DEVICES=0,1
15 | 
16 | # API keys (for VLM evaluation)
17 | OPENAI_API_KEY=your_api_key_here
18 | OPENAI_BASE_URL=https://api.openai.com/v1
19 | 
20 | # Image Generation API (for AutoEnv skin generation)
21 | IMAGE_GEN_API_KEY=your_image_gen_api_key_here
22 | IMAGE_GEN_BASE_URL=https://api.openai.com/v1
23 | IMAGE_GEN_MODEL=gemini-2.5-flash-image
24 | 
25 | # Generation settings
26 | MAX_WORKERS=4
27 | DEFAULT_FPS=24
28 | 
29 | # Evaluation settings
30 | EVAL_WORKERS=4
31 | EVAL_NUM_SAMPLES=1000
32 | EVAL_THRESHOLD=0.05
33 | 
34 | 


--------------------------------------------------------------------------------
/games/sokoban/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Sokoban game configuration.
 3 | """
 4 | 
 5 | from core.constants import *
 6 | 
 7 | # Required textures for Sokoban
 8 | REQUIRED_TEXTURES = ['floor', 'wall', 'player', 'target', 'box']
 9 | 
10 | # Cell type to layer and texture mapping
11 | # Format: cell_value -> (layer, texture_name)
12 | # Layer 1: floor (handled separately)
13 | # Layer 2: walls and targets
14 | # Layer 3: boxes and players
15 | CELL_LAYER_MAP = {
16 |     EMPTY: (0, None),
17 |     WALL: (2, 'wall'),
18 |     PLAYER: (3, 'player'),
19 |     TARGET: (2, 'target'),
20 |     BOX: (3, 'box'),
21 |     BOX_ON_TARGET: (3, 'box'),      # Box on target: target in layer 2, box in layer 3
22 |     PLAYER_ON_TARGET: (3, 'player')  # Player on target: target in layer 2, player in layer 3
23 | }
24 | 
25 | # Special handling for combined cells
26 | COMBINED_CELLS = {
27 |     BOX_ON_TARGET: ('target', 'box'),      # (layer 2, layer 3)
28 |     PLAYER_ON_TARGET: ('target', 'player')  # (layer 2, layer 3)
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/core/schema/render.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import Dict
 3 | 
 4 | 
 5 | @dataclass
 6 | class RenderConfig:
 7 |     cell_size: int
 8 |     image_width: int
 9 |     image_height: int
10 |     
11 |     def to_dict(self) -> Dict[str, int]:
12 |         return {
13 |             "cell_size": self.cell_size,
14 |             "image_width": self.image_width,
15 |             "image_height": self.image_height
16 |         }
17 |     
18 |     @classmethod
19 |     def from_dict(cls, data: Dict[str, int]) -> 'RenderConfig':
20 |         return cls(
21 |             cell_size=data["cell_size"],
22 |             image_width=data["image_width"],
23 |             image_height=data["image_height"]
24 |         )
25 |     
26 |     @classmethod
27 |     def from_grid_size(cls, height: int, width: int, cell_size: int) -> 'RenderConfig':
28 |         return cls(
29 |             cell_size=cell_size,
30 |             image_width=width * cell_size,
31 |             image_height=height * cell_size
32 |         )
33 | 
34 | 


--------------------------------------------------------------------------------
/core/constants.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Unified constants for all games.
 3 | """
 4 | 
 5 | # Cell types - common across all games
 6 | EMPTY = 0
 7 | WALL = 1
 8 | PLAYER = 2
 9 | TARGET = 3
10 | 
11 | # Sokoban-specific cell types (Sokoban uses different values)
12 | SOKOBAN_EMPTY = 0
13 | SOKOBAN_WALL = 1
14 | SOKOBAN_BOX = 2
15 | SOKOBAN_TARGET = 3
16 | SOKOBAN_BOX_ON_TARGET = 4
17 | SOKOBAN_PLAYER = 5
18 | SOKOBAN_PLAYER_ON_TARGET = 6
19 | 
20 | # Legacy Sokoban constants (for backward compatibility with core constants)
21 | BOX = 4
22 | BOX_ON_TARGET = 5
23 | PLAYER_ON_TARGET = 6
24 | 
25 | # Maze-specific aliases (for compatibility)
26 | EMPTY_CELL = EMPTY
27 | WALL_CELL = WALL
28 | PLAYER_CELL = PLAYER
29 | GOAL_CELL = TARGET
30 | 
31 | # Rendering configuration
32 | DEFAULT_CELL_SIZE = 64
33 | SUPPORTED_IMAGE_FORMATS = ('.png', '.jpg', '.jpeg')
34 | 
35 | # Texture names - unified across all games
36 | TEXTURE_NAMES = {
37 |     'floor': 'floor',
38 |     'wall': 'wall',
39 |     'player': 'player',
40 |     'target': 'target',
41 |     'box': 'box'
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/games/maze/utils/file_utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from pathlib import Path
 4 | import shutil
 5 | from typing import Iterable, Optional
 6 | 
 7 | 
 8 | def ensure_directory(path: str | Path) -> None:
 9 |     Path(path).mkdir(parents=True, exist_ok=True)
10 | 
11 | 
12 | def clean_directory(path: str | Path) -> None:
13 |     directory = Path(path)
14 |     if not directory.exists():
15 |         return
16 |     for entry in directory.iterdir():
17 |         if entry.is_dir():
18 |             shutil.rmtree(entry)
19 |         else:
20 |             entry.unlink()
21 | 
22 | 
def setup_output_directories(
    output_dir: str,
    images_dir: str,
    states_dir: str,
    video_dir: Optional[str] = None,
) -> None:
    """Create the output folders and empty the per-run ones.

    ``output_dir``, ``images_dir`` and ``states_dir`` are created if
    missing; ``images_dir`` and ``states_dir`` are then emptied, while
    ``output_dir`` itself is not cleaned. When ``video_dir`` is given it is
    created and emptied as well.
    """
    for folder in (output_dir, images_dir, states_dir):
        ensure_directory(folder)
    for folder in (images_dir, states_dir):
        clean_directory(folder)

    if video_dir is None:
        return
    ensure_directory(video_dir)
    clean_directory(video_dir)
37 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/game_executor.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import List, Dict, Any, Tuple
 3 | from core.schema import UnifiedState
 4 | 
 5 | 
 6 | class GameExecutor(ABC):
 7 |     @abstractmethod
 8 |     def load_state(self, state_path: str) -> UnifiedState:
 9 |         pass
10 |     
11 |     @abstractmethod
12 |     def get_optimal_solution(self, state: UnifiedState) -> List[Dict[str, Any]]:
13 |         pass
14 |     
15 |     @abstractmethod
16 |     def execute_action(self, state: UnifiedState, action: Dict[str, Any]) -> Tuple[UnifiedState, bool, str]:
17 |         pass
18 |     
19 |     @abstractmethod
20 |     def check_win(self, state: UnifiedState) -> bool:
21 |         pass
22 |     
23 |     @abstractmethod
24 |     def render_state(self, state: UnifiedState, output_path: str) -> None:
25 |         pass
26 |     
27 |     @abstractmethod
28 |     def get_system_prompt(self) -> str:
29 |         pass
30 | 
31 |     @abstractmethod
32 |     def get_user_prompt(self) -> str:
33 |         pass
34 | 
35 |     def get_game_type(self) -> str:
36 |         return 'default'
37 | 
38 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/prompts/trapfield_prompt.py:
--------------------------------------------------------------------------------
# Placeholder format: {player}, {goal}, {trap}, {floor}
# Literal braces of the JSON example are doubled ({{ }}), so the template is
# presumably filled in with str.format -- confirm against the caller.
TRAPFIELD_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a grid-based maze.
{trap} tiles represent trap zones that must be avoided.
{floor} tiles represent open paths that can be moved through.
The {player} represents the starting point of the path.
The {goal} represents the goal or destination.
Task:
Infer the shortest valid path for the {player} to reach the {goal}.
Movement can only occur between adjacent open tiles — up, down, left, or right.
Diagonal movement is not allowed.
The path must not cross or touch any trap tiles.
Output Format:
Return the full movement sequence of the {player} as a JSON array of directions, where each element is one of "up", "down", "left", or "right".
Do not include any explanations, reasoning, or extra text.
Example of expected output:
{{
  "path": ["left", "left", "down", "down"]
}}
"""

# User-turn prompt; uses only the {player} and {goal} placeholders.
TRAPFIELD_USER_PROMPT_TEMPLATE = """Infer the shortest valid path for the {player} to reach the {goal}.
"""
23 | 
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 VR-Bench Team
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/AutoEnv/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 FoundationAgents
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/prompts/maze_prompt.py:
--------------------------------------------------------------------------------
# Placeholder format: {player}, {goal}, {wall}, {floor}
# Literal braces of the JSON example are doubled ({{ }}), so the template is
# presumably filled in with str.format -- confirm against the caller.
MAZE_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a grid-based maze.
{wall} tiles represent walls and cannot be crossed.
{floor} tiles represent open paths that can be moved through.
The {player} represents the starting point of the path.
The {goal} represents the goal or destination.


Task:
Infer the shortest valid path from the {player} starting point to the {goal} goal.
Movement can only occur between adjacent open tiles — up, down, left, or right.
Diagonal movement is not allowed, and the path must not cross or touch any walls.


Output Format:
Return the entire movement sequence of the {player} as a JSON array of directions, where each element is one of "up", "down", "left", or "right".
Do not include any explanations or additional text.


Example of expected output:
{{
  "path": ["up", "up", "left", "down", "right", "right"]
}}
"""

# User-turn prompt; uses only the {player} and {goal} placeholders.
MAZE_USER_PROMPT_TEMPLATE = """Infer the shortest valid path from the {player} starting point to the {goal} goal.
"""
28 | 
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/games/maze/generators/state_gen.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import sys
 4 | from pathlib import Path
 5 | from typing import Sequence
 6 | 
 7 | sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
 8 | 
 9 | from core.schema import UnifiedState, Grid, Entity, RenderConfig
10 | from .. import constants
11 | from ..utils import maze_utils
12 | 
13 | 
def save_state(maze: Sequence[Sequence[int]], save_path: str) -> None:
    """Describe ``maze`` as a ``UnifiedState`` and save it to ``save_path``.

    The player and goal positions are looked up with
    ``maze_utils.find_position`` using ``constants.PLAYER_CELL`` and
    ``constants.GOAL_CELL``. The state is built with version "1.0", game
    type "maze", no boxes and empty metadata.

    NOTE(review): both positions are indexed with [0] and [1]; this assumes
    ``find_position`` always finds the cell -- confirm.
    """
    player_pos = maze_utils.find_position(maze, constants.PLAYER_CELL)
    goal_pos = maze_utils.find_position(maze, constants.GOAL_CELL)

    rows = len(maze)
    cols = len(maze[0]) if maze else 0
    cell_size = constants.CELL_SIZE

    # Each row is copied into a plain list before being handed to Grid.
    grid = Grid.from_2d_list(list(map(list, maze)))
    player = Entity.from_grid_pos(player_pos[0], player_pos[1], cell_size)
    goal = Entity.from_grid_pos(goal_pos[0], goal_pos[1], cell_size)
    render = RenderConfig.from_grid_size(rows, cols, cell_size)

    UnifiedState(
        version="1.0",
        game_type="maze",
        grid=grid,
        player=player,
        goal=goal,
        boxes=[],
        render=render,
        metadata={},
    ).save(save_path)
34 | 


--------------------------------------------------------------------------------
/scripts/Wan2.2-TI2V-5B_lora.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from PIL import Image
 3 | from diffsynth import save_video
 4 | from diffsynth.pipelines.wan_video_new import WanVideoPipeline, ModelConfig
 5 | from modelscope import dataset_snapshot_download
 6 | 
 7 | pipe = WanVideoPipeline.from_pretrained(
 8 |     torch_dtype=torch.bfloat16,
 9 |     device="cuda",
10 |     model_configs=[
11 |         ModelConfig(model_id="Wan-AI/Wan2.2-TI2V-5B", origin_file_pattern="models_t5_umt5-xxl-enc-bf16.pth", offload_device="cpu"),
12 |         ModelConfig(model_id="Wan-AI/Wan2.2-TI2V-5B", origin_file_pattern="diffusion_pytorch_model*.safetensors", offload_device="cpu"),
13 |         ModelConfig(model_id="Wan-AI/Wan2.2-TI2V-5B", origin_file_pattern="Wan2.2_VAE.pth", offload_device="cpu"),
14 |     ],
15 | )
16 | 
17 | # Load LoRA weights
18 | pipe.load_lora(pipe.dit, f"path/to/your/lora/checkpoint.safetensors", alpha=1)
19 | 
20 | pipe.enable_vram_management()
21 | 
22 | 
23 | input_image = Image.open("path/to/your/input_image.png").resize((512, 512))
24 | video = pipe(
25 |     prompt = """Your prompt here"""
26 | ,
27 |     negative_prompt="",
28 |     seed=0, tiled=True,
29 |     height=512, width=512,
30 |     input_image=input_image,
31 |     num_frames=193,
32 | )
33 | save_video(video, "path/to/your/output_video.mp4", fps=15, quality=5)
34 | 


--------------------------------------------------------------------------------
/config/config_pathfinder.yaml:
--------------------------------------------------------------------------------
 1 | # PathFinder 游戏数据集生成配置
 2 | 
 3 | # 游戏类型
 4 | game_type: "pathfinder"
 5 | 
 6 | # 输出根目录
 7 | output_root: "generated_levels_pathfinder"
 8 | 
 9 | # 皮肤根目录（PathFinder 不需要皮肤，但保留字段以兼容系统）
10 | 
11 | skins_root: "skins/pathfinder"
12 | # 难度配置
13 | # 注意：难度通过多个因素综合区分
14 | # - 图片尺寸：图片越大，视觉范围越大
15 | # - 道路宽度：道路越窄，越难控制
16 | # - 节点密度：节点越密集，路径网络越复杂
17 | # - 支路数量：支路越多，干扰项越多
18 | # - 最短路径：要求的路径越长，难度越高
19 | # 实际配置在 games/pathfinder/constants.py 中的 DIFFICULTY_CONFIG
20 | difficulties:
21 |   easy:
22 |     difficulty: "easy"      # 难度名称
23 |     # 512x512, 30px道路, 稀疏节点(18%), 1条支路, 4节点路径
24 |     max_attempts: 50        # 最大生成尝试次数
25 |     count: 120              # 生成数量
26 | 
27 |   medium:
28 |     difficulty: "medium"
29 |     # 768x768, 22px道路, 中等节点(15%), 3条支路, 6节点路径
30 |     max_attempts: 50
31 |     count: 120
32 | 
33 |   hard:
34 |     difficulty: "hard"
35 |     # 1024x1024, 18px道路, 密集节点(12%), 5条支路, 8节点路径
36 |     max_attempts: 50
37 |     count: 120
38 | 
39 | # 视频生成配置
40 | video:
41 |   fps: 24                    # 视频帧率（会被 constants.py 中的配置覆盖）
42 |   add_grid: false           # 是否添加网格（PathFinder 不适用）
43 | 
44 | # 生成配置
45 | generation:
46 |   max_retries: 100          # 单个关卡最大重试次数
47 |   timeout_seconds: 30       # 单个关卡生成超时时间
48 | 
49 | # 并行配置
50 | parallel:
51 |   max_workers: 12           # 并行工作进程数
52 | 
53 | 


--------------------------------------------------------------------------------
/scripts/start_sglang_server.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # SGLang 服务启动脚本
 4 | 
 5 | MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
 6 | HOST="0.0.0.0"
 7 | PORT=8123
 8 | CACHE_DIR="/data/pengyiran/cvpr_v1/huggingface_model"
 9 | 
10 | # GPU 配置
11 | # 方式 1: 使用单个 GPU
12 | # GPU_IDS="2"
13 | # TP_SIZE=1
14 | 
15 | # 方式 2: 使用多个 GPU (Tensor Parallelism)
16 | GPU_IDS="2,3"
17 | TP_SIZE=2
18 | 
19 | echo "启动 SGLang 服务..."
20 | echo "模型: $MODEL_NAME"
21 | echo "地址: http://$HOST:$PORT"
22 | echo "缓存目录: $CACHE_DIR"
23 | echo "GPU: $GPU_IDS"
24 | 
25 | # 单 GPU 模式
26 | # CUDA_VISIBLE_DEVICES=$GPU_IDS python -m sglang.launch_server \
27 | #     --model-path $MODEL_NAME \
28 | #     --host $HOST \
29 | #     --port $PORT \
30 | #     --cache-dir $CACHE_DIR \
31 | #     --trust-remote-code
32 | 
33 | # 设置 HuggingFace 缓存目录
34 | export HF_HOME=$CACHE_DIR
35 | export TRANSFORMERS_CACHE=$CACHE_DIR
36 | 
37 | # 多 GPU 模式
38 | CUDA_VISIBLE_DEVICES=$GPU_IDS python -m sglang.launch_server \
39 |     --model-path $MODEL_NAME \
40 |     --host $HOST \
41 |     --port $PORT \
42 |     --tp $TP_SIZE \
43 |     --download-dir $CACHE_DIR \
44 |     --trust-remote-code \
45 |     --skip-server-warmup
46 | 
47 | # 其他可选参数：
48 | # --mem-fraction-static 0.9 # GPU 显存使用比例
49 | # --chat-template qwen      # 指定 chat template
50 | # --context-length 8192     # 最大上下文长度
51 | 
52 | 


--------------------------------------------------------------------------------
/scripts/videomodel_evaluate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # 批量评估生成的视频（自动匹配所有难度）
 3 | # 用法: ./scripts/evaluate.sh
 4 | 
 5 | # 设置 CUDA 环境变量（修复 CuPy 编译问题）
 6 | export CUDA_PATH=${CUDA_HOME:-/usr/local/cuda}
 7 | export CPATH=$CUDA_PATH/include:${CPATH}
 8 | export LD_LIBRARY_PATH=$CUDA_PATH/lib64:${LD_LIBRARY_PATH}
 9 | 
10 | NAME=    #maze name
11 | DATASET_DIR=dataset_VR/train/$NAME/1
12 | OUTPUT_DIR=dataset_VR/train/$NAME/1
13 | RESULT_DIR=eval_results/$NAME
14 | WORKERS=4
15 | NUM_SAMPLES=1000
16 | THRESHOLD=0.05
17 | FIDELITY_PIXEL_THRESHOLD=5
18 | FRAME_STEP=1
19 | TRACKER_TYPE=ncc       # 追踪器类型: csrt, ncc, optical_flow
20 | SEARCH_MARGIN=50       # NCC追踪器搜索边距（0=全图搜索，>0=局部搜索范围）
21 | USE_GPU=gpu
22 | 
23 | # 构建Python命令
24 | CMD="python evaluation/videomodel_eval/batch_evaluate.py \
25 |     \"$DATASET_DIR\" \
26 |     \"$OUTPUT_DIR\" \
27 |     \"$RESULT_DIR\" \
28 |     --threshold \"$THRESHOLD\" \
29 |     --num-samples \"$NUM_SAMPLES\" \
30 |     --workers \"$WORKERS\" \
31 |     --fidelity-pixel-threshold \"$FIDELITY_PIXEL_THRESHOLD\" \
32 |     --frame-step \"$FRAME_STEP\" \
33 |     --tracker-type \"$TRACKER_TYPE\" \
34 |     --search-margin \"$SEARCH_MARGIN\""
35 | 
36 | # 如果指定了gpu参数，添加--gpu标志
37 | if [ "$USE_GPU" = "gpu" ] || [ "$USE_GPU" = "GPU" ]; then
38 |     CMD="$CMD --gpu"
39 |     echo "启用GPU加速模式"
40 | fi
41 | 
42 | # 执行命令
43 | eval $CMD
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # VR-Bench + AutoEnv Dependencies
 2 | # Python >= 3.10
 3 | 
 4 | # Core dependencies
 5 | numpy>=1.24.0
 6 | opencv-python>=4.8.0
 7 | pillow>=10.0.0
 8 | pyyaml>=6.0
 9 | imageio>=2.31.0
10 | imageio-ffmpeg>=0.4.9
11 | pydantic>=2.11.0
12 | pydantic-core>=2.33.0
13 | 
14 | # Data processing
15 | scipy>=1.11.0
16 | scikit-image>=0.21.0
17 | 
18 | # Image processing (AutoEnv)
19 | rembg>=2.0.68
20 | onnxruntime-gpu>=1.23.0
21 | 
22 | # LLM API
23 | openai>=1.0.0
24 | litellm>=1.75.0
25 | tiktoken>=0.11.0
26 | 
27 | # Async support
28 | aiohttp>=3.12.0
29 | aiohappyeyeballs>=2.6.0
30 | aiosignal>=1.4.0
31 | frozenlist>=1.7.0
32 | multidict>=6.6.0
33 | yarl>=1.20.0
34 | propcache>=0.3.0
35 | 
36 | # HTTP clients
37 | httpx>=0.28.0
38 | httpcore>=1.0.0
39 | h11>=0.16.0
40 | certifi>=2024.0.0
41 | idna>=3.10
42 | sniffio>=1.3.0
43 | anyio>=4.9.0
44 | requests>=2.32.0
45 | urllib3>=2.5.0
46 | charset-normalizer>=3.4.0
47 | 
48 | # Utilities
49 | python-dotenv>=1.0.0
50 | distro>=1.9.0
51 | tenacity>=9.1.0
52 | tqdm>=4.67.0
53 | packaging>=25.0
54 | typing-extensions>=4.14.0
55 | 
56 | # Optional: GPU acceleration
57 | # cupy-cuda12x>=12.0.0  # For CUDA 12.x
58 | # cupy-cuda11x>=11.0.0  # For CUDA 11.x
59 | 
60 | # Development dependencies (optional)
61 | # pytest>=7.4.0
62 | # black>=23.0.0
63 | # flake8>=6.0.0
64 | # mypy>=1.5.0
65 | # pylint>=2.17.0
66 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/prompts/sokoban_prompt.py:
--------------------------------------------------------------------------------
# Placeholder format: {player}, {goal}, {box}, {wall}, {floor}
# Literal braces of the JSON example are doubled ({{ }}), so the template is
# presumably filled in with str.format -- confirm against the caller.
# Unlike the other game prompts shown here, the example answer uses the key
# "actions" rather than "path".
SOKOBAN_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a grid-based Sokoban puzzle.
{wall} tiles represent walls and cannot be crossed.
{floor} tiles represent open floor tiles that can be moved through.
The {player} represents the player or agent.
The {box} represents the box that needs to be pushed.
The {goal} represents the goal destination for the box.
Task:
Infer the complete movement sequence required for the {player} to push the {box} onto the {goal} goal.
The {player} moves in four directions: up, down, left, right.
When the {player} moves into a box, it automatically pushes the box if there is space behind it.
The box and the {player} cannot cross or overlap any walls.
Diagonal movement is not allowed, and the camera remains fixed from a top-down view.
Output Format:
Return the entire movement sequence as a JSON array of directional actions, where each element is one of "up", "down", "left", or "right".
Do not include any explanations or additional text.
Example of expected output:
{{
  "actions": ["right", "right", "down", "left", "down"]
}}
"""

# User-turn prompt; uses the {player}, {box} and {goal} placeholders.
SOKOBAN_USER_PROMPT_TEMPLATE = """Infer the complete movement sequence required for the {player} to push the {box} onto the {goal} goal.
"""
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/games/maze/test_skin.py:
--------------------------------------------------------------------------------
"""
Test the pymaze skin-swapping feature.

Usage:
    python -m pymaze.test_skin                    # use the default skin
    python -m pymaze.test_skin custom_assets      # use a custom skin

NOTE(review): the commands above use the package name ``pymaze``; in this
repository the file sits at games/maze/test_skin.py, so the module path is
presumably ``games.maze.test_skin`` -- confirm before relying on it.
"""
 8 | 
 9 | import sys
10 | from pathlib import Path
11 | 
12 | try:
13 |     from . import constants
14 |     from .generators import data_gen
15 |     from .utils import file_utils
16 | except ImportError:
17 |     import constants
18 |     from generators import data_gen
19 |     from utils import file_utils
20 | 
21 | 
def main():
    """Generate a single 9x9 test maze, optionally with a custom skin.

    The first command-line argument, if present, is passed on as the assets
    folder; otherwise ``None`` is passed. Images, states and videos are
    written under ``test_maze_output``; those three sub-folders are emptied
    first by ``file_utils.setup_output_directories``.
    """
    cli_args = sys.argv[1:]
    assets_folder = cli_args[0] if cli_args else None

    if not assets_folder:
        print("Using default skin")
    else:
        print(f"Using custom skin: {assets_folder}")

    output_dir = Path("test_maze_output")
    images_dir, states_dir, video_dir = (
        output_dir / sub_folder for sub_folder in ("images", "states", "videos")
    )

    file_utils.setup_output_directories(
        str(output_dir),
        str(images_dir),
        str(states_dir),
        str(video_dir),
    )

    print("Generating 1 test maze (9x9)...")
    data_gen.generate_data(
        0,
        1,
        9,
        str(images_dir),
        str(states_dir),
        str(video_dir),
        assets_folder=assets_folder,
    )

    print(f"Done! Check output in: {output_dir}")


if __name__ == "__main__":
    main()
50 | 
51 | 


--------------------------------------------------------------------------------
/games/maze/templates/turn_count.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | from typing import List
 5 | 
 6 | from ..utils import maze_utils
 7 | from .base_template import BaseTemplate
 8 | 
 9 | 
class TurnCount(BaseTemplate):
    """Question template (question id 4): count the turns on the path to the finish.

    The path comes from ``maze_utils.dfs_solve_maze`` driven by a
    ``random.Random`` seeded with ``image_id``; the answer is the number
    returned by ``maze_utils.count_turns`` for that path, as a string.
    """

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        super().__init__(maze, image_id)

        # Question metadata.
        self.question_id = 4
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"
        self.qa_type = "TransitionPath"
        self.question_description = "Count how many turns it takes to reach the finish."
        self.qa_level = "Hard"
        # Appended to the question text already set by the base class.
        self.question += (
            "Find the path to the finish and count the number of turns it takes to get there. "
            "You only need to provide one number."
        )

        # Solve the maze, collecting the explanation fragments the helpers
        # append to the two lists.
        path_notes: List[str] = []
        solver_rng = random.Random(image_id)
        path = maze_utils.dfs_solve_maze(maze, path_notes, rng=solver_rng)
        turn_notes: List[str] = []
        turns = maze_utils.count_turns(path, turn_notes)

        self.answer = str(turns)
        self.options = None

        # Analysis text: path explanation, the path itself, turn explanation,
        # then the total.
        path_text = maze_utils.path_to_string(path)
        self.analysis = "".join(
            [
                "First,",
                *path_notes,
                f"Therefore, the path is: {path_text}\n\nThen,",
                *turn_notes,
                f"\nIn summary, the total number of turns is {turns}",
            ]
        )
37 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/prompts/pathfinder_prompt.py:
--------------------------------------------------------------------------------
# Placeholder format: {start}, {end}, {road}
# Literal braces of the JSON examples are doubled ({{ }}), so the template is
# presumably filled in with str.format -- confirm against the caller.
# {road} is listed above but does not occur in either template below.
PATHFINDER_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a pathfinding puzzle.
The image shows a network of curved paths connecting various waypoints.
Each waypoint (intersection or junction) is labeled with a letter or letter combination (A, B, C, ..., Z, AA, AB, etc.).
The {start} represents the starting point.
The {end} represents the goal or destination.

Task:
Find the shortest valid path from the {start} starting point to the {end} goal.
The path must follow the visible roads/paths in the image.
You can only move along the connected paths shown in the image.

Output Format:
You MUST return a JSON object with a "path" field containing an array of waypoint labels.
The array should start with the label closest to the starting point and end with the label closest to the goal.
Do not include any explanations or additional text.

Required format:
{{
  "path": ["A", "B", "C", "D", "E"]
}}

For puzzles with more than 26 waypoints, labels may be multi-character (e.g., "AA", "AB"):
{{
  "path": ["A", "Z", "AA", "AB"]
}}

Important: The "path" field MUST be an array of strings, not a single string.
"""

# User-turn prompt; uses only the {start} and {end} placeholders.
PATHFINDER_USER_PROMPT_TEMPLATE = """Find the shortest path from the {start} starting point to the {end} goal by following the labeled waypoints.
"""
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Python
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | *.so
  6 | .Python
  7 | build/
  8 | develop-eggs/
  9 | dist/
 10 | downloads/
 11 | eggs/
 12 | .eggs/
 13 | lib/
 14 | lib64/
 15 | parts/
 16 | sdist/
 17 | var/
 18 | wheels/
 19 | pip-wheel-metadata/
 20 | share/python-wheels/
 21 | *.egg-info/
 22 | .installed.cfg
 23 | *.egg
 24 | MANIFEST
 25 | bak_dataset/
 26 | dataset/
 27 | vlm_eval_results/
 28 | huggingface_model/
 29 | bak/
 30 | # Virtual Environment
 31 | venv/
 32 | ENV/
 33 | env/
 34 | .venv
 35 | .env
 36 | # IDE
 37 | .vscode/
 38 | .idea/
 39 | *.swp
 40 | *.swo
 41 | *~
 42 | .DS_Store
 43 | 
 44 | # Jupyter Notebook
 45 | .ipynb_checkpoints
 46 | *.ipynb
 47 | 
 48 | # Testing
 49 | .pytest_cache/
 50 | .coverage
 51 | htmlcov/
 52 | .tox/
 53 | .nox/
 54 | 
 55 | # Logs
 56 | *.log
 57 | logs/
 58 | *.out
 59 | 
 60 | # Dataset and Generated Files
 61 | dataset/
 62 | dataset_*/
 63 | bak_dataset/
 64 | debug/
 65 | output/
 66 | eval_results/
 67 | *.mp4
 68 | *.avi
 69 | *.gif
 70 | *.png
 71 | *.jpg
 72 | *.jpeg
 73 | 
 74 | # But include skins folder
 75 | !skins/
 76 | !skins/**
 77 | 
 78 | # Temporary Files
 79 | tmp/
 80 | temp/
 81 | *.tmp
 82 | *.bak
 83 | *.swp
 84 | 
 85 | # Model Checkpoints
 86 | checkpoints/
 87 | *.pth
 88 | *.pt
 89 | *.ckpt
 90 | *.h5
 91 | 
 92 | # Cache
 93 | .cache/
 94 | *.cache
 95 | 
 96 | # OS
 97 | Thumbs.db
 98 | .DS_Store
 99 | 
100 | #dataset
101 | dataset_VR
102 | dataset_output/
103 | 
104 | # AutoEnv workspace (generated assets and costs)
105 | AutoEnv/workspace/
106 | 


--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
 1 | # 统一数据集生成配置
 2 | # 支持多种游戏类型：sokoban, maze, pathfinder, trapfield
 3 | 
 4 | # 游戏类型 (sokoban, maze, pathfinder, trapfield)
 5 | game_type: "sokoban"
 6 | 
 7 | # 皮肤文件夹根目录
 8 | skins_root: "skins/sokoban/new"
 9 | 
10 | # 输出根目录
11 | output_root: "generated_levels"
12 | 
13 | # 难度配置
14 | # Sokoban 难度配置
15 | difficulties:
16 |   easy:
17 |     board_size: 5
18 |     num_boxes: 1
19 |     count: 120
20 | 
21 |   medium:
22 |     board_size: 8
23 |     num_boxes: 1
24 |     count: 120
25 | 
26 |   hard:
27 |     board_size: 12
28 |     num_boxes: 1
29 |     count: 120
30 | 
31 | # Maze 难度配置（切换 game_type 为 maze 时使用）
32 | # game_type: "maze"
33 | # skins_root: "skins/maze"
34 | # difficulties:
35 | #   small:
36 | #     maze_size: 9
37 | #     count: 120
38 | #
39 | #   medium:
40 | #     maze_size: 11
41 | #     count: 120
42 | #
43 | #   large:
44 | #     maze_size: 13
45 | #     count: 120
46 | 
47 | # PathFinder 难度配置（切换 game_type 为 pathfinder 时使用）
48 | # game_type: "pathfinder"
49 | # skins_root: "skins/pathfinder"
50 | # difficulties:
51 | #   easy:
52 | #     difficulty: "easy"
53 | #     image_size: 1024
54 | #     count: 120
55 | #
56 | #   medium:
57 | #     difficulty: "medium"
58 | #     image_size: 1024
59 | #     count: 120
60 | #
61 | #   hard:
62 | #     difficulty: "hard"
63 | #     image_size: 1024
64 | #     count: 120
65 | 
66 | # 生成配置
67 | generation:
68 |   check_solvable: true
69 |   max_attempts: 50
70 |   fps: 24  # 固定24fps（连续移动动画）
71 |   add_grid: false
72 |   max_duplicate_retries: 100
73 | 
74 | # 并行配置
75 | parallel:
76 |   max_workers: 4
77 | 
78 | 


--------------------------------------------------------------------------------
/AutoEnv/base/pipeline/base_node.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import uuid
 4 | from abc import ABC, abstractmethod
 5 | 
 6 | from pydantic import BaseModel, Field
 7 | 
 8 | 
 9 | def _generate_node_id() -> str:
10 |     return uuid.uuid4().hex[:8]
11 | 
12 | 
class NodeContext(BaseModel):
    """Base context object passed to nodes during execution.

    Declares no fields of its own; subclasses define the specific fields
    they need. The model configuration permits arbitrary (non-pydantic)
    types and sets ``extra`` to ``"allow"``.
    """

    model_config = {"arbitrary_types_allowed": True, "extra": "allow"}
17 | 
18 | 
class BaseNode(BaseModel, ABC):
    """Abstract DAG node that tracks its neighbours in both directions.

    Each node gets a generated ``node_id`` and keeps two lists: the nodes
    that come after it (``successors``) and the nodes that come before it
    (``predecessors``). Concrete subclasses implement :meth:`execute`.
    """

    node_id: str = Field(default_factory=_generate_node_id)
    successors: list["BaseNode"] = Field(default_factory=list)
    predecessors: list["BaseNode"] = Field(default_factory=list)

    model_config = {"arbitrary_types_allowed": True}

    def add(self, nodes: BaseNode | list[BaseNode]) -> BaseNode | list[BaseNode]:
        """Link one node or a list of nodes as successors of this node.

        Both directions of the edge are recorded, and an edge that already
        exists is not added twice.

        Returns:
            The ``nodes`` argument exactly as it was passed in.
        """
        targets = nodes if not isinstance(nodes, BaseNode) else [nodes]
        for target in targets:
            # Forward edge: self -> target.
            if target not in self.successors:
                self.successors.append(target)
            # Backward edge: target remembers self as a predecessor.
            if self not in target.predecessors:
                target.predecessors.append(self)
        return nodes

    def __rshift__(self, other: BaseNode | list[BaseNode]) -> BaseNode | list[BaseNode]:
        """Support ``a >> b`` as shorthand for ``a.add(b)``."""
        return self.add(other)

    @abstractmethod
    async def execute(self, ctx: NodeContext) -> None:
        """Run the node: read its inputs from ``ctx`` and write outputs back to ``ctx``."""
45 | 


--------------------------------------------------------------------------------
/prompts/videomodel_pathfinder_prompt.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
"""
Video-model prompt template for the Pathfinder (irregular_maze) game.

Placeholders: $start, $end, $road (string.Template syntax).
Values are read from the visual_description field of description.json.
"""
 8 | from string import Template
 9 | 
# string.Template with the placeholders $start, $end and $road.
PATHFINDER_PROMPT_TEMPLATE = Template("""Create a 2D animation based on the provided image of a maze. The $start slides smoothly along the $road path, stopping perfectly on the $end. The $start never slides or crosses into the black areas of the maze. The camera is a static, top-down view showing the entire maze.

Maze:
 The maze paths are $road, the walls are black.
 The $start moves to the goal position, represented by $end.
 The $start slides smoothly along the $road path.
 The $start never slides or crosses into the black areas of the maze.
 The $start stops perfectly on the $end.

Scene:
 No change in scene composition.
 No change in the layout of the maze.
 The $start travels along the $road path without speeding up or slowing down.

Camera:
 Static camera.
 No zoom.
 No pan.
 No glitches, noise, or artifacts.""")


def get_pathfinder_prompt(visual_description: dict) -> str:
    """
    Build the video-model prompt for the pathfinder game.

    Args:
        visual_description: the ``visual_description`` mapping taken from a
            skin's description.json. ``None`` is accepted and treated as an
            empty mapping. Recognised keys:
            - start: description of the start marker (e.g. "green circle")
            - end: description of the goal marker (e.g. "red circle")
            - road: description of the road (e.g. "white square")

    Returns:
        The prompt text with every placeholder substituted. A key that is
        missing, ``None`` or empty falls back to its built-in default.
    """
    # A skin without a visual_description must still yield a usable prompt.
    description = visual_description or {}
    return PATHFINDER_PROMPT_TEMPLATE.substitute(
        start=description.get("start") or "green circle",
        end=description.get("end") or "red circle",
        road=description.get("road") or "white path",
    )
46 | 
47 | 


--------------------------------------------------------------------------------
/games/maze/templates/player_position.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import List, Set
 4 | 
 5 | from .. import constants
 6 | from ..utils import maze_utils
 7 | from .base_template import BaseTemplate
 8 | 
 9 | 
class PlayerPosition(BaseTemplate):
    """Multiple-choice question asking for the player's coordinates.

    The options are the true position plus its four orthogonal neighbours,
    sorted as strings and labelled alphabetically starting at "A".
    """

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        super().__init__(maze, image_id)

        # Question metadata.
        self.qa_type = "StateInfo"
        self.question_id = 1
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"
        self.question_description = "Ask for the position of player."
        self.qa_level = "Easy"
        self.question += "Which of the following are the coordinates of the player?\n\n**Options:**"

        row, col = maze_utils.find_position(maze, constants.PLAYER_CELL)
        answer_str = f"({row}, {col})"

        # Correct cell first, then one step down / up / right / left.
        offsets = ((0, 0), (1, 0), (-1, 0), (0, 1), (0, -1))
        candidates: Set[str] = {f"({row + d_row}, {col + d_col})" for d_row, d_col in offsets}

        ordered = sorted(candidates)
        labels = [chr(ord("A") + index) for index in range(len(ordered))]
        self.options = [f"{label}. {text}" for label, text in zip(labels, ordered)]
        # The correct answer is always among the candidates.
        self.answer = labels[ordered.index(answer_str)]

        self.question += "".join(f"\n{line}" for line in self.options)

        self.analysis = (
            "Take a look at the game screen, the red circle represents the player.\n"
            f"The coordinates of player are {answer_str}, so the right option is {self.answer}"
        )
49 | 


--------------------------------------------------------------------------------
/games/maze/templates/goal_position.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import List, Set
 4 | 
 5 | from .. import constants
 6 | from ..utils import maze_utils
 7 | from .base_template import BaseTemplate
 8 | 
 9 | 
class GoalPosition(BaseTemplate):
    """Multiple-choice question asking for the goal's coordinates.

    The options are the true position plus its four orthogonal neighbours,
    sorted as strings and labelled alphabetically starting at "A".
    """

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        super().__init__(maze, image_id)

        # Question metadata.
        self.qa_type = "StateInfo"
        self.question_id = 2
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"
        self.question_description = "Ask for the position of goal within the maze."
        self.qa_level = "Easy"
        self.question += "Which of the following are the coordinates of the goal?\n\n**Options:**"

        row, col = maze_utils.find_position(maze, constants.GOAL_CELL)
        answer_str = f"({row}, {col})"

        # Correct cell first, then one step down / up / right / left.
        offsets = ((0, 0), (1, 0), (-1, 0), (0, 1), (0, -1))
        candidates: Set[str] = {f"({row + d_row}, {col + d_col})" for d_row, d_col in offsets}

        ordered = sorted(candidates)
        labels = [chr(ord("A") + index) for index in range(len(ordered))]
        self.options = [f"{label}. {text}" for label, text in zip(labels, ordered)]
        # The correct answer is always among the candidates.
        self.answer = labels[ordered.index(answer_str)]

        self.question += "".join(f"\n{line}" for line in self.options)

        self.analysis = (
            "Take a look at the game screen, the green block represents the goal.\n"
            f"The coordinates of goal are {answer_str}, so the right option is {self.answer}"
        )
49 | 


--------------------------------------------------------------------------------
/games/pathfinder/constants.py:
--------------------------------------------------------------------------------
 1 | """Constants for PathFinder game."""
 2 | 
# Difficulty presets - tiers are distinguished by road width, node density,
# number of side branches and minimum solution length. The image size is
# currently the same (1024) for all three tiers.
DIFFICULTY_CONFIG = {
    'easy': {
        'image_size': 1024,        # image size (same value in every tier)
        'road_width': 60,         # wide roads
        'node_spacing_ratio': 0.3,  # node spacing as a fraction of the image size (sparse)
        'extra_paths': 1,         # number of extra side branches (few)
        'min_solution_nodes': 4,  # minimum number of nodes in the solution (short path)
        'connectivity_ratio': 0.15,  # connectivity ratio (0-1): 0.15 = sparse roads, meant to look like real roads
    },
    'medium': {
        'image_size': 1024,        # image size (same value in every tier)
        'road_width': 50,         # slightly narrower roads than 'easy'
        'node_spacing_ratio': 0.2,  # node spacing as a fraction of the image size
        'extra_paths': 2,         # number of extra side branches
        'min_solution_nodes': 6,  # minimum number of nodes in the solution
        'connectivity_ratio': 0.2,  # connectivity ratio: 0.2 = medium density
    },
    'hard': {
        'image_size': 1024,       # image size (same value in every tier)
        'road_width': 36,         # narrow roads
        'node_spacing_ratio': 0.15,  # node spacing as a fraction of the image size (dense)
        'extra_paths': 3,         # number of extra side branches (many)
        'min_solution_nodes': 7,  # minimum number of nodes in the solution (long path)
        'connectivity_ratio': 0.25,  # connectivity ratio: 0.25 = denser (but still road-like)
    }
}
30 | 
# Rendering settings
DEFAULT_IMAGE_SIZE = 500   # default image size (used when no difficulty is given)
ROAD_WIDTH = 35            # road width (thinner)
NODE_RADIUS = 20           # radius of the start/end markers
START_COLOR = (255, 0, 0)    # start marker colour (red)
END_COLOR = (0, 255, 0)      # end marker colour (green)
ROAD_COLOR = (255, 255, 255) # road colour (white)
BG_COLOR = (0, 0, 0)         # background colour (black)

# Curve settings
CURVE_SEGMENTS = 400       # number of segments per curve (more = smoother)
CURVE_CONTROL_POINTS = 3   # number of control points per curve
CURVE_BEND_FACTOR = 0.25   # how strongly curves bend

# Blank margin kept around the border
MARGIN = 80

# Video settings
FRAMES_PER_SECOND = 24     # frame rate (kept consistent with the other games)
MOVEMENT_SPEED = 1.0       # movement speed (pixels per frame)
51 | 
52 | 


--------------------------------------------------------------------------------
/AutoEnv/autoenv/pipeline/visual/prompt.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Maze Mode Skin Generation Prompts
 3 | """
 4 | 
# Style consistency prompt for image-to-image generation.
# The text refers to a reference image supplied "above" it and contains a
# single placeholder, {base_prompt}, for the asset-specific instructions
# (presumably filled with str.format - confirm against the caller).
STYLE_CONSISTENT_PROMPT = """Above is the style reference image. Generate a new asset matching this exact visual style.

{base_prompt}

CRITICAL REQUIREMENTS:
1. Match the art style, color palette, and rendering technique of the reference image
2. The new asset MUST look like it comes from the SAME GAME as the reference
3. PIXEL ART STYLE: Use retro pixel art aesthetic with appropriate detail:
   - Moderate pixel granularity (16x16 to 32x32 pixel level)
   - Include texture details, shading, and depth layers
   - Clear pixel borders and defined edges
   - Vintage game visual style with appropriate level of detail
   - NOT overly simplified - maintain texture richness
4. STRONG VISUAL DISTINCTION: This asset must be HIGHLY DISTINGUISHABLE from other game elements:
   - Use CONTRASTING colors (different hue, saturation, or brightness)
   - Use DISTINCT shapes and visual patterns
   - Ensure HIGH CONTRAST and CLEAR VISUAL IDENTITY
   - Make it instantly recognizable at a glance
5. WALL TILE REQUIREMENTS (for wall assets only):
   - Wall tiles MUST be COMPLETELY FILLED squares with NO empty or transparent areas
   - Wall MUST cover the ENTIRE tile area from edge to edge
   - NO irregular shapes, peaks, or protrusions extending beyond the square boundary
   - NO gaps, holes, or partial coverage in wall tiles
   - Wall and floor MUST have DISTINCTLY DIFFERENT visual appearance (different colors, textures, or patterns)
   - Wall should be clearly recognizable as an impassable barrier
6. Balance: Maintain thematic coherence with the reference while ensuring strong visual differentiation and pixel art aesthetics
"""


--------------------------------------------------------------------------------
/prompts/videomodel_maze_prompt.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
"""
Video-model prompt template for the Maze game.

Placeholders: $player, $goal, $wall, $floor (string.Template syntax).
Values are read from the visual_description field of description.json.
"""
 8 | from string import Template
 9 | 
# string.Template with the placeholders $player, $goal, $wall and $floor.
MAZE_PROMPT_TEMPLATE = Template("""Create a 2D animation based on the provided image of a maze. The $player slides smoothly along the $floor path, stopping perfectly on the $goal. The $player never slides or crosses into the $wall areas of the maze. The camera is a static, top-down view showing the entire maze.

Maze:
 The maze paths are $floor, the walls are $wall.
 The $player moves to the goal position, represented by $goal.
 The $player slides smoothly along the $floor path.
 The $player never slides or crosses into the $wall areas of the maze.
 The $player stops perfectly on the $goal.

Scene:
 No change in scene composition.
 No change in the layout of the maze.
 The $player travels along the $floor path without speeding up or slowing down.

Camera:
 Static camera.
 No zoom.
 No pan.
 No glitches, noise, or artifacts.""")


def get_maze_prompt(visual_description: dict) -> str:
    """
    Build the video-model prompt for the maze game.

    Args:
        visual_description: the ``visual_description`` mapping taken from a
            skin's description.json. ``None`` is accepted and treated as an
            empty mapping. Recognised keys:
            - player: description of the player (e.g. "red circle")
            - goal: description of the goal (e.g. "green square")
            - wall: description of the walls (e.g. "light blue square")
            - floor: description of the floor (e.g. "white square")

    Returns:
        The prompt text with every placeholder substituted. A key that is
        missing, ``None`` or empty falls back to its built-in default.
    """
    # A skin without a visual_description must still yield a usable prompt.
    description = visual_description or {}
    return MAZE_PROMPT_TEMPLATE.substitute(
        player=description.get("player") or "red circle",
        goal=description.get("goal") or "green square",
        wall=description.get("wall") or "blue",
        floor=description.get("floor") or "white",
    )
48 | 
49 | 


--------------------------------------------------------------------------------
/core/schema/entity.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import Tuple, Dict, Optional
 3 | from .position import Position, BBox
 4 | 
 5 | 
 6 | @dataclass
 7 | class Entity:
 8 |     pixel_pos: Tuple[int, int]
 9 |     bbox: BBox
10 |     grid_pos: Optional[Position] = None
11 | 
12 |     def to_dict(self) -> Dict:
13 |         result = {
14 |             "pixel_pos": {"x": self.pixel_pos[0], "y": self.pixel_pos[1]},
15 |             "bbox": self.bbox.to_dict()
16 |         }
17 | 
18 |         if self.grid_pos is not None:
19 |             result["grid_pos"] = self.grid_pos.to_dict()
20 | 
21 |         return result
22 | 
23 |     @classmethod
24 |     def from_dict(cls, data: Dict) -> 'Entity':
25 |         pixel_data = data["pixel_pos"]
26 |         grid_pos = Position.from_dict(data["grid_pos"]) if "grid_pos" in data else None
27 | 
28 |         return cls(
29 |             pixel_pos=(pixel_data["x"], pixel_data["y"]),
30 |             bbox=BBox.from_dict(data["bbox"]),
31 |             grid_pos=grid_pos
32 |         )
33 | 
34 |     @classmethod
35 |     def from_grid_pos(cls, row: int, col: int, cell_size: int) -> 'Entity':
36 |         pixel_x = col * cell_size + cell_size // 2
37 |         pixel_y = row * cell_size + cell_size // 2
38 | 
39 |         return cls(
40 |             pixel_pos=(pixel_x, pixel_y),
41 |             bbox=BBox.from_grid_pos(row, col, cell_size),
42 |             grid_pos=Position(row=row, col=col)
43 |         )
44 | 
45 |     @classmethod
46 |     def from_pixel_pos(cls, x: int, y: int, bbox_size: int) -> 'Entity':
47 |         """从像素坐标创建 Entity（用于非网格游戏）"""
48 |         return cls(
49 |             pixel_pos=(x, y),
50 |             bbox=BBox(
51 |                 x=x - bbox_size // 2,
52 |                 y=y - bbox_size // 2,
53 |                 width=bbox_size,
54 |                 height=bbox_size
55 |             ),
56 |             grid_pos=None
57 |         )
58 | 
59 | 


--------------------------------------------------------------------------------
/prompts/videomodel_trapfield_prompt.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
"""
Video-model prompt template for the Trapfield game.

Placeholders: $player, $goal, $trap, $floor (string.Template syntax).
Values are read from the visual_description field of description.json.
"""
 8 | from string import Template
 9 | 
# string.Template with the placeholders $player, $goal, $trap and $floor.
TRAPFIELD_PROMPT_TEMPLATE = Template("""Create a 2D animation based on the provided image of a maze. The $player slides smoothly along the $floor path, stopping perfectly on the $goal. The $player never slides into or crosses the $trap (trap areas). The camera is a static, top-down view showing the entire maze.

Maze:
 The maze paths are $floor, and the trap areas are $trap.
 The $player moves to the goal position, represented by the $goal.
 The $player slides smoothly along the $floor path.
 The $player never slides into or crosses the $trap of the maze.
 The $player stops perfectly on the $goal.

Scene:
 No change in scene composition.
 No change in the layout of the maze.
 The $player travels along the $floor path without speeding up or slowing down.

Camera:
 Static camera.
 No zoom.
 No pan.
 No glitches, noise, or artifacts.""")


def get_trapfield_prompt(visual_description: dict) -> str:
    """
    Build the video-model prompt for the trapfield game.

    Args:
        visual_description: the ``visual_description`` mapping taken from a
            skin's description.json. ``None`` is accepted and treated as an
            empty mapping. Recognised keys:
            - player: description of the player (e.g. "blue circle")
            - goal: description of the goal (e.g. "green circle")
            - trap: description of the traps (e.g. "red x")
            - floor: description of the floor (e.g. "white square")

    Returns:
        The prompt text with every placeholder substituted. A key that is
        missing, ``None`` or empty falls back to its built-in default.
    """
    # A skin without a visual_description must still yield a usable prompt.
    description = visual_description or {}
    return TRAPFIELD_PROMPT_TEMPLATE.substitute(
        player=description.get("player") or "blue circle",
        goal=description.get("goal") or "green circle",
        trap=description.get("trap") or "red cross",
        floor=description.get("floor") or "gray path",
    )
48 | 
49 | 


--------------------------------------------------------------------------------
/AutoEnv/base/pipeline/base_pipeline.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import asyncio
 4 | 
 5 | from pydantic import BaseModel, Field
 6 | 
 7 | from base.pipeline.base_node import BaseNode, NodeContext
 8 | 
 9 | 
class BasePipeline(BaseModel):
    """DAG pipeline that executes nodes level by level, sharing one NodeContext.

    All nodes whose predecessors have finished are awaited together with
    ``asyncio.gather``; the next level starts once that whole batch is done.
    """

    root: BaseNode = Field(...)

    model_config = {"arbitrary_types_allowed": True}

    def _collect_nodes(self) -> list[BaseNode]:
        """Return every node reachable from ``root`` in depth-first preorder.

        Nodes are de-duplicated by ``node_id``.
        """
        visited: set[str] = set()
        nodes: list[BaseNode] = []

        # An explicit stack is used instead of recursion so that a very long
        # chain of nodes cannot exceed the interpreter's recursion limit.
        stack: list[BaseNode] = [self.root]
        while stack:
            node = stack.pop()
            if node.node_id in visited:
                continue
            visited.add(node.node_id)
            nodes.append(node)
            # Pushed in reverse so successors are visited in declared order.
            stack.extend(reversed(node.successors))

        return nodes

    async def run(self, ctx: NodeContext | None = None) -> NodeContext:
        """Execute all reachable nodes level by level; every node shares ``ctx``.

        Args:
            ctx: context handed to every node; a fresh ``NodeContext`` is
                created when omitted.

        Returns:
            The context object after all nodes have run.

        Raises:
            ValueError: if a node depends on a predecessor that is not
                reachable from ``root`` (it could never become ready), or
                if the graph contains a cycle.
        """
        if ctx is None:
            ctx = NodeContext()

        nodes = self._collect_nodes()
        node_map = {n.node_id: n for n in nodes}

        # Fail fast: a predecessor outside the collected graph never runs, so
        # its dependant would stay blocked and be misreported as a cycle.
        for node in nodes:
            for pred in node.predecessors:
                if pred.node_id not in node_map:
                    raise ValueError(
                        f"Node {node.node_id} depends on node {pred.node_id}, "
                        "which is not reachable from the pipeline root"
                    )

        in_degree = {n.node_id: len(n.predecessors) for n in nodes}
        executed: set[str] = set()

        while len(executed) < len(nodes):
            ready = [nid for nid, deg in in_degree.items() if deg == 0 and nid not in executed]
            if not ready:
                # Work remains but nothing is unblocked: the rest form a cycle.
                raise ValueError("Cycle detected in DAG")

            await asyncio.gather(*[node_map[nid].execute(ctx) for nid in ready])

            for node_id in ready:
                executed.add(node_id)
                for s in node_map[node_id].successors:
                    in_degree[s.node_id] -= 1

        return ctx
56 | 


--------------------------------------------------------------------------------
/prompts/videomodel_sokoban_prompt.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
"""
Video-model prompt template for the Sokoban game.

Placeholders: $player, $box, $goal, $wall, $floor (string.Template syntax).
Values are read from the visual_description field of description.json.
"""
 8 | from string import Template
 9 | 
# string.Template with the placeholders $player, $box, $goal, $wall and $floor.
SOKOBAN_PROMPT_TEMPLATE = Template("""Create a 2D animation based on the provided image of a grid puzzle.
The $player moves into position behind the $box and smoothly pushes it toward the $goal.
The $box only slides when pushed from behind by the $player and moves in a straight line along the $floor tiles.
When the direction of the $box's movement needs to change, the $player must reposition itself to a new side of the $box.
The $box never crosses or overlaps any $wall.

Gameplay Rules:
The floor area is $floor, and the walls are $wall.
The $box can only move when pushed by the $player from behind.
The $player cannot pull the $box or move through walls.
The $box slides smoothly in one direction until it reaches the $goal.
The animation stops perfectly when the $box aligns with the $goal.

Scene:
No change in grid layout or tile design.
The camera remains static, showing the entire play area.
The movement is smooth, with no speed variation, camera shake, or visual artifacts.""")


def get_sokoban_prompt(visual_description: dict) -> str:
    """
    Build the video-model prompt for the sokoban game.

    Args:
        visual_description: the ``visual_description`` mapping taken from a
            skin's description.json. ``None`` is accepted and treated as an
            empty mapping. Recognised keys:
            - player: description of the player (e.g. "blue circle")
            - box: description of the box (e.g. "yellow square")
            - goal: description of the goal (e.g. "pink square")
            - wall: description of the walls (e.g. "gray square")
            - floor: description of the floor (e.g. "white square")

    Returns:
        The prompt text with every placeholder substituted. A key that is
        missing, ``None`` or empty falls back to its built-in default.
    """
    # A skin without a visual_description must still yield a usable prompt.
    description = visual_description or {}
    return SOKOBAN_PROMPT_TEMPLATE.substitute(
        player=description.get("player") or "blue ball",
        box=description.get("box") or "yellow square",
        goal=description.get("goal") or "red square",
        wall=description.get("wall") or "gray wall",
        floor=description.get("floor") or "white floor",
    )
48 | 
49 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/prompts/maze3d_prompt.py:
--------------------------------------------------------------------------------
# Dynamic templates - the placeholders can be replaced with values taken from
# a skin's description.json.
# Placeholders: {start_cube}, {goal_cube}, {default_cube}, {ball}
# NOTE(review): the example block uses quadruple braces. A single str.format
# pass turns them into literal "{{" / "}}"; only a second pass yields "{" / "}".
# Confirm that the caller formats this template twice.
MAZE3D_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a 3D maze composed of \
{default_cube}s that represent walkable platforms suspended in space.
Each cube represents a solid tile that the ball can stand on or move across.
The {ball} represents the starting point.
The {goal_cube} represents the goal or destination.
The {start_cube} represents the initial platform where the ball begins.

Task:
Infer the shortest valid 3D path for the {ball} to move from its \
starting position to the {goal_cube}.

Movement Rules:
- Horizontal movements (forward_left, forward_right, backward_left, backward_right): \
Each move spans 2 grid units horizontally.
- Vertical movements (up, down): Each move spans 3 grid units vertically via a ladder. \
The ladder must be present at the starting position.
- The sphere cannot move through empty space or overlap any cube structure.
- All movements must follow valid cube surfaces and ladder connections.

The six valid directions of movement are:
"forward_left" – move diagonally forward and to the left (2 units) within the same layer
"forward_right" – move diagonally forward and to the right (2 units) within the same layer
"backward_left" – move diagonally backward and to the left (2 units) within the same layer
"backward_right" – move diagonally backward and to the right (2 units) within the same layer
"up" – move vertically upward (3 units) via a ladder
"down" – move vertically downward (3 units) via a ladder

Output Format:
Return the full sequence of movement directions as a JSON array, where each \
step is one of the six valid directions.
Do not include any explanations, reasoning, or extra text.

Example of expected output:
{{{{
  "path": ["up", "forward_right", "forward_left", "up", "forward_right"]
}}}}
"""


# User-turn prompt; uses the {ball} and {goal_cube} placeholders.
MAZE3D_USER_PROMPT_TEMPLATE = """Infer the shortest valid 3D path for the {ball} \
to move from its starting position to the {goal_cube}.
"""
45 | 
46 | 


--------------------------------------------------------------------------------
/prompts/videomodel_maze3d_prompt.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
"""
Video-model prompt template for the Maze3D game.

Placeholders: $ball, $start_cube, $goal_cube, $default_cube (string.Template syntax).
Values are read from the visual_description field of description.json.
"""
 8 | from string import Template
 9 | 
# string.Template with the placeholders $ball, $start_cube, $goal_cube and
# $default_cube.
MAZE3D_PROMPT_TEMPLATE = Template("""Create a 3D animation based on the provided image of a cube maze. A $ball slides smoothly along the $default_cube pathway, climbs up the vertical ladders step by step, and finally stops perfectly on the $goal_cube at the top. The $ball never touches or passes through the $start_cube or any non-$default_cube areas of the maze. The camera remains static in an isometric, top-down angle showing the entire structure.

Maze:
 The maze consists of stacked transparent $default_cube forming a 3D pathway.
 The $goal_cube represents the goal position.
 The $start_cube marks the starting platform where the $ball begins.
 The $ball moves upward along the $default_cube path, climbing vertically via the ladders.
 The ball slides smoothly without sudden changes in direction or speed.
 The ball stops exactly on top of the $goal_cube at the end.

Scene:
 No structural or color changes during animation.
 The maze layout and cube arrangement remain unchanged.
 The $ball moves continuously at a constant speed along the 3D path.

Camera:
 Static, isometric camera view.
 No zoom or pan.
 Smooth animation without flicker, noise, or artifacts.""")


def get_maze3d_prompt(visual_description: dict) -> str:
    """
    Build the video-model prompt for the maze3d game.

    Args:
        visual_description: the ``visual_description`` mapping taken from a
            skin's description.json. ``None`` is accepted and treated as an
            empty mapping. Recognised keys:
            - ball: description of the ball (e.g. "golden ball with orange edge")
            - start_cube: description of the start cube (e.g. "blue cube")
            - goal_cube: description of the goal cube (e.g. "red cube")
            - default_cube: description of the default path cubes (e.g. "gray cube")

    Returns:
        The prompt text with every placeholder substituted. A key that is
        missing, ``None`` or empty falls back to its built-in default.
    """
    # A skin without a visual_description must still yield a usable prompt.
    description = visual_description or {}
    return MAZE3D_PROMPT_TEMPLATE.substitute(
        ball=description.get("ball") or "yellow ball",
        start_cube=description.get("start_cube") or "blue cube",
        goal_cube=description.get("goal_cube") or "red cube",
        default_cube=description.get("default_cube") or "gray cube",
    )
48 | 
49 | 


--------------------------------------------------------------------------------
/core/schema/position.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import Tuple, Dict, List
 3 | 
 4 | 
 5 | @dataclass
 6 | class Position:
 7 |     row: int
 8 |     col: int
 9 |     
10 |     def to_dict(self) -> Dict[str, int]:
11 |         return {"row": self.row, "col": self.col}
12 |     
13 |     def to_list(self) -> List[int]:
14 |         return [self.row, self.col]
15 |     
16 |     @classmethod
17 |     def from_dict(cls, data: Dict[str, int]) -> 'Position':
18 |         if "row" in data:
19 |             return cls(row=data["row"], col=data["col"])
20 |         elif "y" in data:
21 |             return cls(row=data["y"], col=data["x"])
22 |         raise ValueError(f"Unknown position format: {data}")
23 |     
24 |     @classmethod
25 |     def from_list(cls, data: List[int]) -> 'Position':
26 |         return cls(row=data[0], col=data[1])
27 | 
28 | 
@dataclass
class BBox:
    """Axis-aligned bounding box given by its (x, y) origin, width and height."""

    x: int
    y: int
    width: int
    height: int

    @property
    def center_x(self) -> int:
        """X coordinate of the centre, using integer division of the width."""
        half_width = self.width // 2
        return self.x + half_width

    @property
    def center_y(self) -> int:
        """Y coordinate of the centre, using integer division of the height."""
        half_height = self.height // 2
        return self.y + half_height

    @property
    def center(self) -> Tuple[int, int]:
        """Centre of the box as ``(center_x, center_y)``."""
        return (self.center_x, self.center_y)

    def to_dict(self) -> Dict[str, int]:
        """Return the four box fields plus the derived ``center_x``/``center_y``."""
        mid_x, mid_y = self.center
        payload = {
            "x": self.x,
            "y": self.y,
            "width": self.width,
            "height": self.height,
        }
        payload["center_x"] = mid_x
        payload["center_y"] = mid_y
        return payload

    def to_tuple(self) -> Tuple[int, int, int, int]:
        """Return ``(x, y, width, height)``."""
        return self.x, self.y, self.width, self.height

    @classmethod
    def from_dict(cls, data: Dict[str, int]) -> 'BBox':
        """Build a box from ``x``, ``y``, ``width`` and ``height``; other keys are ignored."""
        required = ("x", "y", "width", "height")
        return cls(**{key: data[key] for key in required})

    @classmethod
    def from_grid_pos(cls, row: int, col: int, cell_size: int) -> 'BBox':
        """Build the box covering grid cell (row, col) for square cells of ``cell_size``."""
        left = col * cell_size
        top = row * cell_size
        return cls(x=left, y=top, width=cell_size, height=cell_size)
78 | 
79 | 


--------------------------------------------------------------------------------
/games/maze/main.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import sys
 4 | from pathlib import Path
 5 | from typing import List, Optional
 6 | 
 7 | try:
 8 |     from . import constants
 9 |     from .generators import data_gen
10 |     from .templates.base_template import BaseTemplate
11 |     from .utils import file_utils
12 | except ImportError:
13 |     import constants
14 |     from generators import data_gen
15 |     from templates.base_template import BaseTemplate
16 |     from utils import file_utils
17 | 
# Configure output directory and maze counts here.
# DEFAULT_OUTPUT_DIR is the root folder main() writes the generated dataset into.
DEFAULT_OUTPUT_DIR = "maze_dataset_py"
# Maps a maze side length to the number of mazes of that size to generate.
# main() looks sizes up with .get(size, 0), so a size missing here is skipped.
DEFAULT_COUNTS = {
    9: 1,   # number of 9x9 mazes
    11: 1,  # number of 11x11 mazes
    13: 1,  # number of 13x13 mazes
}
25 | 
26 | 
def main(assets_folder: Optional[str] = None) -> int:
    """Generate the maze dataset under ``DEFAULT_OUTPUT_DIR``.

    For every size in ``constants.ALLOWED_SIZES`` the number of mazes is read
    from ``DEFAULT_COUNTS`` (0 when absent); sizes with a count of 0 are skipped.
    All produced templates are written to one JSON file at the end.

    Args:
        assets_folder: Forwarded unchanged to ``data_gen.generate_data``.

    Returns:
        Always 0.

    Raises:
        ValueError: If any configured count is negative.
    """
    counts: List[int] = []
    for size in constants.ALLOWED_SIZES:
        counts.append(DEFAULT_COUNTS.get(size, 0))
    for configured in counts:
        if configured < 0:
            raise ValueError("Counts must be non-negative integers")

    output_dir = Path(DEFAULT_OUTPUT_DIR)
    images_dir = output_dir / constants.IMAGES_DIR
    states_dir = output_dir / constants.STATES_DIR
    video_dir = output_dir / constants.VIDEOS_DIR
    data_file = output_dir / constants.DATA_PATH

    directory_args = [str(folder) for folder in (output_dir, images_dir, states_dir, video_dir)]
    file_utils.setup_output_directories(*directory_args)

    templates: List[BaseTemplate] = []
    next_id = 0  # first image id of the next batch; ids are contiguous across sizes

    for size, label, count in zip(constants.ALLOWED_SIZES, constants.SIZE_LABELS, counts):
        if count > 0:
            print(f"Generating {count} {label} mazes...")
            batch = data_gen.generate_data(
                next_id,
                count,
                size,
                str(images_dir),
                str(states_dir),
                str(video_dir),
                assets_folder=assets_folder,
            )
            templates.extend(batch)
            next_id += count

    data_gen.save_data_to_json(templates, str(data_file))

    print(f"Data generation completed. Output directory: {output_dir}")
    return 0
61 | 
62 | 
if __name__ == "__main__":
    # The optional first command-line argument is passed to main() as assets_folder.
    if len(sys.argv) > 1:
        assets_folder = sys.argv[1]
    else:
        assets_folder = None
    exit_code = main(assets_folder)
    raise SystemExit(exit_code)
66 | 


--------------------------------------------------------------------------------
/games/maze/templates/base_template.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Any, Dict, List, Optional, Sequence
 4 | 
 5 | from .. import constants
 6 | 
 7 | _BASE_RULES = (
 8 |     "**Rules:**\n"
 9 |     "1. This is a maze mini-game.The player needs to navigate around obstacles to reach the destination and achieve victory.\n"
10 |     "2. The red circle represents the player, the green block is the goal and the blue blocks are obstacles.\n"
11 |     "3. The player can only move within the white blocks.\n"
12 |     "4. The coordinates are given in the format (row, col), where row represents the vertical position and col represents the horizontal position.\n\n"
13 |     "**Question:** "
14 | )
15 | 
16 | 
class BaseTemplate:
    """Common fields and serialisation shared by the maze question templates.

    ``__init__`` fills in the image/state paths, the plot level, the rules
    preamble and empty answer/analysis/options; the remaining declared fields
    are left for subclasses to assign.
    """

    data_id: str
    qa_type: str
    question_id: int
    question_description: str
    image: str
    state: str
    plot_level: str
    qa_level: str
    question: str
    answer: str
    options: Optional[List[str]]
    analysis: str

    def __init__(self, maze: Sequence[Sequence[int]], image_id: int) -> None:
        """Initialise the fields that do not depend on the question type.

        Args:
            maze: The maze grid; only its number of rows is used here, to look
                up the plot level in ``constants.PLOT_LEVELS``.
            image_id: Numeric id, zero-padded to five digits in the file names.
        """
        self.options: Optional[List[str]] = None
        self.answer = ""
        self.analysis = ""
        self.question = _BASE_RULES
        self.plot_level = constants.PLOT_LEVELS.get(len(maze), "Unknown")
        self.image = f"{constants.IMAGES_DIR}/image_{image_id:05d}.png"
        self.state = f"{constants.STATES_DIR}/state_{image_id:05d}.json"

    def to_dict(self) -> Dict[str, Any]:
        """Return the template as a plain dict in a fixed key order.

        ``image`` and ``state`` must be set (a missing one raises
        ``AttributeError``); every other field falls back to a default when the
        attribute has not been assigned.
        """
        required = object()  # marker for fields that have no fallback value
        layout = (
            ("data_id", ""),
            ("qa_type", ""),
            ("question_id", None),
            ("question_description", ""),
            ("image", required),
            ("state", required),
            ("plot_level", ""),
            ("qa_level", ""),
            ("question", ""),
            ("answer", ""),
            ("options", None),
            ("analysis", ""),
        )
        payload: Dict[str, Any] = {}
        for field_name, fallback in layout:
            if fallback is required:
                payload[field_name] = getattr(self, field_name)
            else:
                payload[field_name] = getattr(self, field_name, fallback)
        return payload
56 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/vlm_client.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import os
 3 | import logging
 4 | from openai import OpenAI
 5 | 
 6 | 
 7 | class VLMClient:
 8 |     """API模型客户端"""
 9 |     def __init__(self, model: str = "gpt-4o", api_key: str = None, base_url: str = None, max_tokens: int = 10000, temperature: float = 0.0):
10 |         self.model = model
11 |         self.maxtokens = max_tokens
12 |         self.temperature = temperature
13 |         self.model_type = "api"
14 |         self.client = OpenAI(
15 |             api_key=api_key or os.getenv("OPENAI_API_KEY"),
16 |             base_url=base_url
17 |         )
18 |     
19 |     def query(self, system_prompt: str, user_prompt: str, image_path: str = None) -> str:
20 |         try:
21 |             messages = [
22 |                 {
23 |                     "role": "system",
24 |                     "content": system_prompt
25 |                 }
26 |             ]
27 | 
28 |             if image_path:
29 |                 with open(image_path, "rb") as f:
30 |                     image_data = base64.b64encode(f.read()).decode("utf-8")
31 | 
32 |                 user_content = [
33 |                     {"type": "text", "text": user_prompt},
34 |                     {
35 |                         "type": "image_url",
36 |                         "image_url": {
37 |                             "url": f"data:image/png;base64,{image_data}"
38 |                         }
39 |                     }
40 |                 ]
41 |             else:
42 |                 user_content = user_prompt
43 | 
44 |             messages.append({
45 |                 "role": "user",
46 |                 "content": user_content
47 |             })
48 | 
49 |             response = self.client.chat.completions.create(
50 |                 model=self.model,
51 |                 messages=messages,
52 |                 max_tokens=self.maxtokens,
53 |                 temperature=self.temperature
54 |             )
55 | 
56 |             print(response.choices[0].message.content)
57 | 
58 |             return response.choices[0].message.content
59 | 
60 |         except FileNotFoundError as e:
61 |             logging.error(f"Image file not found: {image_path}")
62 |             raise
63 |         except Exception as e:
64 |             logging.error(f"VLM API call failed: {type(e).__name__}: {e}")
65 |             raise
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/games/maze3d/color_handler.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 3D Maze 颜色处理器
 3 | 负责从皮肤目录加载颜色配置
 4 | """
 5 | 
 6 | import json
 7 | import logging
 8 | from pathlib import Path
 9 | from typing import Dict, Optional
10 | 
11 | 
# Color keys that every skin's colors.json must define
REQUIRED_COLOR_KEYS = ['start_pos', 'goal_pos', 'default_cube', 'ball', 'ball_edge']


def load_colors_from_skin(skin_folder: str) -> Dict[str, str]:
    """
    Load the color configuration from a skin folder.

    Args:
        skin_folder: Path of the skin folder; it must contain ``colors.json``.

    Returns:
        The parsed contents of ``colors.json`` as a dict.

    Raises:
        FileNotFoundError: No folder was given, or the folder or its
            ``colors.json`` does not exist.
        ValueError: ``colors.json`` is not valid JSON, is not a JSON object,
            or lacks one of ``REQUIRED_COLOR_KEYS``.
    """
    if not skin_folder:
        raise FileNotFoundError("No skin folder specified")

    skin_path = Path(skin_folder)
    if not skin_path.exists():
        raise FileNotFoundError(f"Skin folder not found: {skin_folder}")

    colors_path = skin_path / 'colors.json'

    if not colors_path.exists():
        raise FileNotFoundError(f"colors.json not found in {skin_folder}")

    try:
        with open(colors_path, 'r', encoding='utf-8') as f:
            colors = json.load(f)
    except json.JSONDecodeError as e:
        # Chain the parser error so the original position info is kept.
        raise ValueError(f"Failed to parse colors.json in {skin_folder}: {e}") from e

    # Without this check a JSON number raised TypeError below, and a JSON string
    # that happened to contain every key name was returned as if it were valid.
    if not isinstance(colors, dict):
        raise ValueError(
            f"colors.json in {skin_folder} must contain a JSON object, "
            f"got {type(colors).__name__}"
        )

    missing_keys = [k for k in REQUIRED_COLOR_KEYS if k not in colors]
    if missing_keys:
        raise ValueError(f"Missing required color keys {missing_keys} in {colors_path}")

    logging.debug(f"Loaded colors from {colors_path}")
    return colors
54 | 
55 | 
def load_skin_description(skin_folder: str) -> Optional[Dict[str, str]]:
    """
    Load the visual description stored in a skin folder.

    Args:
        skin_folder: Path of the skin folder.

    Returns:
        The ``visual_description`` entry of ``description.json``, or None when
        no folder is given, the file is missing, the entry is absent, or
        reading/parsing fails (failures are logged, not raised).
    """
    desc_path = Path(skin_folder) / 'description.json' if skin_folder else None
    if desc_path is None or not desc_path.exists():
        return None

    try:
        raw_text = desc_path.read_text(encoding='utf-8')
        payload = json.loads(raw_text)
        return payload.get('visual_description')
    except Exception as e:
        # Best effort by design: any failure degrades to "no description".
        logging.error(f"Failed to load description from {skin_folder}: {e}")
        return None
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/games/maze/templates/find_path_to_goal.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | from typing import List, Set
 5 | 
 6 | from ..utils import maze_utils
 7 | from .base_template import BaseTemplate
 8 | 
 9 | 
class FindPathToGoal(BaseTemplate):
    """Multiple-choice question asking which move sequence reaches the goal.

    The correct option is the solver's path turned into direction words; the
    other options are random direction sequences of the same length. Both the
    solver and the distractors use RNGs seeded from ``image_id``.
    """

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        super().__init__(maze, image_id)

        self.question_id = 3
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"
        self.qa_type = "TransitionPath"
        self.question_description = "Find the path to the goal"
        self.qa_level = "Medium"
        self.question += "Which sequence of movements will allow the player to reach the destination?\n\n**Options:**"

        # `info` is handed to the solver and later joined into the analysis text.
        info: List[str] = []
        path = maze_utils.dfs_solve_maze(maze, info, rng=random.Random(image_id))
        actions = _path_to_actions(path)
        answer_str = ", ".join(actions)

        # A set, so a distractor that repeats another option is dropped.
        variant_rng = random.Random(image_id + 1)
        variants: Set[str] = {answer_str}
        variants.update(_random_path(len(actions), variant_rng) for _ in range(4))

        # Options are labelled A, B, C, ... in sorted order.
        self.options = []
        for offset, entry in enumerate(sorted(variants)):
            label = chr(ord("A") + offset)
            self.options.append(f"{label}. {entry}")
            if entry == answer_str:
                self.answer = label

        self.question += "".join(f"\n{option}" for option in self.options)

        conclusion = (
            f"\n\nTherefore, the right sequence of movements are: {answer_str}\n"
            f"The right option is {self.answer}"
        )
        self.analysis = "".join(info) + conclusion
50 | 
51 | 
52 | def _path_to_actions(path: List[maze_utils.Coordinate]) -> List[str]:
53 |     actions: List[str] = []
54 |     for index in range(1, len(path)):
55 |         actions.append(maze_utils.get_direction(path[index - 1], path[index]))
56 |     return actions
57 | 
58 | 
59 | def _random_path(length: int, rng: random.Random) -> str:
60 |     directions = ["up", "down", "left", "right"]
61 |     sequence = [rng.choice(directions) for _ in range(max(1, length))]
62 |     return ", ".join(sequence)
63 | 


--------------------------------------------------------------------------------
/games/pathfinder/board.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PathFinder 游戏板 - 基于曲线路径
 3 | """
 4 | 
 5 | from typing import List, Tuple
 6 | 
 7 | 
 8 | class PathSegment:
 9 |     """路径段"""
10 |     
11 |     def __init__(self, control_points: List[Tuple[float, float]]):
12 |         self.control_points = control_points
13 |     
14 |     def get_start(self) -> Tuple[float, float]:
15 |         return self.control_points[0]
16 |     
17 |     def get_end(self) -> Tuple[float, float]:
18 |         return self.control_points[-1]
19 | 
20 | 
21 | class PathFinderBoard:
22 |     """PathFinder 游戏板"""
23 | 
24 |     def __init__(
25 |         self,
26 |         segments: List[PathSegment],
27 |         start_point: Tuple[float, float],
28 |         end_point: Tuple[float, float],
29 |         solution_segments: List[int],
30 |         solution_path: List[Tuple[float, float]] = None,  # 新增：解决方案的节点路径
31 |         image_size: int = 800,
32 |         road_width: int = 35  # 新增：道路宽度
33 |     ):
34 |         self.segments = segments
35 |         self.start_point = start_point
36 |         self.end_point = end_point
37 |         self.solution_segments = solution_segments
38 |         self.solution_path = solution_path or []  # 节点序列
39 |         self.image_size = image_size
40 |         self.road_width = road_width  # 保存道路宽度
41 |     
42 |     def is_solvable(self) -> bool:
43 |         return len(self.solution_segments) > 0
44 |     
45 |     def to_dict(self) -> dict:
46 |         return {
47 |             'segments': [[pt for pt in seg.control_points] for seg in self.segments],
48 |             'start_point': list(self.start_point),
49 |             'end_point': list(self.end_point),
50 |             'solution_segments': self.solution_segments,
51 |             'solution_path': [list(pt) for pt in self.solution_path],
52 |             'image_size': self.image_size,
53 |             'road_width': self.road_width
54 |         }
55 |     
56 |     @classmethod
57 |     def from_dict(cls, data: dict) -> 'PathFinderBoard':
58 |         segments = [PathSegment(points) for points in data['segments']]
59 |         return cls(
60 |             segments=segments,
61 |             start_point=tuple(data['start_point']),
62 |             end_point=tuple(data['end_point']),
63 |             solution_segments=data['solution_segments'],
64 |             solution_path=[tuple(pt) for pt in data.get('solution_path', [])],
65 |             image_size=data.get('image_size', 800),
66 |             road_width=data.get('road_width', 35)
67 |         )
68 | 


--------------------------------------------------------------------------------
/games/maze/templates/position_after_moving.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | from typing import List, Set
 5 | 
 6 | from .. import constants
 7 | from ..utils import maze_utils
 8 | from .base_template import BaseTemplate
 9 | 
10 | 
class PositionAfterMoving(BaseTemplate):
    """Multiple-choice question about the player's coordinates after one move.

    The move is picked, with an RNG seeded from ``image_id``, among the
    directions reported by ``maze_utils.get_available_directions``. The options
    are the current cell and its four neighbours.
    """

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        super().__init__(maze, image_id)

        self.qa_type = "ActionOutcome"
        self.question_id = 6
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"
        self.question_description = "The position after moving."
        self.qa_level = "Medium"

        rng = random.Random(image_id)

        directions = maze_utils.get_available_directions(maze)
        if not directions:
            raise ValueError("Player has no available moves to build question")
        direction = rng.choice(directions)

        self.question += f"What are the coordinates of player after moving {direction}?\n\n**Options:**"

        row, col = maze_utils.find_position(maze, constants.PLAYER_CELL)

        # Any direction other than up/down/left is treated as "right".
        offsets = {"up": (-1, 0), "down": (1, 0), "left": (0, -1)}
        d_row, d_col = offsets.get(direction, (0, 1))
        answer_str = f"({row + d_row}, {col + d_col})"

        # The answer is always one of the neighbours, so the set holds five entries.
        choices: Set[str] = {answer_str, f"({row}, {col})"}
        for step_row, step_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            choices.add(f"({row + step_row}, {col + step_col})")

        # Options are labelled A, B, C, ... in sorted (string) order.
        self.options = []
        for offset, entry in enumerate(sorted(choices)):
            label = chr(ord("A") + offset)
            self.options.append(f"{label}. {entry}")
            if entry == answer_str:
                self.answer = label

        self.question += "".join(f"\n{option}" for option in self.options)

        self.analysis = (
            f"Observe the screen, the position of player is ({row}, {col}). "
            f"After moving {direction}, the player is in {answer_str}. "
            f"Therefore, the right option is {self.answer}"
        )
67 | 


--------------------------------------------------------------------------------
/games/maze/generators/data_gen.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import json
 4 | from pathlib import Path
 5 | from typing import Dict, List, Optional
 6 | import random
 7 | 
 8 | from .. import constants
 9 | from ..templates import (
10 |     AvailableDirections,
11 |     FindPathToGoal,
12 |     GoalPosition,
13 |     PlayerPosition,
14 |     PositionAfterMoving,
15 |     TurnCount,
16 | )
17 | from ..templates.base_template import BaseTemplate
18 | from ..utils import maze_utils
19 | from . import image_gen, maze_gen, state_gen, video_gen
20 | 
21 | 
def generate_json_data(maze: List[List[int]], data_id: int) -> List[BaseTemplate]:
    """Instantiate every question template for one maze.

    Returns the six templates in a fixed order, each constructed with
    ``(maze, data_id)``.
    """
    question_types = (
        PlayerPosition,
        GoalPosition,
        PositionAfterMoving,
        AvailableDirections,
        FindPathToGoal,
        TurnCount,
    )
    return [question_cls(maze, data_id) for question_cls in question_types]
31 | 
32 | 
def generate_data(
    id_begin: int,
    amount: int,
    maze_size: int,
    images_dir: str,
    states_dir: str,
    video_dir: str,
    assets_folder: Optional[str] = None,
) -> List[BaseTemplate]:
    """Generate *amount* square mazes and their image, state, video and questions.

    Ids run from ``id_begin`` to ``id_begin + amount - 1`` and are zero-padded
    to five digits in the output file names.

    Args:
        id_begin: Id of the first maze.
        amount: Number of mazes to create.
        maze_size: Side length passed to ``maze_gen.generate_maze`` for both axes.
        images_dir: Folder receiving ``image_XXXXX.png``.
        states_dir: Folder receiving ``state_XXXXX.json``.
        video_dir: Folder receiving ``video_XXXXX.gif``.
        assets_folder: Forwarded to the image and video generators.

    Returns:
        The question templates of all generated mazes, in generation order.
    """
    image_root = Path(images_dir)
    state_root = Path(states_dir)
    video_root = Path(video_dir)

    dataset: List[BaseTemplate] = []
    for offset in range(amount):
        internal_id = id_begin + offset
        maze = maze_gen.generate_maze(maze_size, maze_size)

        image_path = image_root / f"image_{internal_id:05d}.png"
        state_path = state_root / f"state_{internal_id:05d}.json"
        video_path = video_root / f"video_{internal_id:05d}.gif"

        image_gen.draw_maze(maze, constants.CELL_SIZE, str(image_path), assets_folder=assets_folder)
        state_gen.save_state(maze, str(state_path))

        # The solver RNG is seeded with the maze id.
        path = maze_utils.dfs_solve_maze(maze, [], rng=random.Random(internal_id))
        video_gen.create_solution_video(
            maze,
            path,
            constants.CELL_SIZE,
            str(video_path),
            assets_folder=assets_folder,
        )

        dataset += generate_json_data(maze, internal_id)
    return dataset
65 | 
66 | 
def save_data_to_json(records: List[BaseTemplate], file_path: str) -> None:
    """Write ``to_dict()`` of every record to *file_path* as a JSON array.

    Missing parent folders are created; the file is UTF-8 with 2-space indent.
    """
    payload: List[Dict[str, object]] = []
    for record in records:
        payload.append(record.to_dict())

    target = Path(file_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)
72 | 


--------------------------------------------------------------------------------
/AutoEnv/autoenv/pipeline/visual/pipeline.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Maze Mode Skin Generation Pipeline
 3 | Simplified DAG-based visual asset generation pipeline
 4 | """
 5 | 
 6 | from pathlib import Path
 7 | 
 8 | from autoenv.pipeline.visual.nodes import (
 9 |     AnalyzerNode,
10 |     AssetGeneratorNode,
11 |     AutoEnvContext,
12 |     BackgroundRemovalNode,
13 |     StrategistNode,
14 | )
15 | from base.engine.async_llm import AsyncLLM
16 | from base.pipeline.base_pipeline import BasePipeline
17 | 
18 | 
class VisualPipeline(BasePipeline):
    """
    Skin-generation pipeline for the maze-style games.

    Node chain wired by ``create_default``:
        Analyzer → Strategist → AssetGenerator → BackgroundRemoval
    """

    model_config = dict(arbitrary_types_allowed=True)

    @classmethod
    def create_default(cls, image_model: str) -> "VisualPipeline":
        """
        Build the standard four-node pipeline.

        One ``AsyncLLM`` created from *image_model* is shared by the asset
        generator and the background-removal node.

        Args:
            image_model: Image generation model name (required)

        Usage:
            pipeline = VisualPipeline.create_default(
                image_model="gemini-2.5-flash-image"
            )
            ctx = await pipeline.run(
                maze_type="maze",
                theme="cyberpunk neon city",
                output_dir=Path("workspace/envs/maze_001")
            )
        """
        shared_llm = AsyncLLM(image_model)

        analyzer = AnalyzerNode()
        strategist = StrategistNode()
        asset_generator = AssetGeneratorNode(image_llm=shared_llm)
        bg_removal = BackgroundRemovalNode(vision_llm=shared_llm)

        # Link the nodes in execution order; the analyzer is the pipeline root.
        analyzer >> strategist >> asset_generator >> bg_removal

        return cls(root=analyzer)

    async def run(
        self,
        maze_type: str,
        theme: str,
        output_dir: Path = Path("."),
    ) -> AutoEnvContext:
        """
        Run the pipeline on a fresh context built from the arguments.

        Args:
            maze_type: Game type (maze, pathfinder, sokoban, trapfield)
            theme: Visual theme, e.g. "cyberpunk neon city"
            output_dir: Output directory

        Returns:
            Whatever ``BasePipeline.run`` returns for the new context.
        """
        context = AutoEnvContext(
            maze_type=maze_type,
            theme=theme,
            output_dir=output_dir,
        )
        outcome = await super().run(context)
        return outcome
81 | 


--------------------------------------------------------------------------------
/core/schema/state.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import List, Dict, Any, Optional
 3 | from pathlib import Path
 4 | import json
 5 | 
 6 | from .position import Position, BBox
 7 | from .entity import Entity
 8 | from .grid import Grid
 9 | from .render import RenderConfig
10 | 
11 | 
@dataclass
class UnifiedState:
    """Serializable game state: player, goal, boxes, render config and optional grid."""

    version: str
    game_type: str
    player: Entity
    goal: Entity
    render: RenderConfig
    grid: Optional[Grid] = None
    boxes: List[Entity] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the state as a nested dict; ``grid`` is included only when set."""
        entities = {
            "player": self.player.to_dict(),
            "goal": self.goal.to_dict(),
            "boxes": [box.to_dict() for box in self.boxes],
        }
        result: Dict[str, Any] = {
            "version": self.version,
            "game_type": self.game_type,
            "entities": entities,
            "render": self.render.to_dict(),
            "metadata": self.metadata,
        }
        if self.grid is None:
            return result

        result["grid"] = self.grid.to_dict()
        return result

    def save(self, path: str):
        """Write the state to *path* as UTF-8 JSON, creating parent folders."""
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open('w', encoding='utf-8') as f:
            json.dump(self.to_dict(), f, indent=2, ensure_ascii=False)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'UnifiedState':
        """Rebuild a state from the structure produced by ``to_dict``.

        ``grid``, ``entities.boxes`` and ``metadata`` are optional.
        """
        entities = data["entities"]
        grid = None
        if "grid" in data:
            grid = Grid.from_dict(data["grid"])

        return cls(
            version=data["version"],
            game_type=data["game_type"],
            player=Entity.from_dict(entities["player"]),
            goal=Entity.from_dict(entities["goal"]),
            render=RenderConfig.from_dict(data["render"]),
            grid=grid,
            boxes=[Entity.from_dict(raw_box) for raw_box in entities.get("boxes", [])],
            metadata=data.get("metadata", {}),
        )

    @classmethod
    def load(cls, path: str) -> 'UnifiedState':
        """Read a UTF-8 JSON file and rebuild the state from it."""
        raw_text = Path(path).read_text(encoding='utf-8')
        return cls.from_dict(json.loads(raw_text))

    def get_player_bbox(self) -> BBox:
        """Return the player's ``bbox``."""
        return self.player.bbox

    def get_goal_bbox(self) -> BBox:
        """Return the goal's ``bbox``."""
        return self.goal.bbox

    def get_player_grid_pos(self) -> Optional[Position]:
        """Return the player's ``grid_pos``."""
        return self.player.grid_pos

    def get_goal_grid_pos(self) -> Optional[Position]:
        """Return the goal's ``grid_pos``."""
        return self.goal.grid_pos
79 | 
80 | 


--------------------------------------------------------------------------------
/games/maze/generators/maze_gen.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | from typing import List, Tuple
 5 | 
 6 | from .. import constants
 7 | 
# A grid cell address as a (row, col) tuple.
Coordinate = Tuple[int, int]

# Module-wide random source used for maze carving and for player/goal
# placement; it is created without an explicit seed.
_RANDOM = random.Random()
11 | 
12 | 
def generate_maze(rows: int, cols: int) -> List[List[int]]:
    """Create a random maze grid with a player and a goal placed in it.

    The grid starts as all walls, passages are carved from cell (1, 1), and
    the player and goal are then placed on carved cells.

    Args:
        rows: Number of rows; must be odd and at least 3.
        cols: Number of columns; must be odd and at least 3.

    Returns:
        The maze as a list of rows of cell codes from ``constants``.

    Raises:
        ValueError: If either dimension is even or smaller than 3.
    """
    if rows % 2 == 0 or cols % 2 == 0:
        raise ValueError("The number of rows and columns in the maze must be odd!")
    # Odd sizes below 3 (e.g. 1x1) used to pass the check above and then fail
    # with an IndexError at maze[1][1]; reject them explicitly instead.
    if rows < 3 or cols < 3:
        raise ValueError("The maze must have at least 3 rows and 3 columns!")

    maze = [[constants.WALL_CELL for _ in range(cols)] for _ in range(rows)]
    maze[1][1] = constants.EMPTY_CELL
    _dfs(maze, 1, 1)
    _place_player_and_goal(maze)
    return maze
22 | 
23 | 
def _dfs(maze: List[List[int]], row: int, col: int) -> None:
    """Carve passages into *maze* in place by randomized depth-first search.

    Starting from (row, col), the search repeatedly jumps two cells in a
    shuffled direction to a cell that is still a wall, opening both that cell
    and the one in between.

    This uses an explicit stack instead of recursion so that large mazes do
    not hit Python's recursion limit. Directions are shuffled once per visited
    cell, at the moment the cell is entered, so the random draws and the
    carving order are the same as in the recursive formulation.
    """
    def _shuffled_steps():
        steps = [(-2, 0), (0, 2), (2, 0), (0, -2)]
        _RANDOM.shuffle(steps)
        return iter(steps)

    # Each frame keeps its own partially consumed direction iterator, so a cell
    # resumes with its remaining directions after a deeper branch is finished.
    stack = [(row, col, _shuffled_steps())]
    while stack:
        cur_row, cur_col, remaining = stack[-1]
        for d_row, d_col in remaining:
            next_row = cur_row + d_row
            next_col = cur_col + d_col
            if _is_in_bounds(maze, next_row, next_col) and maze[next_row][next_col] == constants.WALL_CELL:
                # Open the wall between the two cells, then the target cell.
                maze[cur_row + d_row // 2][cur_col + d_col // 2] = constants.EMPTY_CELL
                maze[next_row][next_col] = constants.EMPTY_CELL
                stack.append((next_row, next_col, _shuffled_steps()))
                break
        else:
            # No direction left to try from this cell: backtrack.
            stack.pop()
35 | 
36 | 
def _place_player_and_goal(maze: List[List[int]]) -> None:
    """Mark one empty cell as the player and another as the goal, in place.

    The player cell is drawn at random from all empty cells. The goal is drawn
    at random among the empty cells whose Manhattan distance to the player is
    the third largest distinct distance (or the smallest available one when
    fewer than three distinct distances exist).

    Raises:
        ValueError: If the maze has no empty cell, or only the player's cell.
    """
    height = len(maze)
    width = len(maze[0]) if height else 0

    open_cells: List[Coordinate] = []
    for r in range(height):
        for c in range(width):
            if maze[r][c] == constants.EMPTY_CELL:
                open_cells.append((r, c))
    if not open_cells:
        raise ValueError("There are no empty cells in the maze to place the player and goal.")

    player = _RANDOM.choice(open_cells)
    player_row, player_col = player

    # (cell, Manhattan distance to the player) for every other empty cell.
    distances = [
        (cell, abs(cell[0] - player_row) + abs(cell[1] - player_col))
        for cell in open_cells
        if cell != (player_row, player_col)
    ]
    if not distances:
        raise ValueError("There are not enough empty cells to place the goal.")

    # Distinct distances, largest first; use the third entry, or the last one
    # when there are fewer than three.
    ranked = sorted({distance for _, distance in distances}, reverse=True)
    target_distance = ranked[min(len(ranked), 3) - 1]

    candidates = [cell for cell, distance in distances if distance == target_distance]
    if not candidates:
        raise ValueError("No cells found with the target distance to place the goal.")

    goal_row, goal_col = _RANDOM.choice(candidates)

    maze[player_row][player_col] = constants.PLAYER_CELL
    maze[goal_row][goal_col] = constants.GOAL_CELL
78 | 
79 | 
80 | def _is_in_bounds(maze: List[List[int]], row: int, col: int) -> bool:
81 |     return 0 < row < len(maze) and 0 < col < len(maze[0])
82 | 


--------------------------------------------------------------------------------
/games/maze/templates/available_directions.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import random
 4 | from typing import List, Set
 5 | 
 6 | from .. import constants
 7 | from ..utils import maze_utils
 8 | from .base_template import BaseTemplate
 9 | 
10 | 
class AvailableDirections(BaseTemplate):
    """Multiple-choice question about which directions the player can move in.

    The correct answer is the comma-joined result of
    ``maze_utils.get_available_directions``. Distractor options are drawn
    with a ``random.Random`` seeded by ``image_id``.
    """

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        super().__init__(maze, image_id)

        self.question_id = 5
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"
        self.qa_type = "StateInfo"
        self.question_description = "Ask for the available directions to move are currently."
        self.qa_level = "Easy"
        self.question += "Which directions are available to move now?\n\n**Options:**"

        answer_str = ", ".join(maze_utils.get_available_directions(maze))

        # Each entry pairs a family of options (grouped by how many
        # directions they name) with how many of them to draw.
        rng = random.Random(image_id)
        distractor_plan = [
            (["up", "down", "left", "right"], 2),
            (["up, down", "up, left", "up, right", "down, left", "down, right", "left, right"], 2),
            (["up, down, left", "up, down, right", "up, left, right", "down, left, right"], 2),
            (["up, down, left, right"], 1),
        ]

        pool: Set[str] = set()
        for family, quota in distractor_plan:
            _add_random_options(pool, family, quota, rng)
        # The correct answer is always among the options.
        pool.add(answer_str)

        # Shorter options first, ties broken alphabetically; labels run A, B, C, ...
        ordered_entries = sorted(pool, key=lambda item: (len(item), item))
        self.options = []
        for offset, entry in enumerate(ordered_entries):
            label = chr(ord("A") + offset)
            self.options.append(f"{label}. {entry}")
            if entry == answer_str:
                self.answer = label

        self.question += "".join(f"\n{option}" for option in self.options)

        player_row, player_col = maze_utils.find_position(maze, constants.PLAYER_CELL)
        neighbours = (
            ("up", player_row - 1, player_col),
            ("down", player_row + 1, player_col),
            ("left", player_row, player_col - 1),
            ("right", player_row, player_col + 1),
        )
        segments = [f"The player is on ({player_row}, {player_col})"]
        segments.extend(
            f"({row}, {col}) is empty"
            for direction, row, col in neighbours
            if direction in answer_str
        )

        detail = ", and ".join(segments)
        self.analysis = f"{detail}. The player can move {answer_str}. Therefore, The option is {self.answer}"
65 | 
66 | 
67 | def _add_random_options(
68 |     bucket: Set[str],
69 |     choices: List[str],
70 |     count: int,
71 |     rng: random.Random,
72 | ) -> None:
73 |     target = len(bucket) + count
74 |     if not choices:
75 |         return
76 |     while len(bucket) < target:
77 |         bucket.add(rng.choice(choices))
78 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/MODEL_CONFIG.md:
--------------------------------------------------------------------------------
  1 | # VLM 模型配置说明
  2 | 
  3 | ## 模型类型
  4 | 
  5 | 系统支持两种类型的 VLM 模型：
  6 | 
  7 | ### 1. API 模型 (type: api)
  8 | 
  9 | 通过 API 调用的远程模型，如 GPT-4、Gemini 等。
 10 | 
 11 | **配置示例：**
 12 | ```yaml
 13 | models:
 14 |   - name: gpt-5
 15 |     type: api
 16 |     base_url: https://newapi.deepwisdom.ai/v1
 17 |     max_tokens: 60000
 18 |     temperature: 1.0
 19 | ```
 20 | 
 21 | **参数说明：**
 22 | - `name`: 模型名称
 23 | - `type`: 必须为 `api`
 24 | - `base_url`: API 端点地址
 25 | - `max_tokens`: 最大生成 token 数
 26 | - `temperature`: 生成温度（0.0-2.0）
 27 | 
 28 | **环境变量：**
 29 | 需要设置 `OPENAI_API_KEY` 环境变量（在 `.env` 文件中）
 30 | 
 31 | ### 2. Local 模型 (type: local)
 32 | 
 33 | 本地加载的 HuggingFace 模型。
 34 | 
 35 | **配置示例：**
 36 | ```yaml
 37 | models:
 38 |   - name: Qwen/Qwen2-VL-7B-Instruct
 39 |     type: local
 40 |     device: cuda:0
 41 |     max_tokens: 10000
 42 |     temperature: 0.0
 43 |   
 44 |   - name: llava-hf/llava-v1.6-mistral-7b-hf
 45 |     type: local
 46 |     device: cuda:1
 47 |     max_tokens: 10000
 48 |     temperature: 0.0
 49 | ```
 50 | 
 51 | **参数说明：**
 52 | - `name`: HuggingFace 模型名称或路径
 53 | - `type`: 必须为 `local`
 54 | - `device`: 运行设备（如 `cuda:0`, `cuda:1`, `cpu`）
 55 | - `max_tokens`: 最大生成 token 数
 56 | - `temperature`: 生成温度（0.0-2.0）
 57 | 
 58 | **模型加载逻辑：**
 59 | 1. 首先尝试从本地缓存加载模型
 60 | 2. 如果失败，自动从 HuggingFace 下载到 `/huggingface_model` 目录
 61 | 3. 模型会被加载到指定的 GPU 设备上
 62 | 
 63 | **依赖安装：**
 64 | ```bash
 65 | pip install transformers torch pillow accelerate
 66 | 
 67 | # 如果使用 Qwen2.5-VL 模型，还需要安装：
 68 | pip install qwen-vl-utils
 69 | ```
 70 | 
 71 | ## 完整配置示例
 72 | 
 73 | ```yaml
 74 | game: maze
 75 | dataset: dataset/maze/1
 76 | output: vlm_eval_results/maze
 77 | 
 78 | models:
 79 |   # API 模型
 80 |   - name: gpt-5
 81 |     type: api
 82 |     base_url: https://newapi.deepwisdom.ai/v1
 83 |     max_tokens: 60000
 84 |     temperature: 1.0
 85 |   
 86 |   # Local 模型 - GPU 0
 87 |   - name: Qwen/Qwen2-VL-7B-Instruct
 88 |     type: local
 89 |     device: cuda:0
 90 |     max_tokens: 10000
 91 |     temperature: 0.0
 92 |   
 93 |   # Local 模型 - GPU 1
 94 |   - name: llava-hf/llava-v1.6-mistral-7b-hf
 95 |     type: local
 96 |     device: cuda:1
 97 |     max_tokens: 10000
 98 |     temperature: 0.0
 99 | 
100 | workers: 10
101 | max_levels: -1
102 | assets_folder: skins/maze/1
103 | ```
104 | 
105 | ## 多 GPU 使用
106 | 
107 | 可以配置多个 local 模型在不同的 GPU 上运行：
108 | 
109 | ```yaml
110 | models:
111 |   - name: model-1
112 |     type: local
113 |     device: cuda:0  # 第一张 GPU
114 |     
115 |   - name: model-2
116 |     type: local
117 |     device: cuda:1  # 第二张 GPU
118 |     
119 |   - name: model-3
120 |     type: local
121 |     device: cuda:2  # 第三张 GPU
122 | ```
123 | 
124 | ## 注意事项
125 | 
126 | 1. **API 模型**：需要确保网络连接正常，API key 有效
127 | 2. **Local 模型**：
128 |    - 首次运行会下载模型，可能需要较长时间
129 |    - 确保有足够的磁盘空间（`/huggingface_model` 目录）
130 |    - 确保 GPU 显存足够（7B 模型约需 14GB 显存）
131 |    - 可以使用 `device: cpu` 在 CPU 上运行（速度较慢）
132 | 3. **并行执行**：不同模型会并行评估，注意资源分配
133 | 4. **每个难度只测试后 24 个 case**
134 | 
135 | ## 支持的模型示例
136 | 
137 | ### Local 模型
138 | - `Qwen/Qwen2-VL-7B-Instruct`
139 | - `Qwen/Qwen2-VL-2B-Instruct`
140 | - `llava-hf/llava-v1.6-mistral-7b-hf`
141 | - `llava-hf/llava-1.5-7b-hf`
142 | - 其他支持 `AutoModelForVision2Seq` 的模型
143 | 
144 | ### API 模型
145 | - GPT-4o, GPT-4V
146 | - Gemini Pro Vision
147 | - Claude 3 Vision
148 | - 其他兼容 OpenAI API 的模型
149 | 
150 | 


--------------------------------------------------------------------------------
/prompts/__init__.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Video Model Prompts 模块。
  4 | 
  5 | 提供基于皮肤配置的动态 prompt 生成功能。
  6 | """
  7 | import json
  8 | from pathlib import Path
  9 | from typing import Optional
 10 | 
 11 | from .videomodel_maze_prompt import get_maze_prompt
 12 | from .videomodel_maze3d_prompt import get_maze3d_prompt
 13 | from .videomodel_sokoban_prompt import get_sokoban_prompt
 14 | from .videomodel_trapfield_prompt import get_trapfield_prompt
 15 | from .videomodel_pathfinder_prompt import get_pathfinder_prompt
 16 | 
# Alias map: alternative game-type names -> canonical game-type names
GAME_ALIASES = {
    "irregular_maze": "pathfinder",
    "regular_maze": "maze",
    "3d_maze": "maze3d",
}

# Map from canonical game type to the function that builds its prompt
PROMPT_GENERATORS = {
    "maze": get_maze_prompt,
    "maze3d": get_maze3d_prompt,
    "sokoban": get_sokoban_prompt,
    "trapfield": get_trapfield_prompt,
    "pathfinder": get_pathfinder_prompt,
}
 32 | 
 33 | 
 34 | def load_skin_description(skins_root: Path, game_type: str, skin_id: str) -> Optional[dict]:
 35 |     """
 36 |     加载皮肤的 description.json 文件。
 37 |     
 38 |     Args:
 39 |         skins_root: skins 目录的根路径
 40 |         game_type: 游戏类型 (maze, maze3d, sokoban, trapfield, pathfinder)
 41 |         skin_id: 皮肤 ID (1, 2, 3, ...)
 42 |     
 43 |     Returns:
 44 |         description.json 的内容，或 None（如果文件不存在）
 45 |     """
 46 |     # 处理游戏类型别名
 47 |     canonical_game_type = GAME_ALIASES.get(game_type, game_type)
 48 |     
 49 |     desc_path = skins_root / canonical_game_type / skin_id / "description.json"
 50 |     
 51 |     if not desc_path.exists():
 52 |         return None
 53 |     
 54 |     with open(desc_path, 'r', encoding='utf-8') as f:
 55 |         return json.load(f)
 56 | 
 57 | 
 58 | def get_dynamic_prompt(
 59 |     game_type: str,
 60 |     skin_id: str,
 61 |     skins_root: Optional[Path] = None,
 62 | ) -> str:
 63 |     """
 64 |     根据游戏类型和皮肤 ID 生成动态 prompt。
 65 |     
 66 |     Args:
 67 |         game_type: 游戏类型 (maze, maze3d, sokoban, trapfield, pathfinder, irregular_maze, regular_maze)
 68 |         skin_id: 皮肤 ID
 69 |         skins_root: skins 目录的根路径，默认为 VR-Bench/skins
 70 |     
 71 |     Returns:
 72 |         生成的 prompt 字符串
 73 |     
 74 |     Raises:
 75 |         ValueError: 如果游戏类型不支持或找不到皮肤描述文件
 76 |     """
 77 |     # 处理游戏类型别名
 78 |     canonical_game_type = GAME_ALIASES.get(game_type, game_type)
 79 |     
 80 |     # 检查游戏类型是否支持
 81 |     if canonical_game_type not in PROMPT_GENERATORS:
 82 |         raise ValueError(f"Unsupported game type: {game_type}")
 83 |     
 84 |     # 确定 skins 目录路径
 85 |     if skins_root is None:
 86 |         # 默认路径: VR-Bench/skins (相对于此文件)
 87 |         skins_root = Path(__file__).parent.parent / "skins"
 88 |     
 89 |     # 加载皮肤描述
 90 |     description = load_skin_description(skins_root, canonical_game_type, skin_id)
 91 |     
 92 |     if description is None:
 93 |         raise ValueError(
 94 |             f"Skin description not found: skins/{canonical_game_type}/{skin_id}/description.json"
 95 |         )
 96 |     
 97 |     visual_description = description.get("visual_description", {})
 98 |     
 99 |     if not visual_description:
100 |         raise ValueError(
101 |             f"visual_description is empty in skins/{canonical_game_type}/{skin_id}/description.json"
102 |         )
103 |     
104 |     # 生成 prompt
105 |     generator = PROMPT_GENERATORS[canonical_game_type]
106 |     return generator(visual_description)
107 | 
108 | 
# Names exported by ``from <this package> import *``.
__all__ = [
    "get_dynamic_prompt",
    "load_skin_description",
    "GAME_ALIASES",
    "PROMPT_GENERATORS",
]
115 | 
116 | 


--------------------------------------------------------------------------------
/core/game_adapter.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 游戏适配器基类
  3 | 定义统一的接口，让不同游戏都能接入并发生成系统
  4 | """
  5 | 
  6 | from abc import ABC, abstractmethod
  7 | from pathlib import Path
  8 | from typing import Any, Dict, Optional, Tuple
  9 | 
 10 | 
 11 | class GameAdapter(ABC):
 12 |     """游戏适配器基类"""
 13 |     
 14 |     @abstractmethod
 15 |     def get_game_name(self) -> str:
 16 |         """返回游戏名称"""
 17 |         pass
 18 |     
 19 |     @abstractmethod
 20 |     def generate_level(
 21 |         self,
 22 |         difficulty_config: Dict[str, Any],
 23 |         assets_folder: str,
 24 |         **kwargs
 25 |     ) -> Optional[Any]:
 26 |         """
 27 |         生成一个关卡
 28 |         
 29 |         Args:
 30 |             difficulty_config: 难度配置字典
 31 |             assets_folder: 素材文件夹路径
 32 |             **kwargs: 其他参数
 33 |             
 34 |         Returns:
 35 |             生成的关卡对象，失败返回 None
 36 |         """
 37 |         pass
 38 |     
 39 |     @abstractmethod
 40 |     def save_level(
 41 |         self,
 42 |         level: Any,
 43 |         output_dir: Path,
 44 |         level_id: int,
 45 |         difficulty_name: str,
 46 |         **kwargs
 47 |     ) -> Dict[str, Optional[str]]:
 48 |         """
 49 |         保存关卡（包括视频、图片等）
 50 |         
 51 |         Args:
 52 |             level: 关卡对象
 53 |             output_dir: 输出目录
 54 |             level_id: 关卡ID
 55 |             difficulty_name: 难度名称
 56 |             **kwargs: 其他参数（如 fps）
 57 |             
 58 |         Returns:
 59 |             包含文件信息的字典，例如:
 60 |             {
 61 |                 'video': 'video_0001.mp4',
 62 |                 'image': 'image_0001.png',
 63 |                 'state': 'state_0001.json'
 64 |             }
 65 |             如果某个文件生成失败，对应值为 None
 66 |         """
 67 |         pass
 68 |     
 69 |     @abstractmethod
 70 |     def get_level_hash(self, level: Any) -> str:
 71 |         """
 72 |         获取关卡的哈希值（用于去重）
 73 |         
 74 |         Args:
 75 |             level: 关卡对象
 76 |             
 77 |         Returns:
 78 |             关卡的哈希字符串
 79 |         """
 80 |         pass
 81 |     
 82 |     @abstractmethod
 83 |     def is_duplicate(self, level: Any, existing_hashes: set) -> bool:
 84 |         """
 85 |         检查关卡是否重复
 86 |         
 87 |         Args:
 88 |             level: 关卡对象
 89 |             existing_hashes: 已存在的哈希集合
 90 |             
 91 |         Returns:
 92 |             True 如果重复，False 如果不重复
 93 |         """
 94 |         pass
 95 |     
 96 |     def validate_difficulty_config(self, difficulty_config: Dict[str, Any]) -> bool:
 97 |         """
 98 |         验证难度配置是否有效
 99 |         
100 |         Args:
101 |             difficulty_config: 难度配置字典
102 |             
103 |         Returns:
104 |             True 如果配置有效，False 如果无效
105 |         """
106 |         # 默认实现：检查是否有 count 字段
107 |         return 'count' in difficulty_config
108 |     
109 |     def get_required_texture_files(self) -> list:
110 |         """
111 |         返回游戏需要的纹理文件列表
112 |         
113 |         Returns:
114 |             纹理文件名列表（不含扩展名）
115 |         """
116 |         return []
117 |     
118 |     def cleanup(self):
119 |         """清理资源（可选）"""
120 |         pass
121 | 
122 | 
class LevelDeduplicator:
    """Generic level de-duplicator backed by a set of hash strings."""

    def __init__(self):
        # All hashes registered so far.
        self.hashes = set()

    def add_hash(self, hash_value: str):
        """Register ``hash_value`` as seen."""
        self.hashes.add(hash_value)

    def is_duplicate(self, hash_value: str) -> bool:
        """Return True if ``hash_value`` has already been registered."""
        seen = self.hashes
        return hash_value in seen

    def get_count(self) -> int:
        """Return the number of stored hashes."""
        seen = self.hashes
        return len(seen)
140 | 
141 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/prompts/__init__.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from pathlib import Path
  3 | from typing import Dict
  4 | 
  5 | from .maze_prompt import MAZE_SYSTEM_PROMPT_TEMPLATE, MAZE_USER_PROMPT_TEMPLATE
  6 | from .sokoban_prompt import SOKOBAN_SYSTEM_PROMPT_TEMPLATE, SOKOBAN_USER_PROMPT_TEMPLATE
  7 | from .trapfield_prompt import TRAPFIELD_SYSTEM_PROMPT_TEMPLATE, TRAPFIELD_USER_PROMPT_TEMPLATE
  8 | from .pathfinder_prompt import PATHFINDER_SYSTEM_PROMPT_TEMPLATE, PATHFINDER_USER_PROMPT_TEMPLATE
  9 | from .maze3d_prompt import MAZE3D_SYSTEM_PROMPT_TEMPLATE, MAZE3D_USER_PROMPT_TEMPLATE
 10 | 
# Prompt template map: game name -> {'system': ..., 'user': ...} templates
PROMPT_TEMPLATES = {
    'maze': {
        'system': MAZE_SYSTEM_PROMPT_TEMPLATE,
        'user': MAZE_USER_PROMPT_TEMPLATE,
    },
    'sokoban': {
        'system': SOKOBAN_SYSTEM_PROMPT_TEMPLATE,
        'user': SOKOBAN_USER_PROMPT_TEMPLATE,
    },
    'trapfield': {
        'system': TRAPFIELD_SYSTEM_PROMPT_TEMPLATE,
        'user': TRAPFIELD_USER_PROMPT_TEMPLATE,
    },
    'pathfinder': {
        'system': PATHFINDER_SYSTEM_PROMPT_TEMPLATE,
        'user': PATHFINDER_USER_PROMPT_TEMPLATE,
    },
    'maze3d': {
        'system': MAZE3D_SYSTEM_PROMPT_TEMPLATE,
        'user': MAZE3D_USER_PROMPT_TEMPLATE,
    },
}

# Game-name aliases: alternative name -> key used in PROMPT_TEMPLATES
GAME_ALIASES = {'3dmaze': 'maze3d'}
 37 | 
 38 | 
 39 | def load_skin_description(assets_folder: str) -> Dict[str, str]:
 40 |     """
 41 |     从 assets_folder 加载 description.json 并返回 visual_description 字典。
 42 | 
 43 |     Args:
 44 |         assets_folder: 皮肤资源文件夹路径
 45 | 
 46 |     Returns:
 47 |         visual_description 字典
 48 | 
 49 |     Raises:
 50 |         FileNotFoundError: description.json 不存在
 51 |         ValueError: JSON 解析失败或缺少 visual_description
 52 |     """
 53 |     description_path = Path(assets_folder) / "description.json"
 54 | 
 55 |     if not description_path.exists():
 56 |         raise FileNotFoundError(f"description.json not found in {assets_folder}")
 57 | 
 58 |     try:
 59 |         with open(description_path, 'r', encoding='utf-8') as f:
 60 |             data = json.load(f)
 61 |     except json.JSONDecodeError as e:
 62 |         raise ValueError(f"Failed to parse description.json in {assets_folder}: {e}")
 63 | 
 64 |     visual_desc = data.get("visual_description")
 65 |     if not visual_desc:
 66 |         raise ValueError(f"visual_description not found in {description_path}")
 67 | 
 68 |     return visual_desc
 69 | 
 70 | 
 71 | def get_dynamic_prompt(game_name: str, prompt_type: str, assets_folder: str) -> str:
 72 |     """
 73 |     获取动态替换后的 prompt。
 74 | 
 75 |     Args:
 76 |         game_name: 游戏类型 (maze, sokoban, trapfield, pathfinder, maze3d)
 77 |         prompt_type: prompt 类型 ('system' 或 'user')
 78 |         assets_folder: 皮肤资源文件夹路径
 79 | 
 80 |     Returns:
 81 |         格式化后的 prompt 字符串
 82 | 
 83 |     Raises:
 84 |         ValueError: 游戏类型/prompt类型未知，或皮肤描述缺少必需键
 85 |         FileNotFoundError: description.json 不存在
 86 |     """
 87 |     # 解析别名
 88 |     game_name = GAME_ALIASES.get(game_name, game_name)
 89 | 
 90 |     if game_name not in PROMPT_TEMPLATES:
 91 |         raise ValueError(f"Unknown game: {game_name}")
 92 |     if prompt_type not in PROMPT_TEMPLATES[game_name]:
 93 |         raise ValueError(f"Unknown prompt type: {prompt_type}")
 94 | 
 95 |     template = PROMPT_TEMPLATES[game_name][prompt_type]
 96 |     visual_desc = load_skin_description(assets_folder)
 97 | 
 98 |     try:
 99 |         return template.format(**visual_desc)
100 |     except KeyError as e:
101 |         raise ValueError(f"Missing key in visual_description: {e}")
102 | 
103 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/action_metrics.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Dict, Any, Union
  2 | 
  3 | 
  4 | def calculate_sr(is_win: bool) -> float:
  5 |     return 1.0 if is_win else 0.0
  6 | 
  7 | 
  8 | def calculate_pr(
  9 |     pred_actions: List[Dict[str, Any]],
 10 |     opt_actions: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]
 11 | ) -> float:
 12 |     if isinstance(opt_actions, list) and opt_actions and isinstance(opt_actions[0], list):
 13 |         return max(_calculate_pr_single(pred_actions, opt) for opt in opt_actions)
 14 |     return _calculate_pr_single(pred_actions, opt_actions)
 15 | 
 16 | 
 17 | def _calculate_pr_single(
 18 |     pred_actions: List[Dict[str, Any]],
 19 |     opt_actions: List[Dict[str, Any]]
 20 | ) -> float:
 21 |     if not opt_actions:
 22 |         return 1.0 if not pred_actions else 0.0
 23 | 
 24 |     # 检查是否是 PathFinder 游戏（通过检查 action 类型）
 25 |     if pred_actions and pred_actions[0].get('action') == 'path':
 26 |         # PathFinder: 计算字母数组的连续匹配数
 27 |         pred_path = pred_actions[0].get('path', [])
 28 |         opt_path = opt_actions[0].get('path', [])
 29 | 
 30 |         if not opt_path:
 31 |             return 1.0 if not pred_path else 0.0
 32 | 
 33 |         # 计算从头开始连续匹配的节点数
 34 |         matched = 0
 35 |         for p_node, o_node in zip(pred_path, opt_path):
 36 |             if p_node == o_node:
 37 |                 matched += 1
 38 |             else:
 39 |                 break
 40 | 
 41 |         return matched / len(opt_path)
 42 |     else:
 43 |         # 其他游戏: 计算动作序列的连续匹配数
 44 |         matched = 0
 45 |         for p, o in zip(pred_actions, opt_actions):
 46 |             if p == o:
 47 |                 matched += 1
 48 |             else:
 49 |                 break
 50 | 
 51 |         return matched / len(opt_actions)
 52 | 
 53 | 
 54 | def calculate_mr(
 55 |     pred_actions: List[Dict[str, Any]],
 56 |     opt_actions: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]
 57 | ) -> float:
 58 |     if isinstance(opt_actions, list) and opt_actions and isinstance(opt_actions[0], list):
 59 |         return 1.0 if any(pred_actions == opt for opt in opt_actions) else 0.0
 60 |     return 1.0 if pred_actions == opt_actions else 0.0
 61 | 
 62 | 
 63 | def calculate_step(
 64 |     pred_actions: List[Dict[str, Any]],
 65 |     opt_actions: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
 66 |     is_win: bool = False
 67 | ) -> float:
 68 |     """Calculate step metric: (predicted_length / optimal_length) - 1
 69 | 
 70 |     Only calculates for successful cases (is_win=True).
 71 |     Returns None for failed cases.
 72 | 
 73 |     For PathFinder game, calculates based on sequence length instead of action count.
 74 |     """
 75 |     if not is_win:
 76 |         return None
 77 | 
 78 |     # 检查是否是 PathFinder 游戏（通过检查 action 类型）
 79 |     if pred_actions and pred_actions[0].get('action') == 'path':
 80 |         # PathFinder: 使用字母数组的长度
 81 |         pred_length = len(pred_actions[0].get('path', []))
 82 | 
 83 |         if isinstance(opt_actions, list) and opt_actions and isinstance(opt_actions[0], list):
 84 |             opt_length = min(len(opt[0].get('path', [])) for opt in opt_actions)
 85 |         else:
 86 |             opt_length = len(opt_actions[0].get('path', []))
 87 | 
 88 |         if opt_length == 0:
 89 |             return 0.0
 90 | 
 91 |         return pred_length / opt_length - 1.0
 92 |     else:
 93 |         # 其他游戏: 使用动作数量
 94 |         if isinstance(opt_actions, list) and opt_actions and isinstance(opt_actions[0], list):
 95 |             opt_length = min(len(opt) for opt in opt_actions)
 96 |         else:
 97 |             opt_length = len(opt_actions)
 98 | 
 99 |         if opt_length == 0:
100 |             return 0.0
101 | 
102 |         return len(pred_actions) / opt_length - 1.0
103 | 


--------------------------------------------------------------------------------
/dataset_init.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | 从 Hugging Face 下载并解压 VR-Bench 数据集
  4 | """
  5 | 
  6 | import argparse
  7 | import logging
  8 | from pathlib import Path
  9 | from huggingface_hub import hf_hub_download
 10 | from dotenv import load_dotenv
 11 | import os
 12 | import tarfile
 13 | 
# Configure logging at import time: INFO level, timestamped messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Load environment variables via python-dotenv (presumably from a local .env
# file, where HF_TOKEN is expected to live — see the --token help text).
load_dotenv()
 20 | 
 21 | 
 22 | def download_and_extract(
 23 |     repo_id: str = "amagipeng/VR-Bench",
 24 |     output_dir: str = "./dataset_VR",
 25 |     token: str = None
 26 | ):
 27 |     """
 28 |     下载并解压数据集
 29 |     
 30 |     Args:
 31 |         repo_id: Hugging Face 仓库 ID
 32 |         output_dir: 输出目录
 33 |         token: Hugging Face token (可选)
 34 |     """
 35 |     output_path = Path(output_dir)
 36 |     
 37 |     if output_path.exists() and any(output_path.iterdir()):
 38 |         logging.warning(f"目录 {output_dir} 已存在且非空")
 39 |         response = input("是否继续并覆盖? (y/n): ")
 40 |         if response.lower() != 'y':
 41 |             logging.info("取消下载")
 42 |             return
 43 |     
 44 |     output_path.mkdir(parents=True, exist_ok=True)
 45 |     
 46 |     if token is None:
 47 |         token = os.getenv("HF_TOKEN")
 48 |     
 49 |     logging.info(f"开始下载数据集: {repo_id}")
 50 |     
 51 |     # 下载 train.tar.gz
 52 |     logging.info("\n下载 train.tar.gz...")
 53 |     train_file = hf_hub_download(
 54 |         repo_id=repo_id,
 55 |         repo_type="dataset",
 56 |         filename="train.tar.gz",
 57 |         token=token
 58 |     )
 59 |     
 60 |     logging.info("解压 train.tar.gz...")
 61 |     with tarfile.open(train_file, 'r:gz') as tar:
 62 |         tar.extractall(output_path)
 63 |     logging.info("✓ train 解压完成")
 64 |     
 65 |     # 下载 eval.tar.gz
 66 |     logging.info("\n下载 eval.tar.gz...")
 67 |     eval_file = hf_hub_download(
 68 |         repo_id=repo_id,
 69 |         repo_type="dataset",
 70 |         filename="eval.tar.gz",
 71 |         token=token
 72 |     )
 73 |     
 74 |     logging.info("解压 eval.tar.gz...")
 75 |     with tarfile.open(eval_file, 'r:gz') as tar:
 76 |         tar.extractall(output_path)
 77 |     logging.info("✓ eval 解压完成")
 78 |     
 79 |     # 下载 README
 80 |     try:
 81 |         logging.info("\n下载 README.md...")
 82 |         readme_file = hf_hub_download(
 83 |             repo_id=repo_id,
 84 |             repo_type="dataset",
 85 |             filename="README.md",
 86 |             token=token
 87 |         )
 88 |         import shutil
 89 |         shutil.copy(readme_file, output_path / "README.md")
 90 |         logging.info("✓ README.md 下载完成")
 91 |     except Exception as e:
 92 |         logging.warning(f"README.md 下载失败: {e}")
 93 |     
 94 |     logging.info(f"\n✓ 数据集下载并解压完成!")
 95 |     logging.info(f"数据集位置: {output_path.absolute()}")
 96 |     
 97 |     # 显示数据集结构
 98 |     logging.info("\n数据集结构:")
 99 |     for split in ['train', 'eval']:
100 |         split_dir = output_path / split
101 |         if split_dir.exists():
102 |             games = [d.name for d in split_dir.iterdir() if d.is_dir()]
103 |             logging.info(f"  {split}/: {', '.join(games)}")
104 | 
105 | 
def main():
    """Parse command-line options and run ``download_and_extract``."""
    parser = argparse.ArgumentParser(
        description='从 Hugging Face 下载并解压 VR-Bench 数据集'
    )

    # (flag, default, help text); every option is a plain string.
    option_specs = (
        ('--repo-id', 'amagipeng/VR-Bench', 'Hugging Face 仓库 ID (默认: amagipeng/VR-Bench)'),
        ('--output-dir', './dataset_VR', '输出目录 (默认: ./dataset_VR)'),
        ('--token', None, 'Hugging Face token (默认: 从 .env 文件读取 HF_TOKEN)'),
    )
    for flag, default, help_text in option_specs:
        parser.add_argument(flag, type=str, default=default, help=help_text)

    cli = parser.parse_args()

    download_and_extract(
        repo_id=cli.repo_id,
        output_dir=cli.output_dir,
        token=cli.token
    )


if __name__ == '__main__':
    main()
140 | 
141 | 


--------------------------------------------------------------------------------
/AutoEnv/base/engine/cost_monitor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Global cost monitoring for LLM calls using contextvars.
  3 | 
  4 | Usage:
  5 |     with CostMonitor() as monitor:
  6 |         await pipeline.run(...)
  7 |         print(f"Total cost: ${monitor.total_cost:.4f}")
  8 |         monitor.save()  # saves to workspace/costs/<timestamp>.json
  9 | """
 10 | 
 11 | import contextvars
 12 | import json
 13 | from dataclasses import dataclass, field
 14 | from datetime import datetime
 15 | from pathlib import Path
 16 | from typing import TYPE_CHECKING
 17 | 
 18 | if TYPE_CHECKING:
 19 |     from base.engine.cost_monitor import CostMonitor
 20 | 
# Context variable holding the CostMonitor that is active in the current
# context; None when no monitor has been entered.
_current_monitor: contextvars.ContextVar["CostMonitor | None"] = contextvars.ContextVar(
    "cost_monitor", default=None
)
 24 | 
 25 | 
@dataclass
class CostRecord:
    """Single LLM call cost record."""

    model: str  # model name the call is attributed to
    input_tokens: int  # input token count reported for the call
    output_tokens: int  # output token count reported for the call
    cost: float  # cost of the call (presumably USD, per the module usage example)
 34 | 
 35 | 
 36 | @dataclass
 37 | class CostMonitor:
 38 |     """Aggregates LLM costs during a context scope."""
 39 | 
 40 |     records: list[CostRecord] = field(default_factory=list)
 41 |     _token: contextvars.Token | None = field(default=None, repr=False)
 42 | 
 43 |     @property
 44 |     def total_cost(self) -> float:
 45 |         return sum(r.cost for r in self.records)
 46 | 
 47 |     @property
 48 |     def total_input_tokens(self) -> int:
 49 |         return sum(r.input_tokens for r in self.records)
 50 | 
 51 |     @property
 52 |     def total_output_tokens(self) -> int:
 53 |         return sum(r.output_tokens for r in self.records)
 54 | 
 55 |     @property
 56 |     def call_count(self) -> int:
 57 |         return len(self.records)
 58 | 
 59 |     def record(self, model: str, input_tokens: int, output_tokens: int, cost: float) -> None:
 60 |         """Record a single LLM call's cost."""
 61 |         self.records.append(CostRecord(model, input_tokens, output_tokens, cost))
 62 | 
 63 |     def summary(self) -> dict:
 64 |         """Get aggregated summary."""
 65 |         return {
 66 |             "total_cost": self.total_cost,
 67 |             "total_input_tokens": self.total_input_tokens,
 68 |             "total_output_tokens": self.total_output_tokens,
 69 |             "call_count": self.call_count,
 70 |             "by_model": self._group_by_model(),
 71 |         }
 72 | 
 73 |     def _group_by_model(self) -> dict[str, dict]:
 74 |         result: dict[str, dict] = {}
 75 |         for r in self.records:
 76 |             if r.model not in result:
 77 |                 result[r.model] = {"cost": 0.0, "input_tokens": 0, "output_tokens": 0, "calls": 0}
 78 |             result[r.model]["cost"] += r.cost
 79 |             result[r.model]["input_tokens"] += r.input_tokens
 80 |             result[r.model]["output_tokens"] += r.output_tokens
 81 |             result[r.model]["calls"] += 1
 82 |         return result
 83 | 
 84 |     def save(self, save_dir: str = "workspace/costs") -> Path:
 85 |         """Save cost summary to JSON file."""
 86 |         cost_dir = Path(save_dir)
 87 |         cost_dir.mkdir(parents=True, exist_ok=True)
 88 |         ts = datetime.now().strftime("%Y%m%d_%H%M%S")
 89 |         path = cost_dir / f"{ts}.json"
 90 |         path.write_text(
 91 |             json.dumps(self.summary(), indent=2, ensure_ascii=False),
 92 |             encoding="utf-8",
 93 |         )
 94 |         return path
 95 | 
 96 |     def __enter__(self) -> "CostMonitor":
 97 |         self._token = _current_monitor.set(self)
 98 |         return self
 99 | 
100 |     def __exit__(self, *args) -> None:
101 |         if self._token is not None:
102 |             _current_monitor.reset(self._token)
103 | 
104 | 
def get_current_monitor() -> "CostMonitor | None":
    """Return the cost monitor stored in the context variable, or None."""
    active = _current_monitor.get()
    return active
108 | 
109 | 
def record_cost(model: str, input_tokens: int, output_tokens: int, cost: float) -> None:
    """Forward a call's cost to the active monitor; do nothing if there is none."""
    monitor = get_current_monitor()
    if monitor is None:
        return
    monitor.record(model, input_tokens, output_tokens, cost)
115 | 
116 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/executors/maze_executor.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | from typing import List, Dict, Any, Tuple
  3 | from pathlib import Path
  4 | 
  5 | from core.schema import UnifiedState
  6 | from evaluation.vlm_eval.game_executor import GameExecutor
  7 | from evaluation.vlm_eval.prompts import get_dynamic_prompt
  8 | from games.maze import constants
  9 | from games.maze.generators.image_gen import draw_maze
 10 | from generation.path_finder import find_maze_paths
 11 | 
 12 | 
 13 | class MazeExecutor(GameExecutor):
    def __init__(self, assets_folder: str = None):
        """Store the assets folder on the instance.

        Args:
            assets_folder: Assets folder path; defaults to None.
        """
        self.assets_folder = assets_folder
 16 |     
 17 |     def load_state(self, state_path: str) -> UnifiedState:
 18 |         return UnifiedState.load(state_path)
 19 |     
 20 |     def get_optimal_solution(self, state: UnifiedState) -> List[List[Dict[str, Any]]]:
 21 |         all_paths = find_maze_paths(state)
 22 |         return [self._path_to_actions(path) for path in all_paths]
 23 |     
 24 |     def execute_action(self, state: UnifiedState, action: Dict[str, Any]) -> Tuple[UnifiedState, bool, str]:
 25 |         if action.get('action') != 'move':
 26 |             return state, False, f"Invalid action type: {action.get('action')}"
 27 | 
 28 |         direction = action.get('direction')
 29 |         if direction not in ['up', 'down', 'left', 'right']:
 30 |             return state, False, f"Invalid direction: {direction}"
 31 | 
 32 |         current_pos = state.player.grid_pos
 33 |         new_pos = self._calculate_new_position(current_pos, direction)
 34 | 
 35 |         maze = state.grid.data
 36 |         rows = len(maze)
 37 |         cols = len(maze[0]) if maze else 0
 38 | 
 39 |         if not (0 <= new_pos.row < rows and 0 <= new_pos.col < cols):
 40 |             return state, False, "Out of bounds"
 41 | 
 42 |         cell_value = maze[new_pos.row][new_pos.col]
 43 |         if cell_value == constants.WALL_CELL:
 44 |             return state, False, "Hit wall"
 45 | 
 46 |         new_state = copy.deepcopy(state)
 47 | 
 48 |         from core.schema.entity import Entity
 49 | 
 50 |         cell_size = state.render.cell_size
 51 |         new_state.player = Entity.from_grid_pos(new_pos.row, new_pos.col, cell_size)
 52 | 
 53 |         new_maze = [list(row) for row in maze]
 54 |         new_maze[current_pos.row][current_pos.col] = constants.EMPTY_CELL
 55 |         if new_maze[new_pos.row][new_pos.col] != constants.GOAL_CELL:
 56 |             new_maze[new_pos.row][new_pos.col] = constants.PLAYER_CELL
 57 |         else:
 58 |             new_maze[new_pos.row][new_pos.col] = constants.PLAYER_CELL
 59 | 
 60 |         new_state.grid.data = new_maze
 61 | 
 62 |         return new_state, True, "OK"
 63 |     
 64 |     def check_win(self, state: UnifiedState) -> bool:
 65 |         return (state.player.grid_pos.row == state.goal.grid_pos.row and 
 66 |                 state.player.grid_pos.col == state.goal.grid_pos.col)
 67 |     
 68 |     def render_state(self, state: UnifiedState, output_path: str) -> None:
 69 |         Path(output_path).parent.mkdir(parents=True, exist_ok=True)
 70 |         draw_maze(state.grid.data, state.render.cell_size, output_path, assets_folder=self.assets_folder)
 71 |     
 72 |     def get_system_prompt(self) -> str:
 73 |         return get_dynamic_prompt('maze', 'system', self.assets_folder)
 74 | 
 75 |     def get_user_prompt(self) -> str:
 76 |         return get_dynamic_prompt('maze', 'user', self.assets_folder)
 77 | 
 78 |     def get_game_type(self) -> str:
 79 |         return 'maze'
 80 |     
 81 |     def _path_to_actions(self, path: List[Tuple[int, int]]) -> List[Dict[str, Any]]:
 82 |         actions = []
 83 |         for i in range(len(path) - 1):
 84 |             curr_row, curr_col = path[i]
 85 |             next_row, next_col = path[i + 1]
 86 |             
 87 |             if next_row < curr_row:
 88 |                 direction = 'up'
 89 |             elif next_row > curr_row:
 90 |                 direction = 'down'
 91 |             elif next_col < curr_col:
 92 |                 direction = 'left'
 93 |             else:
 94 |                 direction = 'right'
 95 |             
 96 |             actions.append({'action': 'move', 'direction': direction})
 97 |         
 98 |         return actions
 99 |     
100 |     def _calculate_new_position(self, pos, direction: str):
101 |         from core.schema.position import Position
102 |         
103 |         if direction == 'up':
104 |             return Position(row=pos.row - 1, col=pos.col)
105 |         elif direction == 'down':
106 |             return Position(row=pos.row + 1, col=pos.col)
107 |         elif direction == 'left':
108 |             return Position(row=pos.row, col=pos.col - 1)
109 |         else:
110 |             return Position(row=pos.row, col=pos.col + 1)
111 | 
112 | 


--------------------------------------------------------------------------------
/utils/video_metadata.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 视频元数据提取工具
  3 | 支持 ffprobe 和 OpenCV 两种方式
  4 | """
  5 | 
  6 | from __future__ import annotations
  7 | 
  8 | import json
  9 | import math
 10 | import shutil
 11 | import subprocess
 12 | from fractions import Fraction
 13 | from pathlib import Path
 14 | from typing import Tuple
 15 | 
 16 | 
 17 | class VideoMetadataError(RuntimeError):
 18 |     """视频元数据提取失败"""
 19 |     pass
 20 | 
 21 | 
 22 | def get_video_metadata(path: Path) -> Tuple[float, float, float]:
 23 |     """
 24 |     获取视频元数据
 25 |     
 26 |     Args:
 27 |         path: 视频文件路径
 28 |         
 29 |     Returns:
 30 |         (duration_s, frame_count, fps)
 31 |         
 32 |     Raises:
 33 |         VideoMetadataError: 无法提取元数据
 34 |     """
 35 |     try:
 36 |         return _metadata_with_ffprobe(path)
 37 |     except FileNotFoundError:
 38 |         pass
 39 |     except VideoMetadataError:
 40 |         raise
 41 |     except Exception as exc:
 42 |         raise VideoMetadataError(f"ffprobe error for {path}: {exc}") from exc
 43 | 
 44 |     try:
 45 |         return _metadata_with_cv2(path)
 46 |     except ImportError:
 47 |         raise VideoMetadataError(
 48 |             "Neither ffprobe (from ffmpeg) nor OpenCV (cv2) is available"
 49 |         )
 50 |     except Exception as exc:
 51 |         raise VideoMetadataError(f"OpenCV error for {path}: {exc}") from exc
 52 | 
 53 | 
 54 | def _metadata_with_ffprobe(path: Path) -> Tuple[float, float, float]:
 55 |     """使用 ffprobe 提取元数据"""
 56 |     if not shutil.which("ffprobe"):
 57 |         raise FileNotFoundError("ffprobe not found")
 58 | 
 59 |     cmd = [
 60 |         "ffprobe",
 61 |         "-v", "error",
 62 |         "-select_streams", "v:0",
 63 |         "-show_entries", "format=duration",
 64 |         "-show_entries", "stream=nb_frames,avg_frame_rate",
 65 |         "-of", "json",
 66 |         str(path),
 67 |     ]
 68 |     result = subprocess.run(cmd, capture_output=True, text=True, check=False)
 69 |     if result.returncode != 0:
 70 |         raise VideoMetadataError(result.stderr.strip() or "ffprobe failed")
 71 | 
 72 |     data = json.loads(result.stdout)
 73 |     try:
 74 |         duration_s = float(data["format"]["duration"])
 75 |     except (KeyError, ValueError) as exc:
 76 |         raise VideoMetadataError("Duration unavailable") from exc
 77 | 
 78 |     stream = data.get("streams", [{}])[0]
 79 |     raw_frames = stream.get("nb_frames")
 80 |     avg_frame_rate = stream.get("avg_frame_rate")
 81 |     fps = _fps_from_rate(avg_frame_rate)
 82 | 
 83 |     if raw_frames in (None, "N/A"):
 84 |         frames = duration_s * fps
 85 |     else:
 86 |         try:
 87 |             frames = float(raw_frames)
 88 |         except ValueError as exc:
 89 |             raise VideoMetadataError("Invalid frame count") from exc
 90 | 
 91 |     return duration_s, frames, fps
 92 | 
 93 | 
 94 | def _metadata_with_cv2(path: Path) -> Tuple[float, float, float]:
 95 |     """使用 OpenCV 提取元数据"""
 96 |     import cv2
 97 | 
 98 |     capture = cv2.VideoCapture(str(path))
 99 |     if not capture.isOpened():
100 |         raise VideoMetadataError("Unable to open video with OpenCV")
101 | 
102 |     fps = float(capture.get(cv2.CAP_PROP_FPS))
103 |     frame_count = float(capture.get(cv2.CAP_PROP_FRAME_COUNT))
104 | 
105 |     if not math.isfinite(fps) or fps <= 0:
106 |         capture.release()
107 |         raise VideoMetadataError("Frame rate unavailable via OpenCV")
108 | 
109 |     if not math.isfinite(frame_count) or frame_count <= 0:
110 |         frame_count = float(_count_frames_with_cv2(capture))
111 | 
112 |     frames = frame_count
113 |     capture.release()
114 | 
115 |     duration_s = frames / fps
116 |     return duration_s, frames, fps
117 | 
118 | 
119 | def _count_frames_with_cv2(capture) -> int:
120 |     """手动计数帧数"""
121 |     frames = 0
122 |     while True:
123 |         ok, _ = capture.read()
124 |         if not ok:
125 |             break
126 |         frames += 1
127 |     return frames
128 | 
129 | 
130 | def _fps_from_rate(avg_frame_rate: str | None) -> float:
131 |     """从帧率字符串解析 FPS"""
132 |     if not avg_frame_rate or avg_frame_rate in ("0/0", "0"):
133 |         raise VideoMetadataError("Frame rate unavailable")
134 | 
135 |     try:
136 |         rate = Fraction(avg_frame_rate)
137 |     except (ZeroDivisionError, ValueError) as exc:
138 |         raise VideoMetadataError("Invalid frame rate") from exc
139 | 
140 |     return float(rate)
141 | 
142 | 
def has_audio_stream(path: Path) -> bool:
    """Return True when ffprobe lists at least one audio stream for ``path``.

    Only ffprobe's standard output is inspected; its exit code is ignored, so
    a failed probe that prints nothing yields False.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v", "error",
            "-select_streams", "a",
            "-show_entries", "stream=index",
            "-of", "csv=p=0",
            str(path),
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    listed_streams = probe.stdout.strip()
    return len(listed_streams) > 0
155 | 
156 | 


--------------------------------------------------------------------------------
/games/maze/default_textures.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Maze 游戏默认纹理生成器
  3 | 生成简单的纯色方块作为默认纹理
  4 | """
  5 | 
  6 | from PIL import Image, ImageDraw
  7 | from pathlib import Path
  8 | import logging
  9 | 
 10 | 
 11 | # 默认颜色配置（参考图片）
 12 | DEFAULT_COLORS = {
 13 |     'floor': (173, 216, 230),      # 浅蓝色 (light blue)
 14 |     'wall': (255, 255, 255),       # 白色 (white)
 15 |     'player': (0, 128, 0),         # 绿色 (green)
 16 |     'target': (255, 0, 0),         # 红色 (red)
 17 | }
 18 | 
 19 | 
 20 | def create_default_texture(color: tuple, size: int = 64, border: bool = True) -> Image.Image:
 21 |     """
 22 |     创建默认纹理
 23 |     
 24 |     Args:
 25 |         color: RGB 颜色元组
 26 |         size: 纹理大小（像素）
 27 |         border: 是否添加边框
 28 |         
 29 |     Returns:
 30 |         PIL Image 对象
 31 |     """
 32 |     img = Image.new('RGB', (size, size), color)
 33 |     
 34 |     if border:
 35 |         draw = ImageDraw.Draw(img)
 36 |         # 绘制浅灰色边框
 37 |         border_color = (200, 200, 200)
 38 |         draw.rectangle([0, 0, size-1, size-1], outline=border_color, width=1)
 39 |     
 40 |     return img
 41 | 
 42 | 
 43 | def create_player_texture(size: int = 64) -> Image.Image:
 44 |     """
 45 |     创建玩家纹理（绿色方块）
 46 |     
 47 |     Args:
 48 |         size: 纹理大小（像素）
 49 |         
 50 |     Returns:
 51 |         PIL Image 对象
 52 |     """
 53 |     # 创建透明背景
 54 |     img = Image.new('RGBA', (size, size), (0, 0, 0, 0))
 55 |     draw = ImageDraw.Draw(img)
 56 |     
 57 |     # 绘制绿色方块（居中，占 60% 大小）
 58 |     margin = int(size * 0.2)
 59 |     color = DEFAULT_COLORS['player']
 60 |     draw.rectangle(
 61 |         [margin, margin, size - margin - 1, size - margin - 1],
 62 |         fill=color,
 63 |         outline=(0, 100, 0),  # 深绿色边框
 64 |         width=2
 65 |     )
 66 |     
 67 |     return img
 68 | 
 69 | 
 70 | def create_target_texture(size: int = 64) -> Image.Image:
 71 |     """
 72 |     创建目标纹理（红色圆点）
 73 |     
 74 |     Args:
 75 |         size: 纹理大小（像素）
 76 |         
 77 |     Returns:
 78 |         PIL Image 对象
 79 |     """
 80 |     # 创建透明背景
 81 |     img = Image.new('RGBA', (size, size), (0, 0, 0, 0))
 82 |     draw = ImageDraw.Draw(img)
 83 |     
 84 |     # 绘制红色圆点（居中，占 50% 大小）
 85 |     margin = int(size * 0.25)
 86 |     color = DEFAULT_COLORS['target']
 87 |     draw.ellipse(
 88 |         [margin, margin, size - margin - 1, size - margin - 1],
 89 |         fill=color,
 90 |         outline=(200, 0, 0),  # 深红色边框
 91 |         width=2
 92 |     )
 93 |     
 94 |     return img
 95 | 
 96 | 
 97 | def generate_default_textures(output_dir: str | Path, size: int = 64):
 98 |     """
 99 |     生成所有默认纹理并保存到指定目录
100 |     
101 |     Args:
102 |         output_dir: 输出目录路径
103 |         size: 纹理大小（像素）
104 |     """
105 |     output_path = Path(output_dir)
106 |     output_path.mkdir(parents=True, exist_ok=True)
107 |     
108 |     logging.info(f"Generating default maze textures to {output_path}")
109 |     
110 |     # 生成地板纹理
111 |     floor_img = create_default_texture(DEFAULT_COLORS['floor'], size, border=True)
112 |     floor_img.save(output_path / 'floor.png')
113 |     
114 |     # 生成墙壁纹理
115 |     wall_img = create_default_texture(DEFAULT_COLORS['wall'], size, border=True)
116 |     wall_img.save(output_path / 'wall.png')
117 |     
118 |     # 生成玩家纹理
119 |     player_img = create_player_texture(size)
120 |     player_img.save(output_path / 'player.png')
121 |     
122 |     # 生成目标纹理
123 |     target_img = create_target_texture(size)
124 |     target_img.save(output_path / 'target.png')
125 |     
126 |     logging.info(f"✓ Generated 4 default textures: floor, wall, player, target")
127 | 
128 | 
def ensure_default_textures(assets_folder: str | Path = None) -> Path:
    """
    Make sure the default textures exist, generating them when any is missing.

    A texture counts as present when a .png, .jpg or .jpeg file with its name
    exists in the folder. If at least one of floor/wall/player/target is
    missing, all four are regenerated as PNG files.

    Args:
        assets_folder: Texture folder; when None, ``assets/default_maze`` three
            directories above this file is used

    Returns:
        Path of the texture folder
    """
    if assets_folder is None:
        folder = Path(__file__).parent.parent.parent / 'assets' / 'default_maze'
    else:
        folder = Path(assets_folder)

    extensions = ('.png', '.jpg', '.jpeg')
    complete = True
    for name in ('floor', 'wall', 'player', 'target'):
        if not any((folder / f"{name}{ext}").exists() for ext in extensions):
            complete = False
            break

    if not complete:
        logging.info("Default textures not found, generating...")
        generate_default_textures(folder)

    return folder
156 | 
157 | 
if __name__ == '__main__':
    # Manual check: write the default textures to assets/default_maze,
    # three directories above this file.
    logging.basicConfig(level=logging.INFO)
    default_dir = Path(__file__).parent.parent.parent / 'assets' / 'default_maze'
    generate_default_textures(default_dir, size=64)
    print(f"Default textures generated in: {default_dir}")
164 | 
165 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/executors/trapfield_executor.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | from typing import List, Dict, Any, Tuple
  3 | from pathlib import Path
  4 | 
  5 | from core.schema import UnifiedState
  6 | from evaluation.vlm_eval.game_executor import GameExecutor
  7 | from evaluation.vlm_eval.prompts import get_dynamic_prompt
  8 | from games.trapfield import constants
  9 | from games.trapfield.renderer import TrapFieldRenderer
 10 | from generation.path_finder import find_trapfield_paths
 11 | 
 12 | 
 13 | class TrapFieldExecutor(GameExecutor):
 14 |     def __init__(self, assets_folder: str = None):
 15 |         self.assets_folder = assets_folder
 16 |     
 17 |     def load_state(self, state_path: str) -> UnifiedState:
 18 |         return UnifiedState.load(state_path)
 19 |     
 20 |     def get_optimal_solution(self, state: UnifiedState) -> List[List[Dict[str, Any]]]:
 21 |         all_paths = find_trapfield_paths(state)
 22 |         return [self._path_to_actions(path) for path in all_paths]
 23 |     
 24 |     def execute_action(self, state: UnifiedState, action: Dict[str, Any]) -> Tuple[UnifiedState, bool, str]:
 25 |         if action.get('action') != 'move':
 26 |             return state, False, f"Invalid action type: {action.get('action')}"
 27 |         
 28 |         direction = action.get('direction')
 29 |         if direction not in ['up', 'down', 'left', 'right']:
 30 |             return state, False, f"Invalid direction: {direction}"
 31 |         
 32 |         current_pos = state.player.grid_pos
 33 |         new_pos = self._calculate_new_position(current_pos, direction)
 34 |         
 35 |         grid = state.grid.data
 36 |         rows = len(grid)
 37 |         cols = len(grid[0]) if grid else 0
 38 |         
 39 |         if not (0 <= new_pos.row < rows and 0 <= new_pos.col < cols):
 40 |             return state, False, "Out of bounds"
 41 |         
 42 |         cell_value = grid[new_pos.row][new_pos.col]
 43 |         
 44 |         if cell_value == constants.TRAP_CELL:
 45 |             return state, False, "Hit trap"
 46 |         
 47 |         new_state = copy.deepcopy(state)
 48 |         
 49 |         from core.schema.entity import Entity
 50 |         cell_size = state.render.cell_size
 51 |         new_state.player = Entity.from_grid_pos(new_pos.row, new_pos.col, cell_size)
 52 |         
 53 |         new_grid = [list(row) for row in grid]
 54 |         new_grid[current_pos.row][current_pos.col] = constants.EMPTY_CELL
 55 |         if new_grid[new_pos.row][new_pos.col] != constants.GOAL_CELL:
 56 |             new_grid[new_pos.row][new_pos.col] = constants.PLAYER_CELL
 57 |         else:
 58 |             new_grid[new_pos.row][new_pos.col] = constants.PLAYER_CELL
 59 |         
 60 |         new_state.grid.data = new_grid
 61 |         
 62 |         return new_state, True, "OK"
 63 |     
 64 |     def check_win(self, state: UnifiedState) -> bool:
 65 |         return (state.player.grid_pos.row == state.goal.grid_pos.row and 
 66 |                 state.player.grid_pos.col == state.goal.grid_pos.col)
 67 |     
 68 |     def render_state(self, state: UnifiedState, output_path: str) -> None:
 69 |         Path(output_path).parent.mkdir(parents=True, exist_ok=True)
 70 |         renderer = TrapFieldRenderer(cell_size=state.render.cell_size, assets_folder=self.assets_folder)
 71 |         renderer.render_grid(state.grid.data, output_path)
 72 |     
 73 |     def get_system_prompt(self) -> str:
 74 |         return get_dynamic_prompt('trapfield', 'system', self.assets_folder)
 75 | 
 76 |     def get_user_prompt(self) -> str:
 77 |         return get_dynamic_prompt('trapfield', 'user', self.assets_folder)
 78 |     
 79 |     def get_game_type(self) -> str:
 80 |         return 'trapfield'
 81 |     
 82 |     def _path_to_actions(self, path: List[Tuple[int, int]]) -> List[Dict[str, Any]]:
 83 |         actions = []
 84 |         for i in range(len(path) - 1):
 85 |             curr_row, curr_col = path[i]
 86 |             next_row, next_col = path[i + 1]
 87 |             
 88 |             if next_row < curr_row:
 89 |                 direction = 'up'
 90 |             elif next_row > curr_row:
 91 |                 direction = 'down'
 92 |             elif next_col < curr_col:
 93 |                 direction = 'left'
 94 |             else:
 95 |                 direction = 'right'
 96 |             
 97 |             actions.append({'action': 'move', 'direction': direction})
 98 |         
 99 |         return actions
100 |     
101 |     def _calculate_new_position(self, pos, direction: str):
102 |         from core.schema.position import Position
103 |         
104 |         if direction == 'up':
105 |             return Position(row=pos.row - 1, col=pos.col)
106 |         elif direction == 'down':
107 |             return Position(row=pos.row + 1, col=pos.col)
108 |         elif direction == 'left':
109 |             return Position(row=pos.row, col=pos.col - 1)
110 |         else:
111 |             return Position(row=pos.row, col=pos.col + 1)
112 | 
113 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/run_vlm_eval.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import sys
  3 | from pathlib import Path
  4 | sys.path.insert(0, str(Path(__file__).parent.parent))
  5 | 
  6 | import argparse
  7 | import logging
  8 | import yaml
  9 | from concurrent.futures import ProcessPoolExecutor, as_completed
 10 | from typing import Dict, Any
 11 | 
 12 | from evaluation.vlm_eval.vlm_client import VLMClient
 13 | from evaluation.vlm_eval.vlm_evaluator import VLMEvaluator
 14 | from evaluation.vlm_eval.executors.maze_executor import MazeExecutor
 15 | from evaluation.vlm_eval.executors.sokoban_executor import SokobanExecutor
 16 | from evaluation.vlm_eval.executors.trapfield_executor import TrapFieldExecutor
 17 | from evaluation.vlm_eval.executors.pathfinder_executor import PathfinderExecutor
 18 | from evaluation.vlm_eval.executors.maze3d_executor import Maze3DExecutor
 19 | from dotenv import load_dotenv
 20 | # 强制使用项目 .env 文件的值，覆盖系统环境变量
 21 | load_dotenv(override=True)
 22 | 
 23 | 
 24 | def create_executor(game: str, assets_folder: str = None):
 25 |     if game == 'maze':
 26 |         return MazeExecutor(assets_folder=assets_folder)
 27 |     elif game == 'sokoban':
 28 |         return SokobanExecutor(assets_folder=assets_folder)
 29 |     elif game == 'trapfield':
 30 |         return TrapFieldExecutor(assets_folder=assets_folder)
 31 |     elif game == 'pathfinder':
 32 |         return PathfinderExecutor(assets_folder=assets_folder)
 33 |     elif game in ['maze3d', '3dmaze' ,'maze3d_new']:
 34 |         return Maze3DExecutor(assets_folder=assets_folder)
 35 |     else:
 36 |         raise ValueError(f"Unsupported game: {game}")
 37 | 
 38 | 
 39 | def evaluate_single_model(game: str, dataset: str, model_config: Dict[str, Any],
 40 |                          output_base: str, workers: int, max_levels: int,
 41 |                          assets_folder: str = None) -> Dict[str, Any]:
 42 |     # 在子进程中重新加载环境变量，强制使用项目 .env 文件的值
 43 |     load_dotenv(override=True)
 44 | 
 45 |     model_name = model_config['name']
 46 |     output_dir = Path(output_base) / model_name
 47 | 
 48 |     logging.info(f"[{model_name}] Starting evaluation")
 49 | 
 50 |     vlm_client = VLMClient(
 51 |         model=model_name,
 52 |         base_url=model_config.get('base_url'),
 53 |         max_tokens=model_config.get('max_tokens', 10000),
 54 |         temperature=model_config.get('temperature', 0.0)
 55 |     )
 56 | 
 57 |     executor = create_executor(game, assets_folder)
 58 |     evaluator = VLMEvaluator(vlm_client, executor)
 59 | 
 60 |     summary = evaluator.evaluate_dataset(
 61 |         dataset_dir=dataset,
 62 |         output_dir=str(output_dir),
 63 |         max_workers=workers,
 64 |         max_levels=max_levels
 65 |     )
 66 | 
 67 |     logging.info(f"[{model_name}] Complete - SR: {summary['avg_sr']:.4f}, PR: {summary['avg_pr']:.4f}, MR: {summary['avg_mr']:.4f}")
 68 | 
 69 |     return {
 70 |         'model': model_name,
 71 |         'summary': summary
 72 |     }
 73 | 
 74 | 
 75 | def main():
 76 |     parser = argparse.ArgumentParser(description='VLM Game Evaluation')
 77 |     parser.add_argument('config', type=str, help='Config file path')
 78 |     args = parser.parse_args()
 79 | 
 80 |     logging.basicConfig(
 81 |         level=logging.INFO,
 82 |         format='%(asctime)s - %(levelname)s - %(message)s'
 83 |     )
 84 | 
 85 |     with open(args.config, 'r') as f:
 86 |         config = yaml.safe_load(f)
 87 | 
 88 |     game = config['game']
 89 |     dataset = config['dataset']
 90 |     output_base = config['output']
 91 |     models = config['models']
 92 |     workers = config.get('workers', 10)
 93 |     max_levels = config.get('max_levels', -1)
 94 |     assets_folder = config.get('assets_folder')
 95 | 
 96 |     logging.info(f"Game: {game}")
 97 |     logging.info(f"Dataset: {dataset}")
 98 |     logging.info(f"Models: {[m['name'] for m in models]}")
 99 |     logging.info(f"Workers per model: {workers}")
100 |     logging.info(f"Total parallel tasks: {len(models) * workers}")
101 | 
102 |     results = []
103 |     with ProcessPoolExecutor(max_workers=len(models)) as executor:
104 |         futures = []
105 |         for model_config in models:
106 |             future = executor.submit(
107 |                 evaluate_single_model,
108 |                 game, dataset, model_config, output_base,
109 |                 workers, max_levels, assets_folder
110 |             )
111 |             futures.append(future)
112 | 
113 |         for future in as_completed(futures):
114 |             try:
115 |                 result = future.result()
116 |                 results.append(result)
117 |             except Exception as e:
118 |                 logging.error(f"Model evaluation failed: {e}")
119 | 
120 |     logging.info("\n=== Final Results ===")
121 |     for result in results:
122 |         model = result['model']
123 |         summary = result['summary']
124 |         logging.info(f"{model}: SR={summary['avg_sr']:.5f}, PR={summary['avg_pr']:.5f}, MR={summary['avg_mr']:.5f}, Step={summary['avg_step']:.5f}")
125 | 
126 | 
# Run the evaluation only when this file is executed as a script.
if __name__ == '__main__':
    main()
129 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/recalculate_avg_step.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | 重新计算 VLM 评估结果中的 avg_step 指标
  4 | 
  5 | 旧方法：对所有 case 都计算 step
  6 | 新方法：只对 SR=1（成功的 case）计算 step
  7 | 
  8 | 使用方法：
  9 |     python evaluation/vlm_eval/recalculate_avg_step.py
 10 | """
 11 | 
 12 | import json
 13 | import sys
 14 | from pathlib import Path
 15 | from typing import Dict, Any, List
 16 | 
 17 | 
 18 | def recalculate_summary(summary_path: Path) -> Dict[str, Any]:
 19 |     """重新计算单个 summary.json 的 avg_step"""
 20 |     
 21 |     with open(summary_path, 'r') as f:
 22 |         summary = json.load(f)
 23 |     
 24 |     results = summary.get('results', [])
 25 |     
 26 |     # 只计算 SR=1.0 的 case 的 step
 27 |     successful_steps = []
 28 |     for result in results:
 29 |         metrics = result.get('metrics', {})
 30 |         sr = metrics.get('sr', 0.0)
 31 |         step = metrics.get('step')
 32 |         
 33 |         # 只统计成功的 case (SR=1.0) 且 step 不为 None
 34 |         if sr == 1.0 and step is not None:
 35 |             successful_steps.append(step)
 36 |     
 37 |     # 计算新的 avg_step
 38 |     old_avg_step = summary.get('avg_step', 0.0)
 39 |     old_successful_cases = summary.get('successful_cases', 0)
 40 |     
 41 |     new_avg_step = sum(successful_steps) / len(successful_steps) if successful_steps else 0.0
 42 |     new_successful_cases = len(successful_steps)
 43 |     
 44 |     # 更新 summary
 45 |     summary['avg_step'] = new_avg_step
 46 |     summary['successful_cases'] = new_successful_cases
 47 |     
 48 |     return {
 49 |         'path': str(summary_path),
 50 |         'old_avg_step': old_avg_step,
 51 |         'new_avg_step': new_avg_step,
 52 |         'old_successful_cases': old_successful_cases,
 53 |         'new_successful_cases': new_successful_cases,
 54 |         'changed': abs(old_avg_step - new_avg_step) > 1e-6 or old_successful_cases != new_successful_cases,
 55 |         'updated_summary': summary
 56 |     }
 57 | 
 58 | 
 59 | def process_directory(base_dir: Path) -> List[Dict[str, Any]]:
 60 |     """处理目录下的所有 summary.json 文件"""
 61 |     
 62 |     results = []
 63 |     
 64 |     # 查找所有 summary.json 文件
 65 |     summary_files = list(base_dir.rglob('summary.json'))
 66 |     
 67 |     if not summary_files:
 68 |         print(f"  ⚠ 未找到 summary.json 文件")
 69 |         return results
 70 |     
 71 |     print(f"  找到 {len(summary_files)} 个 summary.json 文件")
 72 |     
 73 |     for summary_file in sorted(summary_files):
 74 |         try:
 75 |             result = recalculate_summary(summary_file)
 76 |             results.append(result)
 77 |             
 78 |             # 显示相对路径
 79 |             rel_path = summary_file.relative_to(base_dir)
 80 |             
 81 |             if result['changed']:
 82 |                 print(f"  ✓ {rel_path}")
 83 |                 print(f"    旧: avg_step={result['old_avg_step']:.4f}, successful_cases={result['old_successful_cases']}")
 84 |                 print(f"    新: avg_step={result['new_avg_step']:.4f}, successful_cases={result['new_successful_cases']}")
 85 |             else:
 86 |                 print(f"  - {rel_path} (无变化)")
 87 |                 
 88 |         except Exception as e:
 89 |             print(f"  ✗ 处理 {summary_file} 时出错: {e}")
 90 |             import traceback
 91 |             traceback.print_exc()
 92 |     
 93 |     return results
 94 | 
 95 | 
 96 | def save_updated_summaries(results: List[Dict[str, Any]], dry_run: bool = False):
 97 |     """保存更新后的 summary.json 文件"""
 98 |     
 99 |     changed_count = sum(1 for r in results if r['changed'])
100 |     
101 |     if changed_count == 0:
102 |         print("\n没有需要更新的文件")
103 |         return
104 |     
105 |     if dry_run:
106 |         print(f"\n[DRY RUN] 将更新 {changed_count} 个文件（实际未保存）")
107 |         return
108 |     
109 |     print(f"\n正在保存 {changed_count} 个更新的文件...")
110 |     
111 |     for result in results:
112 |         if result['changed']:
113 |             summary_path = Path(result['path'])
114 |             with open(summary_path, 'w') as f:
115 |                 json.dump(result['updated_summary'], f, indent=2)
116 |             print(f"  ✓ 已保存: {summary_path}")
117 |     
118 |     print(f"\n✅ 成功更新 {changed_count} 个文件")
119 | 
120 | 
def main():
    """Command-line entry point: recalculate ``avg_step`` for all summaries in the given directories.

    ``--dirs`` selects the directories to scan (missing ones are skipped with
    a warning); ``--dry-run`` reports the changes without writing any file.
    """
    import argparse

    parser = argparse.ArgumentParser(description='重新计算 VLM 评估结果的 avg_step')
    parser.add_argument('--dry-run', action='store_true', help='只显示变化，不实际保存')
    parser.add_argument('--dirs', nargs='+',
                       default=['vlm_eval_results/maze', 'vlm_eval_results/sokoban', 'vlm_eval_results/trapfield'],
                       help='要处理的目录列表')
    args = parser.parse_args()

    banner = "=" * 70
    print(banner)
    print("重新计算 VLM 评估结果的 avg_step")
    print(banner)
    print("新方法: 只对 SR=1.0 的成功 case 计算 avg_step")
    print()

    all_results = []
    for base_dir in map(Path, args.dirs):
        if not base_dir.exists():
            print(f"⚠ 目录不存在: {base_dir}")
            continue

        print(f"处理目录: {base_dir}")
        all_results.extend(process_directory(base_dir))
        print()

    # Totals across all processed directories
    total_files = len(all_results)
    changed_files = len([r for r in all_results if r['changed']])

    print(banner)
    print(f"统计: 共处理 {total_files} 个文件，其中 {changed_files} 个需要更新")
    print(banner)

    if args.dry_run:
        print("\n[DRY RUN 模式] 未实际保存文件")
        print("如需保存，请去掉 --dry-run 参数重新运行")
        return

    # Persist the recalculated summaries
    save_updated_summaries(all_results, dry_run=False)
165 | 
166 | 
# Run the recalculation only when this file is executed as a script.
if __name__ == '__main__':
    main()
169 | 
170 | 


--------------------------------------------------------------------------------
/prompts/METADATA_USAGE.md:
--------------------------------------------------------------------------------
  1 | # Metadata Generation Tool
  2 | 
  3 | ## Overview
  4 | 
  5 | Generate metadata.csv files for VR-Bench dataset with dynamic prompts based on skin configurations.
  6 | 
  7 | **Features:**
  8 | - Dynamic prompt generation from skin descriptions
  9 | - Flexible filtering by game type, skin, and difficulty
 10 | - Separate or merged output modes
 11 | - Support for both train and eval splits
 12 | 
 13 | ## Quick Start
 14 | 
 15 | ```bash
 16 | # Generate all metadata files
 17 | python test_dynamic_metadata.py
 18 | 
 19 | # Generate for specific game type
 20 | python test_dynamic_metadata.py --games maze
 21 | 
 22 | # Generate for specific skins and difficulties
 23 | python test_dynamic_metadata.py --games maze --skins 1 2 --difficulties easy
 24 | ```
 25 | 
 26 | ## Command-Line Arguments
 27 | 
 28 | ### `--games`
 29 | Select game types (multiple allowed)
 30 | 
 31 | **Options:** `maze`, `irregular_maze`, `maze3d`, `sokoban`, `trapfield`
 32 | 
 33 | ```bash
 34 | python test_dynamic_metadata.py --games maze sokoban
 35 | ```
 36 | 
 37 | ### `--skins`
 38 | Select skin IDs (multiple allowed)
 39 | 
 40 | **Options:** `1`, `2`, `3`, `4`, `5` (varies by game type)
 41 | 
 42 | **Skin counts:**
 43 | - maze: 5 skins
 44 | - irregular_maze: 4 skins
 45 | - maze3d: 4 skins
 46 | - sokoban: 5 skins
 47 | - trapfield: 4 skins
 48 | 
 49 | ```bash
 50 | python test_dynamic_metadata.py --skins 1 2 3
 51 | ```
 52 | 
 53 | ### `--difficulties`
 54 | Select difficulty levels (multiple allowed)
 55 | 
 56 | **Options:** `easy`, `medium`, `hard`
 57 | 
 58 | ```bash
 59 | python test_dynamic_metadata.py --difficulties easy hard
 60 | ```
 61 | 
 62 | ### `--splits`
 63 | Select which dataset splits to process (default: both `train` and `eval`)
 64 | 
 65 | **Options:** `train`, `eval`
 66 | 
 67 | ```bash
 68 | python test_dynamic_metadata.py --splits train
 69 | ```
 70 | 
 71 | ### `--merge`
 72 | Merge all matching data into a single metadata.csv
 73 | 
 74 | ```bash
 75 | python test_dynamic_metadata.py --games maze --skins 1 2 --merge
 76 | ```
 77 | 
 78 | ### `--dataset-root`
 79 | Specify dataset root directory (default: project_root/downloaded_dataset)
 80 | 
 81 | ```bash
 82 | python test_dynamic_metadata.py --dataset-root /path/to/dataset
 83 | ```
 84 | 
 85 | ### `--skins-root`
 86 | Specify skins configuration directory (default: project_root/skins)
 87 | 
 88 | ```bash
 89 | python test_dynamic_metadata.py --skins-root /path/to/skins
 90 | ```
 91 | 
 92 | ## Usage Examples
 93 | 
 94 | ### Generate all data
 95 | ```bash
 96 | python test_dynamic_metadata.py
 97 | ```
 98 | **Output:** 132 metadata.csv files (66 train + 66 eval)
 99 | 
100 | ### Generate specific game
101 | ```bash
102 | python test_dynamic_metadata.py --games maze
103 | ```
104 | **Output:** 30 files (5 skins × 3 difficulties × 2 splits)
105 | 
106 | ### Generate specific combination
107 | ```bash
108 | python test_dynamic_metadata.py --games maze --skins 1 --difficulties easy
109 | ```
110 | **Output:** 2 files (train/maze_1_easy and eval/maze_1_easy)
111 | 
112 | ### Merge multiple games
113 | ```bash
114 | python test_dynamic_metadata.py --games maze irregular_maze --merge
115 | ```
116 | **Output:** 2 merged files (one for train, one for eval)
117 | 
118 | ### Cross-skin training
119 | ```bash
120 | python test_dynamic_metadata.py --games maze --skins 1 2 3 --merge --splits train
121 | ```
122 | **Output:** 1 merged file containing all train data for maze skins 1, 2, 3
123 | 
124 | ### Regenerate specific skins
125 | ```bash
126 | python test_dynamic_metadata.py --games irregular_maze --skins 1 2 3
127 | ```
128 | **Output:** 18 files (3 skins × 3 difficulties × 2 splits)
129 | 
130 | ## Output Structure
131 | 
132 | ### Separate Mode (default)
133 | ```
134 | downloaded_dataset/
135 | └── metadata/
136 |     ├── train/
137 |     │   ├── maze_1_easy/
138 |     │   │   └── metadata.csv
139 |     │   ├── maze_1_medium/
140 |     │   │   └── metadata.csv
141 |     │   └── ...
142 |     └── eval/
143 |         ├── maze_1_easy/
144 |         │   └── metadata.csv
145 |         └── ...
146 | ```
147 | 
148 | ### Merge Mode
149 | ```
150 | downloaded_dataset/
151 | └── metadata/
152 |     ├── train/
153 |     │   └── maze_sokoban_1_2_easy/
154 |     │       └── metadata.csv
155 |     └── eval/
156 |         └── maze_sokoban_1_2_easy/
157 |             └── metadata.csv
158 | ```
159 | 
160 | ## Metadata CSV Format
161 | 
162 | Each CSV file contains 3 columns:
163 | 
164 | | Column | Description | Example |
165 | |--------|-------------|---------|
166 | | `video` | Video file path (relative to downloaded_dataset/) | `train/maze/1/easy/videos/easy_0001_0.mp4` |
167 | | `prompt` | Dynamically generated prompt | `Create a 2D animation...` |
168 | | `input_image` | Input image path (relative to downloaded_dataset/) | `train/maze/1/easy/images/easy_0001.png` |
169 | 
170 | ## Dynamic Prompt System
171 | 
172 | Prompts are automatically generated based on skin configurations in `skins/{game_type}/{skin_id}/description.json`.
173 | 
174 | **Example:** For maze skin 1:
175 | ```json
176 | {
177 |   "visual_description": {
178 |     "player": "red circle",
179 |     "goal": "green square",
180 |     "wall": "light blue square",
181 |     "floor": "white square"
182 |   }
183 | }
184 | ```
185 | 
186 | **Generated prompt:**
187 | ```
188 | Create a 2D animation based on the provided image of a maze.
189 | The red circle slides smoothly along the white square path,
190 | stopping perfectly on the green square...
191 | ```
192 | 
193 | Different skins produce different prompts automatically.
194 | 
195 | ## Notes
196 | 
197 | - All paths in metadata.csv are relative to `downloaded_dataset/` directory
198 | - The game type `irregular_maze` reads its skin configurations from the `pathfinder` skin directory (`skins/pathfinder/`)
199 | - If skin description is not found, a warning is displayed and the combination is skipped
200 | - Use `--merge` mode for training across multiple skins or game types
201 | 


--------------------------------------------------------------------------------
/evaluation/vlm_eval/action_utils.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import re
  3 | from typing import List, Dict, Any
  4 | 
  5 | 
  6 | def parse_actions(response: str, game_type: str = 'default') -> List[Dict[str, Any]]:
  7 |     if game_type in ['maze', 'trapfield']:
  8 |         return _parse_path_actions(response)
  9 |     elif game_type == 'sokoban':
 10 |         return _parse_sokoban_actions(response)
 11 |     elif game_type == 'pathfinder':
 12 |         return _parse_pathfinder_actions(response)
 13 |     elif game_type in ['maze3d', '3dmaze']:
 14 |         return _parse_maze3d_actions(response)
 15 | 
 16 |     attempts = [
 17 |         lambda: json.loads(response),
 18 |         lambda: _parse_markdown_json(response),
 19 |         lambda: _extract_json_array(response)
 20 |     ]
 21 | 
 22 |     for attempt in attempts:
 23 |         try:
 24 |             result = attempt()
 25 |             if isinstance(result, list):
 26 |                 return result
 27 |         except:
 28 |             continue
 29 | 
 30 |     raise ValueError(f"Failed to parse JSON from response: {response[:200]}")
 31 | 
 32 | 
 33 | def _parse_markdown_json(text: str) -> List[Dict[str, Any]]:
 34 |     match = re.search(r'```(?:json)?\s*(.*?)\s*```', text, re.DOTALL)
 35 |     if match:
 36 |         return json.loads(match.group(1))
 37 |     raise ValueError("No markdown JSON found")
 38 | 
 39 | 
 40 | def _extract_json_array(text: str) -> List[Dict[str, Any]]:
 41 |     match = re.search(r'\[.*\]', text, re.DOTALL)
 42 |     if match:
 43 |         return json.loads(match.group(0))
 44 |     raise ValueError("No JSON array found")
 45 | 
 46 | 
 47 | def _parse_path_actions(response: str) -> List[Dict[str, Any]]:
 48 |     attempts = [
 49 |         lambda: json.loads(response),
 50 |         lambda: _parse_markdown_json(response),
 51 |         lambda: _extract_json_object(response)
 52 |     ]
 53 | 
 54 |     for attempt in attempts:
 55 |         try:
 56 |             result = attempt()
 57 |             if isinstance(result, dict) and 'path' in result:
 58 |                 path = result['path']
 59 |                 if isinstance(path, list):
 60 |                     return [{'action': 'move', 'direction': d} for d in path]
 61 |         except:
 62 |             continue
 63 | 
 64 |     raise ValueError(f"Failed to parse path from response: {response[:200]}")
 65 | 
 66 | 
 67 | def _parse_sokoban_actions(response: str) -> List[Dict[str, Any]]:
 68 |     """Parse Sokoban actions - only move actions (up/down/left/right)"""
 69 |     attempts = [
 70 |         lambda: json.loads(response),
 71 |         lambda: _parse_markdown_json(response),
 72 |         lambda: _extract_json_object(response)
 73 |     ]
 74 | 
 75 |     for attempt in attempts:
 76 |         try:
 77 |             result = attempt()
 78 |             if isinstance(result, dict) and 'actions' in result:
 79 |                 actions = result['actions']
 80 |                 if isinstance(actions, list):
 81 |                     parsed = []
 82 |                     for a in actions:
 83 |                         # 只支持 move 操作，push 会自动发生
 84 |                         parsed.append({'action': 'move', 'direction': a})
 85 |                     return parsed
 86 |         except:
 87 |             continue
 88 | 
 89 |     raise ValueError(f"Failed to parse sokoban actions from response: {response[:200]}")
 90 | 
 91 | 
 92 | def _parse_pathfinder_actions(response: str) -> List[Dict[str, Any]]:
 93 |     """Parse PathFinder actions - letter array representing the path
 94 | 
 95 |     Expected format: {"path": ["A", "C", "D"]}
 96 |     """
 97 |     attempts = [
 98 |         lambda: json.loads(response),
 99 |         lambda: _parse_markdown_json(response),
100 |         lambda: _extract_json_object(response)
101 |     ]
102 | 
103 |     for attempt in attempts:
104 |         try:
105 |             result = attempt()
106 |             if isinstance(result, dict) and 'path' in result:
107 |                 path = result['path']
108 |                 if isinstance(path, list):
109 |                     # Validate all elements are strings
110 |                     if all(isinstance(item, str) for item in path):
111 |                         return [{'action': 'path', 'path': path}]
112 |         except:
113 |             continue
114 | 
115 |     raise ValueError(f"Failed to parse pathfinder actions from response: {response[:200]}")
116 | 
117 | 
118 | def _parse_maze3d_actions(response: str) -> List[Dict[str, Any]]:
119 |     """Parse 3D Maze actions - direction array
120 | 
121 |     Expected format: {"path": ["up", "forward_right", "forward_left", ...]}
122 |     Valid directions: forward_left, forward_right, backward_left, backward_right, up, down
123 |     """
124 |     valid_directions = {
125 |         'forward_left', 'forward_right', 'backward_left', 'backward_right', 'up', 'down'
126 |     }
127 | 
128 |     attempts = [
129 |         lambda: json.loads(response),
130 |         lambda: _parse_markdown_json(response),
131 |         lambda: _extract_json_object(response)
132 |     ]
133 | 
134 |     for attempt in attempts:
135 |         try:
136 |             result = attempt()
137 |             if isinstance(result, dict) and 'path' in result:
138 |                 path = result['path']
139 |                 if isinstance(path, list):
140 |                     # Validate all elements are valid direction strings
141 |                     if all(isinstance(item, str) and item in valid_directions for item in path):
142 |                         return [{'action': 'move', 'direction': direction} for direction in path]
143 |         except:
144 |             continue
145 | 
146 |     raise ValueError(f"Failed to parse maze3d actions from response: {response[:200]}")
147 | 
148 | 
149 | def _extract_json_object(text: str) -> Dict[str, Any]:
150 |     match = re.search(r'\{.*\}', text, re.DOTALL)
151 |     if match:
152 |         return json.loads(match.group(0))
153 |     raise ValueError("No JSON object found")
154 | 


--------------------------------------------------------------------------------
/core/texture_handler.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Unified texture handler for all games.
  3 | Handles loading, caching, and resizing of game textures.
  4 | """
  5 | 
  6 | import os
  7 | from pathlib import Path
  8 | from typing import Optional, Dict
  9 | from PIL import Image
 10 | 
 11 | from .constants import SUPPORTED_IMAGE_FORMATS, DEFAULT_CELL_SIZE
 12 | 
 13 | 
 14 | class BaseTextureHandler:
 15 |     """Base texture handler shared by all games"""
 16 |     
 17 |     def __init__(self, assets_folder: Optional[str] = None, cell_size: int = DEFAULT_CELL_SIZE):
 18 |         """
 19 |         Initialize texture handler.
 20 |         
 21 |         Args:
 22 |             assets_folder: Path to assets folder. If None, uses default.
 23 |             cell_size: Size of each cell in pixels.
 24 |         """
 25 |         if assets_folder is None:
 26 |             assets_folder = Path(__file__).parent.parent / "assets"
 27 |         
 28 |         self.assets_path = Path(assets_folder)
 29 |         self.cell_size = cell_size
 30 |         self.textures: Dict[str, Image.Image] = {}
 31 |         
 32 |         # Ensure assets directory exists
 33 |         self.assets_path.mkdir(parents=True, exist_ok=True)
 34 |     
 35 |     def load_textures(self, texture_names: list):
 36 |         """
 37 |         Load specified textures.
 38 |         
 39 |         Args:
 40 |             texture_names: List of texture names to load (e.g., ['floor', 'wall', 'player'])
 41 |         """
 42 |         for name in texture_names:
 43 |             texture = self._load_texture(name)
 44 |             if texture:
 45 |                 self.textures[name] = texture
 46 |     
 47 |     def _load_texture(self, name: str) -> Optional[Image.Image]:
 48 |         """
 49 |         Load a single texture from file.
 50 |         
 51 |         Args:
 52 |             name: Texture name (without extension)
 53 |             
 54 |         Returns:
 55 |             Loaded and resized texture, or None if not found
 56 |         """
 57 |         file_path = None
 58 |         for ext in SUPPORTED_IMAGE_FORMATS:
 59 |             candidate = self.assets_path / f"{name}{ext}"
 60 |             if candidate.exists():
 61 |                 file_path = candidate
 62 |                 break
 63 |         
 64 |         if file_path is None:
 65 |             return None
 66 |         
 67 |         try:
 68 |             img = Image.open(file_path).convert("RGBA")
 69 |             return self._resize_keep_aspect_ratio(img, self.cell_size)
 70 |         except Exception as e:
 71 |             print(f"Failed to load texture {name}: {e}")
 72 |             return None
 73 |     
 74 |     def _resize_keep_aspect_ratio(self, img: Image.Image, target_size: int) -> Image.Image:
 75 |         """
 76 |         Resize image while maintaining aspect ratio, centered on transparent canvas.
 77 |         
 78 |         Args:
 79 |             img: Source image
 80 |             target_size: Target size (width and height)
 81 |             
 82 |         Returns:
 83 |             Resized image on transparent canvas
 84 |         """
 85 |         original_width, original_height = img.size
 86 |         
 87 |         # Calculate scaling ratio
 88 |         ratio = min(target_size / original_width, target_size / original_height)
 89 |         new_width = int(original_width * ratio)
 90 |         new_height = int(original_height * ratio)
 91 |         
 92 |         # Resize with high quality
 93 |         resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
 94 |         
 95 |         # Create transparent canvas
 96 |         result = Image.new("RGBA", (target_size, target_size), (0, 0, 0, 0))
 97 |         
 98 |         # Center the resized image
 99 |         paste_x = (target_size - new_width) // 2
100 |         paste_y = (target_size - new_height) // 2
101 |         result.paste(resized_img, (paste_x, paste_y), resized_img if resized_img.mode == 'RGBA' else None)
102 |         
103 |         return result
104 |     
105 |     def get_texture(self, name: str) -> Optional[Image.Image]:
106 |         """
107 |         Get a loaded texture by name.
108 |         
109 |         Args:
110 |             name: Texture name
111 |             
112 |         Returns:
113 |             Texture image or None if not loaded
114 |         """
115 |         return self.textures.get(name)
116 |     
117 |     def has_texture(self, name: str) -> bool:
118 |         """
119 |         Check if a texture is loaded.
120 |         
121 |         Args:
122 |             name: Texture name
123 |             
124 |         Returns:
125 |             True if texture is loaded
126 |         """
127 |         return name in self.textures
128 | 
129 | 
130 | # Global texture handler cache
131 | _texture_handlers: Dict[str, BaseTextureHandler] = {}
132 | 
133 | 
def get_texture_handler(assets_folder: Optional[str] = None,
                       cell_size: int = DEFAULT_CELL_SIZE,
                       texture_names: Optional[list] = None) -> BaseTextureHandler:
    """
    Get or create a cached texture handler.

    Handlers are cached per (assets_folder, cell_size) pair. Requested
    textures that the handler has not loaded yet are loaded on every call,
    so a later call asking for additional textures is honoured even when
    the handler itself comes from the cache.

    Args:
        assets_folder: Path to assets folder. If None, uses the ``assets``
            directory next to this module's parent package.
        cell_size: Size of each cell
        texture_names: List of textures to load

    Returns:
        Cached or new texture handler
    """
    if assets_folder is None:
        assets_folder = str(Path(__file__).parent.parent / "assets")

    cache_key = f"{assets_folder}:{cell_size}"

    handler = _texture_handlers.get(cache_key)
    if handler is None:
        handler = BaseTextureHandler(assets_folder, cell_size)
        _texture_handlers[cache_key] = handler

    if texture_names:
        # Previously texture_names was ignored on a cache hit, leaving
        # newly requested textures unloaded.
        missing = [name for name in texture_names if not handler.has_texture(name)]
        if missing:
            handler.load_textures(missing)

    return handler
160 | 
161 | 
def clear_texture_cache():
    """Drop every cached texture handler, emptying the module-level cache in place."""
    # The dict is mutated, not rebound, so no ``global`` declaration is needed.
    _texture_handlers.clear()
166 | 
167 | 


--------------------------------------------------------------------------------
/evaluation/videomodel_eval/evaluator.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import sys
  3 | from pathlib import Path
  4 | sys.path.insert(0, str(Path(__file__).parent.parent))
  5 | 
  6 | import numpy as np
  7 | from typing import Dict, Any, Optional, Tuple
  8 | from core.schema import UnifiedState
  9 | 
 10 | # 尝试导入GPU版本，如果失败则使用CPU版本
 11 | try:
 12 |     from evaluation.videomodel_eval.metrics_gpu import (
 13 |         PrecisionRateMetricGPU as PrecisionRateMetric,
 14 |         StepMetric,
 15 |         ExactMatchMetric,
 16 |         SuccessRateMetric,
 17 |         FidelityMetric,
 18 |         normalize_trajectory,
 19 |         GPU_AVAILABLE
 20 |     )
 21 |     if GPU_AVAILABLE:
 22 |         print("✓ GPU加速已启用")
 23 | except ImportError:
 24 |     from evaluation.videomodel_eval.metrics import (
 25 |         PrecisionRateMetric,
 26 |         StepMetric,
 27 |         ExactMatchMetric,
 28 |         normalize_trajectory
 29 |     )
 30 |     GPU_AVAILABLE = False
 31 |     print("✓ 使用CPU计算")
 32 |     # CPU版本没有SR和Fidelity，需要从GPU版本导入
 33 |     from evaluation.videomodel_eval.metrics_gpu import SuccessRateMetric, FidelityMetric
 34 | 
 35 | 
 36 | class TrajectoryEvaluator:
 37 |     """
 38 |     轨迹评估器 - 基于 GUI 路径跟踪一致性评测指标定义文档 v1.1
 39 |     """
 40 | 
 41 |     def __init__(self, eps_ratio: float = 0.01, num_samples: int = 1000,
 42 |                  pr_threshold: float = 0.98, step_threshold: float = 0.1,
 43 |                  fidelity_frame_step: int = 5, fidelity_pixel_threshold: int = 5):
 44 |         """
 45 |         Args:
 46 |             eps_ratio: 匹配阈值（相对对角线比例），默认 0.01 (1%)
 47 |             num_samples: 采样点数量，默认 1000
 48 |             pr_threshold: Exact Match 的 PR 阈值，默认 0.98
 49 |             step_threshold: Exact Match 的 Step 阈值，默认 0.1
 50 |             fidelity_frame_step: 保真度计算的帧采样步长，默认 5
 51 |             fidelity_pixel_threshold: 保真度计算的像素差异阈值，默认 5（±5灰度值）
 52 |         """
 53 |         self.eps_ratio = eps_ratio
 54 |         self.num_samples = num_samples
 55 |         self.pr_threshold = pr_threshold
 56 |         self.step_threshold = step_threshold
 57 | 
 58 |         self.metrics = [
 59 |             PrecisionRateMetric(eps_ratio, num_samples),
 60 |             SuccessRateMetric(),
 61 |             StepMetric(),
 62 |             ExactMatchMetric(pr_threshold, step_threshold),
 63 |             FidelityMetric(frame_step=fidelity_frame_step, pixel_threshold=fidelity_pixel_threshold),
 64 |         ]
 65 |     
 66 |     def evaluate(self,
 67 |                  gt_traj: np.ndarray,
 68 |                  gen_traj: np.ndarray,
 69 |                  video_width: int,
 70 |                  video_height: int,
 71 |                  state: Optional[UnifiedState] = None,
 72 |                  gen_box_traj: Optional[np.ndarray] = None,
 73 |                  **kwargs) -> Dict[str, Any]:
 74 |         """
 75 |         评估两条轨迹
 76 | 
 77 |         Args:
 78 |             gt_traj: Ground truth 轨迹 (N, 2)，像素坐标（玩家轨迹）
 79 |             gen_traj: Generated 轨迹 (M, 2)，像素坐标（玩家轨迹）
 80 |             video_width: 视频宽度
 81 |             video_height: 视频高度
 82 |             state: UnifiedState 对象（可选）
 83 |             gen_box_traj: Generated 箱子轨迹（仅推箱子游戏，用于SR计算）
 84 |             **kwargs: 额外参数
 85 | 
 86 |         Returns:
 87 |             result: 评估结果字典
 88 |         """
 89 |         if len(gt_traj) < 2 or len(gen_traj) < 2:
 90 |             return self._empty_result()
 91 | 
 92 |         # 归一化轨迹到 [0,1]×[0,1]
 93 |         gt_traj_norm = normalize_trajectory(gt_traj, video_width, video_height)
 94 |         gen_traj_norm = normalize_trajectory(gen_traj, video_width, video_height)
 95 | 
 96 |         result = {}
 97 |         shared_data = {
 98 |             'state': state,
 99 |             'video_width': video_width,
100 |             'video_height': video_height,
101 |         }
102 | 
103 |         # 如果有state，提取goal bbox并归一化
104 |         if state is not None:
105 |             goal_bbox_pixel = state.goal.bbox
106 |             # 使用 state 中记录的原始图片尺寸来归一化 bbox
107 |             # 因为 bbox 坐标是基于原始图片的，而不是生成视频的尺寸
108 |             state_width = state.render.image_width
109 |             state_height = state.render.image_height
110 |             goal_bbox_norm = (
111 |                 goal_bbox_pixel.x / state_width,
112 |                 goal_bbox_pixel.y / state_height,
113 |                 goal_bbox_pixel.width / state_width,
114 |                 goal_bbox_pixel.height / state_height
115 |             )
116 |             shared_data['goal_bbox'] = goal_bbox_norm
117 | 
118 |         # 如果是推箱子游戏，归一化箱子轨迹并用于 SR 计算
119 |         if gen_box_traj is not None:
120 |             gen_box_traj_norm = normalize_trajectory(gen_box_traj, video_width, video_height)
121 |             shared_data['gen_box_traj'] = gen_box_traj_norm
122 | 
123 |         # 依次计算所有 metrics
124 |         for metric in self.metrics:
125 |             value, extra = metric.compute(gt_traj_norm, gen_traj_norm, **kwargs, **shared_data)
126 |             result[metric.name] = value
127 |             shared_data.update(extra)
128 |             # 将 metric 的值也加入 shared_data，供后续 metric 使用
129 |             shared_data[metric.name] = value
130 | 
131 |         # 添加额外数据（用于可视化和调试）
132 |         result.update({
133 |             'gt_length': shared_data.get('gt_length', 0.0),
134 |             'gen_length': shared_data.get('gen_length', 0.0),
135 |             'is_perfect': shared_data.get('is_perfect', False),
136 |             'gt_resampled': shared_data.get('gt_resampled'),
137 |             'gen_resampled': shared_data.get('gen_resampled'),
138 |             'distances': shared_data.get('distances')
139 |         })
140 | 
141 |         return result
142 | 
143 |     def _empty_result(self) -> Dict[str, Any]:
144 |         """返回空结果"""
145 |         return {
146 |             'pr': 0.0,
147 |             'sd': 0.0,
148 |             'em': 0.0,
149 |             'sr': 0.0,
150 |             'mf': 0.0,
151 |             'gt_length': 0.0,
152 |             'gen_length': 0.0,
153 |             'is_perfect': False
154 |         }
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/games/pathfinder/texture_handler.py:
--------------------------------------------------------------------------------
  1 | """
  2 | PathFinder 纹理处理器
  3 | 支持起点、终点图标和道路纹理
  4 | """
  5 | 
  6 | from pathlib import Path
  7 | from typing import Optional
  8 | from PIL import Image
  9 | 
 10 | 
 11 | class PathFinderTextureHandler:
 12 |     """PathFinder 纹理处理器"""
 13 |     
 14 |     # 支持的图片格式
 15 |     SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg']
 16 |     
 17 |     # 必需的纹理
 18 |     REQUIRED_TEXTURES = ['start', 'end', 'road']
 19 |     
 20 |     def __init__(self, assets_folder: Optional[str] = None):
 21 |         """
 22 |         初始化纹理处理器
 23 |         
 24 |         Args:
 25 |             assets_folder: 纹理文件夹路径，如果为 None 则使用默认路径
 26 |         """
 27 |         if assets_folder is None:
 28 |             # 默认使用 games/pathfinder/assets
 29 |             assets_folder = Path(__file__).parent / "assets"
 30 |         
 31 |         self.assets_path = Path(assets_folder)
 32 |         self.textures = {}
 33 |         
 34 |         # 确保资源文件夹存在
 35 |         self.assets_path.mkdir(parents=True, exist_ok=True)
 36 |     
 37 |     def load_textures(self):
 38 |         """加载所有纹理"""
 39 |         for texture_name in self.REQUIRED_TEXTURES:
 40 |             texture = self._load_texture(texture_name)
 41 |             if texture:
 42 |                 self.textures[texture_name] = texture
 43 |     
 44 |     def _load_texture(self, name: str) -> Optional[Image.Image]:
 45 |         """
 46 |         加载单个纹理
 47 |         
 48 |         Args:
 49 |             name: 纹理名称（不含扩展名）
 50 |             
 51 |         Returns:
 52 |             加载的纹理图片，如果不存在则返回 None
 53 |         """
 54 |         for ext in self.SUPPORTED_FORMATS:
 55 |             file_path = self.assets_path / f"{name}{ext}"
 56 |             if file_path.exists():
 57 |                 try:
 58 |                     img = Image.open(file_path).convert("RGBA")
 59 |                     return img
 60 |                 except Exception as e:
 61 |                     print(f"Failed to load texture {name}: {e}")
 62 |                     return None
 63 |         
 64 |         return None
 65 |     
 66 |     def get_texture(self, name: str) -> Optional[Image.Image]:
 67 |         """
 68 |         获取纹理
 69 |         
 70 |         Args:
 71 |             name: 纹理名称
 72 |             
 73 |         Returns:
 74 |             纹理图片，如果不存在则返回 None
 75 |         """
 76 |         return self.textures.get(name)
 77 |     
 78 |     def has_texture(self, name: str) -> bool:
 79 |         """
 80 |         检查是否有指定纹理
 81 |         
 82 |         Args:
 83 |             name: 纹理名称
 84 |             
 85 |         Returns:
 86 |             是否存在该纹理
 87 |         """
 88 |         return name in self.textures
 89 |     
 90 |     def get_start_icon(self, size: int) -> Optional[Image.Image]:
 91 |         """
 92 |         获取起点图标（调整到指定尺寸）
 93 |         
 94 |         Args:
 95 |             size: 图标尺寸（直径）
 96 |             
 97 |         Returns:
 98 |             调整尺寸后的起点图标
 99 |         """
100 |         texture = self.get_texture('start')
101 |         if texture:
102 |             return self._resize_keep_aspect_ratio(texture, size)
103 |         return None
104 |     
105 |     def get_end_icon(self, size: int) -> Optional[Image.Image]:
106 |         """
107 |         获取终点图标（调整到指定尺寸）
108 |         
109 |         Args:
110 |             size: 图标尺寸（直径）
111 |             
112 |         Returns:
113 |             调整尺寸后的终点图标
114 |         """
115 |         texture = self.get_texture('end')
116 |         if texture:
117 |             return self._resize_keep_aspect_ratio(texture, size)
118 |         return None
119 |     
120 |     def get_road_texture(self) -> Optional[Image.Image]:
121 |         """
122 |         获取道路纹理（原始尺寸）
123 |         
124 |         Returns:
125 |             道路纹理图片
126 |         """
127 |         return self.get_texture('road')
128 |     
129 |     @staticmethod
130 |     def _resize_keep_aspect_ratio(img: Image.Image, target_size: int) -> Image.Image:
131 |         """
132 |         调整图片尺寸，保持长宽比
133 |         
134 |         Args:
135 |             img: 原始图片
136 |             target_size: 目标尺寸（正方形边长）
137 |             
138 |         Returns:
139 |             调整后的图片
140 |         """
141 |         # 获取原始尺寸
142 |         width, height = img.size
143 |         
144 |         # 计算缩放比例
145 |         if width > height:
146 |             new_width = target_size
147 |             new_height = int(height * target_size / width)
148 |         else:
149 |             new_height = target_size
150 |             new_width = int(width * target_size / height)
151 |         
152 |         # 调整尺寸
153 |         resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
154 |         
155 |         # 创建正方形画布，居中放置
156 |         canvas = Image.new('RGBA', (target_size, target_size), (0, 0, 0, 0))
157 |         offset_x = (target_size - new_width) // 2
158 |         offset_y = (target_size - new_height) // 2
159 |         canvas.paste(resized, (offset_x, offset_y), resized)
160 |         
161 |         return canvas
162 |     
163 |     def validate_textures(self) -> tuple[bool, list]:
164 |         """
165 |         验证所有必需纹理是否存在
166 |         
167 |         Returns:
168 |             (是否全部存在, 缺失的纹理列表)
169 |         """
170 |         missing = []
171 |         for texture_name in self.REQUIRED_TEXTURES:
172 |             if not self.has_texture(texture_name):
173 |                 missing.append(texture_name)
174 |         
175 |         return len(missing) == 0, missing
176 | 
177 | 
178 | # 全局缓存
179 | _texture_handlers = {}
180 | 
181 | 
def get_texture_handler(assets_folder: Optional[str] = None) -> PathFinderTextureHandler:
    """
    Return the cached texture handler for a folder, creating it on first use.

    A newly created handler has its textures loaded before it is cached.

    Args:
        assets_folder: Path to the texture folder; if None, the ``assets``
            directory next to this module is used

    Returns:
        The texture handler instance
    """
    folder = assets_folder
    if folder is None:
        folder = str(Path(__file__).parent / "assets")

    key = str(folder)

    cached = _texture_handlers.get(key)
    if cached is not None:
        return cached

    created = PathFinderTextureHandler(folder)
    created.load_textures()
    _texture_handlers[key] = created
    return created
203 | 
204 | 


--------------------------------------------------------------------------------