├── games ├── maze │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ └── file_utils.py │ ├── generators │ │ ├── __init__.py │ │ ├── video_gen.py │ │ ├── image_gen.py │ │ ├── state_gen.py │ │ ├── data_gen.py │ │ └── maze_gen.py │ ├── README.md │ ├── __main__.py │ ├── constants.py │ ├── templates │ │ ├── __init__.py │ │ ├── turn_count.py │ │ ├── player_position.py │ │ ├── goal_position.py │ │ ├── base_template.py │ │ ├── find_path_to_goal.py │ │ ├── position_after_moving.py │ │ └── available_directions.py │ ├── config.py │ ├── test_skin.py │ ├── main.py │ └── default_textures.py ├── __init__.py ├── trapfield │ ├── __init__.py │ ├── config.py │ └── constants.py ├── maze3d │ ├── __init__.py │ └── color_handler.py ├── pathfinder │ ├── __init__.py │ ├── constants.py │ ├── board.py │ └── texture_handler.py └── sokoban │ ├── __init__.py │ └── config.py ├── AutoEnv ├── base │ ├── __init__.py │ ├── engine │ │ ├── __init__.py │ │ └── cost_monitor.py │ ├── utils │ │ ├── __init__.py │ │ └── image.py │ └── pipeline │ │ ├── __init__.py │ │ ├── base_node.py │ │ └── base_pipeline.py ├── config │ └── env_skin_gen.yaml ├── autoenv │ └── pipeline │ │ ├── __init__.py │ │ └── visual │ │ ├── __init__.py │ │ ├── prompt.py │ │ └── pipeline.py └── LICENSE ├── skins ├── maze │ ├── 1 │ │ ├── wall.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ └── description.json │ ├── 2 │ │ ├── wall.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ └── description.json │ ├── 3 │ │ ├── wall.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ └── description.json │ ├── 4 │ │ ├── wall.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ └── description.json │ └── 5 │ │ ├── wall.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ └── description.json ├── sokoban │ ├── 1 │ │ ├── box.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ ├── wall.png │ │ └── description.json │ ├── 2 │ │ ├── box.png │ │ ├── floor.png │ │ ├── player.png │ 
│ ├── target.png │ │ ├── wall.png │ │ └── description.json │ ├── 3 │ │ ├── box.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ ├── wall.png │ │ └── description.json │ ├── 4 │ │ ├── box.png │ │ ├── floor.png │ │ ├── player.png │ │ ├── target.png │ │ ├── wall.png │ │ └── description.json │ └── 5 │ │ ├── box.jpg │ │ ├── floor.jpg │ │ ├── player.jpg │ │ ├── target.jpg │ │ ├── wall.jpg │ │ └── description.json ├── pathfinder │ ├── 1 │ │ ├── end.png │ │ ├── road.png │ │ ├── start.png │ │ └── description.json │ ├── 2 │ │ ├── end.png │ │ ├── road.png │ │ ├── start.png │ │ └── description.json │ ├── 3 │ │ ├── end.png │ │ ├── road.png │ │ ├── start.png │ │ └── description.json │ └── 4 │ │ ├── end.png │ │ ├── road.png │ │ ├── start.png │ │ └── description.json ├── trapfield │ ├── 1 │ │ ├── goal.png │ │ ├── trap.png │ │ ├── floor.png │ │ ├── player.png │ │ └── description.json │ ├── 2 │ │ ├── goal.png │ │ ├── trap.png │ │ ├── floor.png │ │ ├── player.png │ │ └── description.json │ ├── 3 │ │ ├── goal.png │ │ ├── trap.png │ │ ├── floor.png │ │ ├── player.png │ │ └── description.json │ └── 4 │ │ ├── goal.png │ │ ├── trap.png │ │ ├── floor.png │ │ ├── player.png │ │ └── description.json └── maze3d │ ├── 1 │ ├── colors.json │ └── description.json │ ├── 2 │ ├── colors.json │ └── description.json │ ├── 3 │ ├── colors.json │ └── description.json │ └── 4 │ ├── colors.json │ └── description.json ├── scripts ├── vlm_evaluate.sh ├── generate_by_skins.sh ├── generate_videos.sh ├── Wan2.2-TI2V-5B_lora.py ├── start_sglang_server.sh └── videomodel_evaluate.sh ├── core ├── schema │ ├── __init__.py │ ├── grid.py │ ├── render.py │ ├── entity.py │ ├── position.py │ └── state.py ├── __init__.py ├── constants.py ├── game_adapter.py └── texture_handler.py ├── utils ├── __init__.py └── video_metadata.py ├── generation └── __init__.py ├── evaluation ├── vlm_eval │ ├── executors │ │ ├── __init__.py │ │ ├── maze_executor.py │ │ └── trapfield_executor.py │ ├── __init__.py │ ├── 
game_executor.py │ ├── prompts │ │ ├── trapfield_prompt.py │ │ ├── maze_prompt.py │ │ ├── sokoban_prompt.py │ │ ├── pathfinder_prompt.py │ │ ├── maze3d_prompt.py │ │ └── __init__.py │ ├── vlm_client.py │ ├── MODEL_CONFIG.md │ ├── action_metrics.py │ ├── run_vlm_eval.py │ ├── recalculate_avg_step.py │ └── action_utils.py ├── videomodel_eval │ ├── __init__.py │ └── evaluator.py └── __init__.py ├── config ├── config_maze.yaml ├── vlm │ ├── maze3d_eval.yaml │ ├── sokoban_eval.yaml │ ├── trapfield_eval.yaml │ ├── maze_eval.yaml │ └── pathfinder_eval.yaml ├── config_trapfield.yaml ├── config_3d_maze.yaml ├── config_pathfinder.yaml └── config.yaml ├── .env.example ├── LICENSE ├── requirements.txt ├── .gitignore ├── prompts ├── videomodel_pathfinder_prompt.py ├── videomodel_maze_prompt.py ├── videomodel_trapfield_prompt.py ├── videomodel_sokoban_prompt.py ├── videomodel_maze3d_prompt.py ├── __init__.py └── METADATA_USAGE.md └── dataset_init.py /games/maze/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /games/maze/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /games/maze/generators/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /AutoEnv/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Base package initializer 2 | -------------------------------------------------------------------------------- /AutoEnv/base/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Engine subpackage 2 | -------------------------------------------------------------------------------- 
/AutoEnv/base/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Utils subpackage 2 | -------------------------------------------------------------------------------- /AutoEnv/base/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | # Pipeline subpackage 2 | -------------------------------------------------------------------------------- /games/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Game modules using unified core. 3 | """ 4 | 5 | -------------------------------------------------------------------------------- /games/maze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/games/maze/README.md -------------------------------------------------------------------------------- /skins/maze/1/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/wall.png -------------------------------------------------------------------------------- /skins/maze/2/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/wall.png -------------------------------------------------------------------------------- /skins/maze/3/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/wall.png -------------------------------------------------------------------------------- /skins/maze/4/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/wall.png 
-------------------------------------------------------------------------------- /skins/maze/5/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/wall.png -------------------------------------------------------------------------------- /skins/maze/1/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/floor.png -------------------------------------------------------------------------------- /skins/maze/1/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/player.png -------------------------------------------------------------------------------- /skins/maze/1/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/1/target.png -------------------------------------------------------------------------------- /skins/maze/2/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/floor.png -------------------------------------------------------------------------------- /skins/maze/2/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/player.png -------------------------------------------------------------------------------- /skins/maze/2/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/2/target.png 
-------------------------------------------------------------------------------- /skins/maze/3/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/floor.png -------------------------------------------------------------------------------- /skins/maze/3/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/player.png -------------------------------------------------------------------------------- /skins/maze/3/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/3/target.png -------------------------------------------------------------------------------- /skins/maze/4/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/floor.png -------------------------------------------------------------------------------- /skins/maze/4/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/player.png -------------------------------------------------------------------------------- /skins/maze/4/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/4/target.png -------------------------------------------------------------------------------- /skins/maze/5/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/floor.png 
-------------------------------------------------------------------------------- /skins/maze/5/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/player.png -------------------------------------------------------------------------------- /skins/maze/5/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/maze/5/target.png -------------------------------------------------------------------------------- /skins/sokoban/1/box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/box.png -------------------------------------------------------------------------------- /skins/sokoban/2/box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/box.png -------------------------------------------------------------------------------- /skins/sokoban/3/box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/box.png -------------------------------------------------------------------------------- /skins/sokoban/4/box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/box.png -------------------------------------------------------------------------------- /skins/sokoban/5/box.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/box.jpg 
-------------------------------------------------------------------------------- /skins/pathfinder/1/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/1/end.png -------------------------------------------------------------------------------- /skins/pathfinder/2/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/2/end.png -------------------------------------------------------------------------------- /skins/pathfinder/3/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/3/end.png -------------------------------------------------------------------------------- /skins/pathfinder/4/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/4/end.png -------------------------------------------------------------------------------- /skins/sokoban/1/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/floor.png -------------------------------------------------------------------------------- /skins/sokoban/1/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/player.png -------------------------------------------------------------------------------- /skins/sokoban/1/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/target.png 
-------------------------------------------------------------------------------- /skins/sokoban/1/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/1/wall.png -------------------------------------------------------------------------------- /skins/sokoban/2/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/floor.png -------------------------------------------------------------------------------- /skins/sokoban/2/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/player.png -------------------------------------------------------------------------------- /skins/sokoban/2/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/target.png -------------------------------------------------------------------------------- /skins/sokoban/2/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/2/wall.png -------------------------------------------------------------------------------- /skins/sokoban/3/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/floor.png -------------------------------------------------------------------------------- /skins/sokoban/3/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/player.png 
-------------------------------------------------------------------------------- /skins/sokoban/3/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/target.png -------------------------------------------------------------------------------- /skins/sokoban/3/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/3/wall.png -------------------------------------------------------------------------------- /skins/sokoban/4/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/floor.png -------------------------------------------------------------------------------- /skins/sokoban/4/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/player.png -------------------------------------------------------------------------------- /skins/sokoban/4/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/target.png -------------------------------------------------------------------------------- /skins/sokoban/4/wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/4/wall.png -------------------------------------------------------------------------------- /skins/sokoban/5/floor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/floor.jpg 
-------------------------------------------------------------------------------- /skins/sokoban/5/player.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/player.jpg -------------------------------------------------------------------------------- /skins/sokoban/5/target.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/target.jpg -------------------------------------------------------------------------------- /skins/sokoban/5/wall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/sokoban/5/wall.jpg -------------------------------------------------------------------------------- /skins/trapfield/1/goal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/goal.png -------------------------------------------------------------------------------- /skins/trapfield/1/trap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/trap.png -------------------------------------------------------------------------------- /skins/trapfield/2/goal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/goal.png -------------------------------------------------------------------------------- /skins/trapfield/2/trap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/trap.png 
-------------------------------------------------------------------------------- /skins/trapfield/3/goal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/goal.png -------------------------------------------------------------------------------- /skins/trapfield/3/trap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/trap.png -------------------------------------------------------------------------------- /skins/trapfield/4/goal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/goal.png -------------------------------------------------------------------------------- /skins/trapfield/4/trap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/trap.png -------------------------------------------------------------------------------- /skins/pathfinder/1/road.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/1/road.png -------------------------------------------------------------------------------- /skins/pathfinder/1/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/1/start.png -------------------------------------------------------------------------------- /skins/pathfinder/2/road.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/2/road.png 
-------------------------------------------------------------------------------- /skins/pathfinder/2/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/2/start.png -------------------------------------------------------------------------------- /skins/pathfinder/3/road.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/3/road.png -------------------------------------------------------------------------------- /skins/pathfinder/3/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/3/start.png -------------------------------------------------------------------------------- /skins/pathfinder/4/road.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/4/road.png -------------------------------------------------------------------------------- /skins/pathfinder/4/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/pathfinder/4/start.png -------------------------------------------------------------------------------- /skins/trapfield/1/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/floor.png -------------------------------------------------------------------------------- /skins/trapfield/1/player.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/1/player.png -------------------------------------------------------------------------------- /skins/trapfield/2/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/floor.png -------------------------------------------------------------------------------- /skins/trapfield/2/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/2/player.png -------------------------------------------------------------------------------- /skins/trapfield/3/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/floor.png -------------------------------------------------------------------------------- /skins/trapfield/3/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/3/player.png -------------------------------------------------------------------------------- /skins/trapfield/4/floor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/floor.png -------------------------------------------------------------------------------- /skins/trapfield/4/player.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoundationAgents/VR-Bench/HEAD/skins/trapfield/4/player.png -------------------------------------------------------------------------------- /games/maze/__main__.py: -------------------------------------------------------------------------------- 1 
| from .main import main 2 | 3 | if __name__ == "__main__": 4 | raise SystemExit(main()) 5 | -------------------------------------------------------------------------------- /games/trapfield/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | TrapField Game - 陷阱场游戏 3 | 类似迷宫,但没有外围墙,内部墙换成陷阱 4 | """ 5 | 6 | __version__ = "1.0.0" 7 | 8 | -------------------------------------------------------------------------------- /scripts/vlm_evaluate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONFIG_FILE=config/vlm/$your_config$.yaml 4 | 5 | python -m evaluation.vlm_eval.run_vlm_eval "$CONFIG_FILE" 6 | 7 | -------------------------------------------------------------------------------- /games/maze3d/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Maze3D 游戏模块 3 | """ 4 | 5 | from .main import QAGenerator, PuzzleGenerator 6 | 7 | __all__ = ['QAGenerator', 'PuzzleGenerator'] 8 | 9 | -------------------------------------------------------------------------------- /skins/maze3d/1/colors.json: -------------------------------------------------------------------------------- 1 | { 2 | "start_pos": "#4444FF", 3 | "goal_pos": "#FF4444", 4 | "default_cube": "#888888", 5 | "ball": "#FFD700", 6 | "ball_edge": "#FF8C00" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /skins/maze3d/2/colors.json: -------------------------------------------------------------------------------- 1 | { 2 | "start_pos": "#750725", 3 | "goal_pos": "#0DC27D", 4 | "default_cube": "#34495E", 5 | "ball": "#9C0BD0", 6 | "ball_edge": "#C40E75" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /skins/maze3d/3/colors.json: -------------------------------------------------------------------------------- 1 | { 2 | "start_pos": "#00FF00", 3 | "goal_pos": 
"#FF00FF", 4 | "default_cube": "#CCCCCC", 5 | "ball": "#00FFFF", 6 | "ball_edge": "#0088FF" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /skins/maze3d/4/colors.json: -------------------------------------------------------------------------------- 1 | { 2 | "start_pos": "#FF6B6B", 3 | "goal_pos": "#4E5DCD", 4 | "default_cube": "#95E1D3", 5 | "ball": "#34F66B", 6 | "ball_edge": "#0AABA3" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /skins/pathfinder/1/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "pathfinder", 3 | "skin_id": "1", 4 | "visual_description": { 5 | "start": "green circle", 6 | "end": "red circle", 7 | "road": "white" 8 | } 9 | } -------------------------------------------------------------------------------- /skins/pathfinder/3/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "pathfinder", 3 | "skin_id": "3", 4 | "visual_description": { 5 | "start": "blue ice ball", 6 | "end": "blue circle", 7 | "road": "icy" 8 | } 9 | } -------------------------------------------------------------------------------- /scripts/generate_by_skins.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 根据皮肤批量生成关卡 3 | # 用法: ./scripts/generate_by_skins.sh [config_file] 4 | 5 | CONFIG_FILE=${1:-config/config.yaml} 6 | 7 | python generation/batch_generate.py "$CONFIG_FILE" 8 | 9 | -------------------------------------------------------------------------------- /skins/pathfinder/2/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "pathfinder", 3 | "skin_id": "2", 4 | "visual_description": { 5 | "start": "white golf ball", 6 | "end": "golf hole", 7 | "road": "green grass" 8 | } 9 | } 
-------------------------------------------------------------------------------- /skins/maze/1/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze", 3 | "skin_id": "1", 4 | "visual_description": { 5 | "player": "red circle", 6 | "goal": "green square", 7 | "wall": "light blue square", 8 | "floor": "white square" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/maze/2/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze", 3 | "skin_id": "2", 4 | "visual_description": { 5 | "player": "white rabbit", 6 | "goal": "orange carrots", 7 | "wall": "gray rock", 8 | "floor": "green grass tiles" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/maze/5/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze", 3 | "skin_id": "5", 4 | "visual_description": { 5 | "player": "green circle", 6 | "goal": "red circle", 7 | "wall": "blue potion bottle", 8 | "floor": "white square" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/pathfinder/4/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "pathfinder", 3 | "skin_id": "4", 4 | "visual_description": { 5 | "start": "white boat with yellow dots", 6 | "end": "white flag with yellow dots", 7 | "road": "blue water tiles" 8 | } 9 | } -------------------------------------------------------------------------------- /skins/trapfield/1/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "trapfield", 3 | "skin_id": "1", 4 | "visual_description": { 5 | "player": "blue circle", 6 | "goal": "green circle", 7 | "trap": "red x", 8 | "floor": 
"white square" 9 | } 10 | } -------------------------------------------------------------------------------- /games/pathfinder/__init__.py: -------------------------------------------------------------------------------- 1 | """PathFinder Game - A path finding puzzle game with Bezier curve roads.""" 2 | 3 | from .board import PathFinderBoard 4 | from .generator import generate_pathfinder_board 5 | 6 | __all__ = ['PathFinderBoard', 'generate_pathfinder_board'] 7 | 8 | -------------------------------------------------------------------------------- /skins/trapfield/4/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "trapfield", 3 | "skin_id": "4", 4 | "visual_description": { 5 | "player": "gray robot", 6 | "goal": "golden star", 7 | "trap": "blue crystal block", 8 | "floor": "silver metal plate" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/maze/3/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze", 3 | "skin_id": "3", 4 | "visual_description": { 5 | "player": "blue robot head", 6 | "goal": "green and yellow tile", 7 | "wall": "black brick wall", 8 | "floor": "gray decorative tile" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/maze/4/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze", 3 | "skin_id": "4", 4 | "visual_description": { 5 | "player": "anime schoolgirl character", 6 | "goal": "green square", 7 | "wall": "gray stone wall", 8 | "floor": "wooden floor tiles" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/trapfield/3/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "trapfield", 3 | "skin_id": "3", 4 | 
"visual_description": { 5 | "player": "blue adventurer", 6 | "goal": "golden eagle emblem", 7 | "trap": "fiery explosion", 8 | "floor": "gray stone bricks" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/trapfield/2/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "trapfield", 3 | "skin_id": "2", 4 | "visual_description": { 5 | "player": "blue parka explorer and penguin", 6 | "goal": "red flag", 7 | "trap": "blue water pool", 8 | "floor": "blue ice crystals" 9 | } 10 | } -------------------------------------------------------------------------------- /skins/maze3d/1/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze3d", 3 | "skin_id": "1", 4 | "visual_description": { 5 | "start_cube": "blue cube", 6 | "goal_cube": "red cube", 7 | "default_cube": "gray cube", 8 | "ball": "golden ball with orange edge" 9 | } 10 | } 11 | 12 | -------------------------------------------------------------------------------- /skins/sokoban/1/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "sokoban", 3 | "skin_id": "1", 4 | "visual_description": { 5 | "player": "blue circle", 6 | "goal": "pink square", 7 | "box": "yellow square", 8 | "wall": "gray square", 9 | "floor": "white square" 10 | } 11 | } -------------------------------------------------------------------------------- /AutoEnv/config/env_skin_gen.yaml: -------------------------------------------------------------------------------- 1 | # Image generation model (required) 2 | image_model: "gemini-2.5-flash-image" 3 | 4 | # Maze mode configuration 5 | maze_type: "maze" 6 | theme: "ancient stone with moss and cracks" 7 | 8 | # Output directory 9 | envs_root_path: "workspace/envs" 10 | 11 | 
-------------------------------------------------------------------------------- /skins/maze3d/3/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze3d", 3 | "skin_id": "3", 4 | "visual_description": { 5 | "start_cube": "bright green cube", 6 | "goal_cube": "magenta cube", 7 | "default_cube": "light gray cube", 8 | "ball": "cyan ball with blue edge" 9 | } 10 | } 11 | 12 | -------------------------------------------------------------------------------- /skins/sokoban/5/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "sokoban", 3 | "skin_id": "5", 4 | "visual_description": { 5 | "player": "brown-haired character", 6 | "goal": "green x", 7 | "box": "wooden crate", 8 | "wall": "orange floor tiles", 9 | "floor": "beige sand texture" 10 | } 11 | } -------------------------------------------------------------------------------- /skins/maze3d/4/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze3d", 3 | "skin_id": "4", 4 | "visual_description": { 5 | "start_cube": "coral red cube", 6 | "goal_cube": "royal blue cube", 7 | "default_cube": "mint green cube", 8 | "ball": "bright green ball with teal edge" 9 | } 10 | } 11 | 12 | -------------------------------------------------------------------------------- /skins/maze3d/2/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "maze3d", 3 | "skin_id": "2", 4 | "visual_description": { 5 | "start_cube": "dark maroon cube", 6 | "goal_cube": "teal green cube", 7 | "default_cube": "dark slate blue cube", 8 | "ball": "purple ball with magenta edge" 9 | } 10 | } 11 | 12 | -------------------------------------------------------------------------------- /skins/sokoban/4/description.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "game_type": "sokoban", 3 | "skin_id": "4", 4 | "visual_description": { 5 | "player": "purple wizard", 6 | "goal": "blue magic circle", 7 | "box": "purple wooden crate", 8 | "wall": "purple rune stones", 9 | "floor": "gray stone bricks" 10 | } 11 | } -------------------------------------------------------------------------------- /skins/sokoban/2/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "sokoban", 3 | "skin_id": "2", 4 | "visual_description": { 5 | "player": "blue worker with red pants", 6 | "goal": "golden square pattern", 7 | "box": "wooden crate", 8 | "wall": "rusty metal door", 9 | "floor": "gray stone tiles" 10 | } 11 | } -------------------------------------------------------------------------------- /skins/sokoban/3/description.json: -------------------------------------------------------------------------------- 1 | { 2 | "game_type": "sokoban", 3 | "skin_id": "3", 4 | "visual_description": { 5 | "player": "brown-haired boy with a box", 6 | "goal": "yellow and green checkered circle", 7 | "box": "wooden crate", 8 | "wall": "brown brick wall", 9 | "floor": "gray stone floor" 10 | } 11 | } -------------------------------------------------------------------------------- /core/schema/__init__.py: -------------------------------------------------------------------------------- 1 | from .position import Position, BBox 2 | from .entity import Entity 3 | from .grid import Grid 4 | from .render import RenderConfig 5 | from .state import UnifiedState 6 | 7 | __all__ = [ 8 | 'Position', 9 | 'BBox', 10 | 'Entity', 11 | 'Grid', 12 | 'RenderConfig', 13 | 'UnifiedState', 14 | ] 15 | 16 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | VR-Bench 工具模块 3 | """ 4 | 
def save_base64_image(img_b64: str, path: Path) -> None:
    """Decode a base64-encoded image and write the raw bytes to ``path``.

    Args:
        img_b64: Base64 text of the image. A leading data-URL header
            (``data:<mime>;base64,``) is stripped before decoding.
        path: Destination file (a plain string path is accepted as well).
            Missing parent directories are created and an existing file is
            overwritten.

    Raises:
        binascii.Error: If the payload is not correctly padded base64.
    """
    # ``b64decode`` silently discards characters outside the base64 alphabet,
    # so a data-URL header left in place would be partly decoded as payload
    # and corrupt the written image.
    if isinstance(img_b64, str) and img_b64.startswith("data:"):
        _header, separator, payload = img_b64.partition(",")
        if separator:
            img_b64 = payload

    # Decode before touching the filesystem so malformed input does not leave
    # freshly created directories behind.
    img_bytes = base64.b64decode(img_b64)

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(img_bytes)
# Integer codes for the kinds of maze cell.
# NOTE(review): presumably these are the values stored in each grid cell —
# confirm against the maze generators.
EMPTY_CELL = 0
WALL_CELL = 1
PLAYER_CELL = 2
GOAL_CELL = 3

# NOTE(review): presumably the side length of one rendered cell, in pixels — confirm.
CELL_SIZE = 30

# Names for the image, state and video outputs and for the data file.
# NOTE(review): how these are combined with an output root is not visible here.
IMAGES_DIR = "images"
STATES_DIR = "states"
VIDEOS_DIR = "video"
DATA_PATH = "data.json"

# Three maze sizes and three size labels.
# NOTE(review): presumably paired index-for-index — confirm.
ALLOWED_SIZES = [9, 11, 13]
SIZE_LABELS = ["Small", "Medium", "Large"]

# Difficulty label per maze size; the keys are the same values as ALLOWED_SIZES.
PLOT_LEVELS = {
    9: "Easy",
    11: "Medium",
    13: "Hard",
}
4 | """ 5 | 6 | from .constants import * 7 | from .texture_handler import BaseTextureHandler 8 | from .renderer import BaseRenderer 9 | 10 | __all__ = [ 11 | 'BaseTextureHandler', 12 | 'BaseRenderer', 13 | 'EMPTY', 14 | 'WALL', 15 | 'PLAYER', 16 | 'TARGET', 17 | 'BOX', 18 | 'BOX_ON_TARGET', 19 | 'PLAYER_ON_TARGET', 20 | ] 21 | 22 | -------------------------------------------------------------------------------- /games/maze/templates/__init__.py: -------------------------------------------------------------------------------- 1 | from .available_directions import AvailableDirections 2 | from .player_position import PlayerPosition 3 | from .goal_position import GoalPosition 4 | from .position_after_moving import PositionAfterMoving 5 | from .find_path_to_goal import FindPathToGoal 6 | from .turn_count import TurnCount 7 | 8 | __all__ = [ 9 | "AvailableDirections", 10 | "PlayerPosition", 11 | "GoalPosition", 12 | "PositionAfterMoving", 13 | "FindPathToGoal", 14 | "TurnCount", 15 | ] 16 | -------------------------------------------------------------------------------- /AutoEnv/autoenv/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | AutoEnv Pipeline Module 3 | Maze mode visual pipeline exports. 
4 | """ 5 | 6 | from autoenv.pipeline.visual import ( 7 | AnalyzerNode, 8 | AssetGeneratorNode, 9 | AutoEnvContext, 10 | BackgroundRemovalNode, 11 | StrategistNode, 12 | VisualPipeline, 13 | ) 14 | 15 | __all__ = [ 16 | "VisualPipeline", 17 | "AutoEnvContext", 18 | "AnalyzerNode", 19 | "StrategistNode", 20 | "AssetGeneratorNode", 21 | "BackgroundRemovalNode", 22 | ] 23 | -------------------------------------------------------------------------------- /config/config_maze.yaml: -------------------------------------------------------------------------------- 1 | # Maze 数据集生成配置 2 | 3 | # 游戏类型 4 | game_type: "maze" 5 | 6 | # 皮肤文件夹根目录 7 | skins_root: "skins/maze" 8 | # 输出根目录 9 | output_root: "dataset_output/generated_levels_maze" 10 | 11 | # Maze 难度配置 12 | difficulties: 13 | easy: 14 | maze_size: 7 15 | count: 120 16 | 17 | medium: 18 | maze_size: 11 19 | count: 120 20 | 21 | hard: 22 | maze_size: 15 23 | count: 120 24 | 25 | # 生成配置 26 | generation: 27 | fps: 24 28 | max_duplicate_retries: 100 29 | 30 | # 并行配置 31 | parallel: 32 | max_workers: 4 33 | 34 | -------------------------------------------------------------------------------- /games/maze/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Maze game configuration. 
#!/bin/bash
# Generate videos of all optimal paths for a dataset.
# Usage:   ./scripts/generate_videos.sh [dataset_dir] [workers] [skin_dir]
# Example: ./scripts/generate_videos.sh dataset/maze/1
#
# Every argument is optional; the defaults are the values assigned below.

DATASET_DIR=${1:-dataset/maze/1/easy}
WORKERS=${2:-8}
# The default skin is a machine-specific absolute path; pass a third argument
# to point at a different skin folder.
SKIN=${3:-/data/pengyiran/cvpr_v1/skins/maze/1}
if [ ! -d "$DATASET_DIR" ]; then
    # Report the failure on stderr so it is not mixed into regular output.
    echo "错误: 数据集目录不存在: $DATASET_DIR" >&2
    exit 1
fi

# stderr is merged into stdout, then lines starting with "Processing" or
# "Frame" are filtered out of the combined output.
python generation/generate_videos.py \
    "$DATASET_DIR" \
    --workers "$WORKERS" \
    --skin "$SKIN" \
    2>&1 | grep -v "^Processing" | grep -v "^Frame"
4 | """ 5 | 6 | from autoenv.pipeline.visual.nodes import ( 7 | AnalyzerNode, 8 | AssetGeneratorNode, 9 | AutoEnvContext, 10 | BackgroundRemovalNode, 11 | StrategistNode, 12 | ) 13 | from autoenv.pipeline.visual.pipeline import VisualPipeline 14 | 15 | __all__ = [ 16 | "AnalyzerNode", 17 | "AssetGeneratorNode", 18 | "AutoEnvContext", 19 | "BackgroundRemovalNode", 20 | "StrategistNode", 21 | "VisualPipeline", 22 | ] 23 | -------------------------------------------------------------------------------- /games/sokoban/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sokoban game module. 3 | 4 | This module provides Sokoban game logic, rendering, and utilities. 5 | """ 6 | 7 | from .board import SokobanBoard, generate_random_board 8 | from .textured_board import TexturedSokobanBoard, generate_textured_random_board, get_shared_texture_handler 9 | from .renderer import SokobanRenderer, get_shared_renderer 10 | 11 | __all__ = [ 12 | 'SokobanBoard', 13 | 'generate_random_board', 14 | 'TexturedSokobanBoard', 15 | 'generate_textured_random_board', 16 | 'get_shared_texture_handler', 17 | 'SokobanRenderer', 18 | 'get_shared_renderer', 19 | ] 20 | 21 | -------------------------------------------------------------------------------- /config/vlm/maze3d_eval.yaml: -------------------------------------------------------------------------------- 1 | game: maze3d 2 | dataset: dataset/maze3d_new/1 3 | output: vlm_eval_results/maze3d 4 | 5 | models: 6 | # API 模型示例 7 | - name: Qwen/Qwen2.5-VL-7B-Instruct 8 | type: api 9 | base_url: http://localhost:8123/v1 10 | max_tokens: 10000 11 | temperature: 1.0 12 | 13 | - name: gpt-5 14 | type: api 15 | base_url: https://newapi.deepwisdom.ai/v1 16 | max_tokens: 60000 17 | temperature: 1.0 18 | - name: gemini-2.5-pro 19 | type: api 20 | base_url: https://newapi.deepwisdom.ai/v1 21 | max_tokens: 60000 22 | temperature: 1.0 23 | 24 | workers: 10 25 | max_levels: -1 26 | 27 | 
-------------------------------------------------------------------------------- /config/vlm/sokoban_eval.yaml: -------------------------------------------------------------------------------- 1 | game: sokoban 2 | dataset: dataset/sokoban/1 3 | output: vlm_eval_results/sokoban 4 | 5 | models: 6 | - name: gpt-5 7 | type: api 8 | base_url: https://newapi.deepwisdom.ai/v1 9 | max_tokens: 60000 10 | temperature: 1.0 11 | - name: gemini-2.5-pro 12 | type: api 13 | base_url: https://newapi.deepwisdom.ai/v1 14 | max_tokens: 60000 15 | temperature: 1.0 16 | - name: Qwen/Qwen2.5-VL-7B-Instruct 17 | type: api 18 | base_url: http://localhost:8123/v1 19 | max_tokens: 10000 20 | temperature: 1.0 21 | 22 | workers: 10 23 | max_levels: -1 24 | assets_folder: skins/sokoban/1 25 | 26 | -------------------------------------------------------------------------------- /config/vlm/trapfield_eval.yaml: -------------------------------------------------------------------------------- 1 | game: trapfield 2 | dataset: dataset/trapfield/1 3 | output: vlm_eval_results/trapfield 4 | 5 | models: 6 | - name: gpt-5 7 | type: api 8 | base_url: https://newapi.deepwisdom.ai/v1 9 | max_tokens: 60000 10 | temperature: 1.0 11 | - name: gemini-2.5-pro 12 | type: api 13 | base_url: https://newapi.deepwisdom.ai/v1 14 | max_tokens: 60000 15 | temperature: 1.0 16 | - name: Qwen/Qwen2.5-VL-7B-Instruct 17 | type: api 18 | base_url: http://localhost:8123/v1 19 | max_tokens: 10000 20 | temperature: 1.0 21 | 22 | workers: 10 23 | max_levels: -1 24 | assets_folder: skins/trapfield/1 25 | 26 | -------------------------------------------------------------------------------- /games/trapfield/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | TrapField 游戏常量定义 3 | """ 4 | 5 | # 单元格类型 6 | EMPTY_CELL = 0 # 空地(可以走) 7 | TRAP_CELL = 1 # 陷阱(踩上去游戏结束) 8 | PLAYER_CELL = 2 # 玩家起点 9 | GOAL_CELL = 3 # 目标终点 10 | 11 | # 渲染配置 12 | CELL_SIZE = 64 # 每个单元格的像素大小 13 | 14 | # 难度配置 
class PillowNotInstalledError(RuntimeError):
    """RuntimeError subclass named for a missing Pillow installation.

    Nothing in this block raises it.
    """


def create_solution_video(
    maze: Sequence[Sequence[int]],
    path: Iterable[Coordinate],
    cell_size: int,
    save_path: str,
    frame_duration_ms: int = 300,
    assets_folder: Optional[str] = None,
) -> None:
    """Render a video of ``path`` through ``maze`` and store it at ``save_path``.

    The work is delegated to the shared renderer obtained for
    ``assets_folder``.

    Args:
        maze: Grid of integer cell values.
        path: Coordinates handed to the renderer as the route to draw.
        cell_size: Accepted for interface compatibility; not forwarded to the
            renderer.
        save_path: Output location passed to the renderer.
        frame_duration_ms: Passed to the renderer as the per-frame duration.
        assets_folder: Optional texture folder used to select the renderer.
    """
    get_shared_renderer(assets_folder).render_video(
        maze, path, save_path, frame_duration_ms
    )
class PillowNotInstalledError(RuntimeError):
    """Raised by ``draw_maze`` when Pillow cannot be imported."""


def draw_maze(maze: Sequence[Sequence[int]], cell_size: int, save_path: str,
              assets_folder: Optional[str] = None) -> None:
    """Render ``maze`` to an image at ``save_path`` via the shared renderer.

    Args:
        maze: Grid of integer cell values.
        cell_size: Accepted for interface compatibility; not forwarded to the
            renderer.
        save_path: Output location passed to the renderer.
        assets_folder: Optional texture folder used to select the renderer.

    Raises:
        PillowNotInstalledError: If ``PIL`` cannot be imported.
    """
    try:
        # Imported only to verify that Pillow is available; the name is unused.
        from PIL import Image
    except ImportError as import_error:
        message = (
            "Pillow is required to render maze images. Install it with 'pip install pillow'."
        )
        raise PillowNotInstalledError(message) from import_error

    get_shared_renderer(assets_folder).render_maze(maze, save_path)
@dataclass
class Grid:
    """Integer grid stored as a list of rows together with its dimensions."""

    # Outer list holds the rows; ``from_2d_list`` derives height and width from it.
    data: List[List[int]]
    height: int
    width: int

    def to_dict(self) -> Dict:
        """Return the three fields as a plain dict (``data`` is not copied)."""
        return dict(data=self.data, height=self.height, width=self.width)

    @classmethod
    def from_dict(cls, data: Dict) -> 'Grid':
        """Build a grid from a mapping with ``data``, ``height`` and ``width`` keys.

        Raises:
            KeyError: If one of the three keys is missing.
        """
        rows, row_count, column_count = data["data"], data["height"], data["width"]
        return cls(data=rows, height=row_count, width=column_count)

    @classmethod
    def from_2d_list(cls, grid: List[List[int]]) -> 'Grid':
        """Wrap a list of rows, taking the width from the first row.

        An empty ``grid`` yields height 0 and width 0.
        """
        row_count = len(grid)
        column_count = len(grid[0]) if grid else 0
        return cls(data=grid, height=row_count, width=column_count)
0.2 # 20% 陷阱密度 17 | max_attempts: 50 # 最大生成尝试次数 18 | count: 120 # 生成数量 19 | 20 | medium: 21 | grid_size: 7 # 7x7 网格 22 | trap_density: 0.3 # 30% 陷阱密度 23 | max_attempts: 50 24 | count: 120 25 | 26 | hard: 27 | grid_size: 11 # 11x11 网格 28 | trap_density: 0.35 # 35% 陷阱密度 29 | max_attempts: 50 30 | count: 120 31 | 32 | # 生成配置 33 | generation: 34 | fps: 24 # 固定24fps(连续移动动画) 35 | max_duplicate_retries: 100 36 | 37 | # 并行配置 38 | parallel: 39 | max_workers: 4 40 | 41 | -------------------------------------------------------------------------------- /config/config_3d_maze.yaml: -------------------------------------------------------------------------------- 1 | # 3D Maze 游戏数据集生成配置 2 | 3 | # 游戏类型 4 | game_type: "maze3d" 5 | 6 | # 输出根目录 7 | output_root: "generated_levels_maze3d" 8 | 9 | # 皮肤目录(包含多个皮肤子目录,每个子目录包含 colors.json 和 description.json) 10 | skins_root: "skins/maze3d" 11 | 12 | # 难度配置 13 | difficulties: 14 | # 路径查找问题 - 不同难度 15 | easy: 16 | qa_type: "path_finding" 17 | grid_size: [6, 6, 5] # [宽度, 深度, 高度] 18 | max_attempts: 100 19 | count: 120 20 | 21 | medium: 22 | qa_type: "path_finding" 23 | grid_size: [8, 8, 7] 24 | max_attempts: 100 25 | count: 120 26 | 27 | hard: 28 | qa_type: "path_finding" 29 | grid_size: [10, 10, 9] 30 | max_attempts: 100 31 | count: 120 32 | 33 | 34 | # 生成配置 35 | generation: 36 | max_retries: 100 37 | timeout_seconds: 30 38 | fps: 24 39 | generate_video: true # 是否生成视频 40 | 41 | # 并行配置 42 | parallel: 43 | max_workers: 4 44 | 45 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # VR-Bench Environment Configuration 2 | # Copy this file to .env and fill in your values 3 | 4 | # Dataset paths 5 | DATASET_ROOT=/path/to/dataset_VR 6 | OUTPUT_ROOT=/path/to/output 7 | 8 | # Model cache 9 | HF_CACHE_DIR=/path/to/huggingface_model 10 | TRANSFORMERS_CACHE=/path/to/huggingface_model 11 | 12 | # CUDA configuration 13 | 
@dataclass
class RenderConfig:
    """Cell size together with the overall image width and height."""

    cell_size: int
    image_width: int
    image_height: int

    def to_dict(self) -> Dict[str, int]:
        """Return the three fields as a plain dict."""
        return dict(
            cell_size=self.cell_size,
            image_width=self.image_width,
            image_height=self.image_height,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, int]) -> 'RenderConfig':
        """Build a config from a mapping holding the three field names as keys.

        Raises:
            KeyError: If one of the keys is missing.
        """
        field_names = ("cell_size", "image_width", "image_height")
        return cls(*(data[name] for name in field_names))

    @classmethod
    def from_grid_size(cls, height: int, width: int, cell_size: int) -> 'RenderConfig':
        """Derive the image size by multiplying the grid dimensions by ``cell_size``."""
        total_width = width * cell_size
        total_height = height * cell_size
        return cls(cell_size=cell_size, image_width=total_width, image_height=total_height)
-------------------------------------------------------------------------------- /games/maze/utils/file_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | import shutil 5 | from typing import Iterable, Optional 6 | 7 | 8 | def ensure_directory(path: str | Path) -> None: 9 | Path(path).mkdir(parents=True, exist_ok=True) 10 | 11 | 12 | def clean_directory(path: str | Path) -> None: 13 | directory = Path(path) 14 | if not directory.exists(): 15 | return 16 | for entry in directory.iterdir(): 17 | if entry.is_dir(): 18 | shutil.rmtree(entry) 19 | else: 20 | entry.unlink() 21 | 22 | 23 | def setup_output_directories( 24 | output_dir: str, 25 | images_dir: str, 26 | states_dir: str, 27 | video_dir: Optional[str] = None, 28 | ) -> None: 29 | for folder in (output_dir, images_dir, states_dir): 30 | ensure_directory(folder) 31 | clean_directory(images_dir) 32 | clean_directory(states_dir) 33 | 34 | if video_dir is not None: 35 | ensure_directory(video_dir) 36 | clean_directory(video_dir) 37 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/game_executor.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Dict, Any, Tuple 3 | from core.schema import UnifiedState 4 | 5 | 6 | class GameExecutor(ABC): 7 | @abstractmethod 8 | def load_state(self, state_path: str) -> UnifiedState: 9 | pass 10 | 11 | @abstractmethod 12 | def get_optimal_solution(self, state: UnifiedState) -> List[Dict[str, Any]]: 13 | pass 14 | 15 | @abstractmethod 16 | def execute_action(self, state: UnifiedState, action: Dict[str, Any]) -> Tuple[UnifiedState, bool, str]: 17 | pass 18 | 19 | @abstractmethod 20 | def check_win(self, state: UnifiedState) -> bool: 21 | pass 22 | 23 | @abstractmethod 24 | def render_state(self, state: UnifiedState, 
output_path: str) -> None: 25 | pass 26 | 27 | @abstractmethod 28 | def get_system_prompt(self) -> str: 29 | pass 30 | 31 | @abstractmethod 32 | def get_user_prompt(self) -> str: 33 | pass 34 | 35 | def get_game_type(self) -> str: 36 | return 'default' 37 | 38 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/prompts/trapfield_prompt.py: -------------------------------------------------------------------------------- 1 | # 占位符格式: {player}, {goal}, {trap}, {floor} 2 | TRAPFIELD_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a grid-based maze. 3 | {trap} tiles represent trap zones that must be avoided. 4 | {floor} tiles represent open paths that can be moved through. 5 | The {player} represents the starting point of the path. 6 | The {goal} represents the goal or destination. 7 | Task: 8 | Infer the shortest valid path for the {player} to reach the {goal}. 9 | Movement can only occur between adjacent open tiles — up, down, left, or right. 10 | Diagonal movement is not allowed. 11 | The path must not cross or touch any trap tiles. 12 | Output Format: 13 | Return the full movement sequence of the {player} as a JSON array of directions, where each element is one of "up", "down", "left", or "right". 14 | Do not include any explanations, reasoning, or extra text. 15 | Example of expected output: 16 | {{ 17 | "path": ["left", "left", "down", "down"] 18 | }} 19 | """ 20 | 21 | TRAPFIELD_USER_PROMPT_TEMPLATE = """Infer the shortest valid path for the {player} to reach the {goal}. 
22 | """ 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 VR-Bench Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /AutoEnv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 FoundationAgents 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/prompts/maze_prompt.py: -------------------------------------------------------------------------------- 1 | # 占位符格式: {player}, {goal}, {wall}, {floor} 2 | MAZE_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a grid-based maze. 3 | {wall} tiles represent walls and cannot be crossed. 4 | {floor} tiles represent open paths that can be moved through. 5 | The {player} represents the starting point of the path. 6 | The {goal} represents the goal or destination. 
7 | 8 | 9 | Task: 10 | Infer the shortest valid path from the {player} starting point to the {goal} goal. 11 | Movement can only occur between adjacent open tiles — up, down, left, or right. 12 | Diagonal movement is not allowed, and the path must not cross or touch any walls. 13 | 14 | 15 | Output Format: 16 | Return the entire movement sequence of the {player} as a JSON array of directions, where each element is one of "up", "down", "left", or "right". 17 | Do not include any explanations or additional text. 18 | 19 | 20 | Example of expected output: 21 | {{ 22 | "path": ["up", "up", "left", "down", "right", "right"] 23 | }} 24 | """ 25 | 26 | MAZE_USER_PROMPT_TEMPLATE = """Infer the shortest valid path from the {player} starting point to the {goal} goal. 27 | """ 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /games/maze/generators/state_gen.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import sys 4 | from pathlib import Path 5 | from typing import Sequence 6 | 7 | sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) 8 | 9 | from core.schema import UnifiedState, Grid, Entity, RenderConfig 10 | from .. 
import constants 11 | from ..utils import maze_utils 12 | 13 | 14 | def save_state(maze: Sequence[Sequence[int]], save_path: str) -> None: 15 | player_pos = maze_utils.find_position(maze, constants.PLAYER_CELL) 16 | goal_pos = maze_utils.find_position(maze, constants.GOAL_CELL) 17 | 18 | height = len(maze) 19 | width = len(maze[0]) if maze else 0 20 | cell_size = constants.CELL_SIZE 21 | 22 | state = UnifiedState( 23 | version="1.0", 24 | game_type="maze", 25 | grid=Grid.from_2d_list([list(row) for row in maze]), 26 | player=Entity.from_grid_pos(player_pos[0], player_pos[1], cell_size), 27 | goal=Entity.from_grid_pos(goal_pos[0], goal_pos[1], cell_size), 28 | boxes=[], 29 | render=RenderConfig.from_grid_size(height, width, cell_size), 30 | metadata={} 31 | ) 32 | 33 | state.save(save_path) 34 | -------------------------------------------------------------------------------- /scripts/Wan2.2-TI2V-5B_lora.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | from diffsynth import save_video 4 | from diffsynth.pipelines.wan_video_new import WanVideoPipeline, ModelConfig 5 | from modelscope import dataset_snapshot_download 6 | 7 | pipe = WanVideoPipeline.from_pretrained( 8 | torch_dtype=torch.bfloat16, 9 | device="cuda", 10 | model_configs=[ 11 | ModelConfig(model_id="Wan-AI/Wan2.2-TI2V-5B", origin_file_pattern="models_t5_umt5-xxl-enc-bf16.pth", offload_device="cpu"), 12 | ModelConfig(model_id="Wan-AI/Wan2.2-TI2V-5B", origin_file_pattern="diffusion_pytorch_model*.safetensors", offload_device="cpu"), 13 | ModelConfig(model_id="Wan-AI/Wan2.2-TI2V-5B", origin_file_pattern="Wan2.2_VAE.pth", offload_device="cpu"), 14 | ], 15 | ) 16 | 17 | # Load LoRA weights 18 | pipe.load_lora(pipe.dit, f"path/to/your/lora/checkpoint.safetensors", alpha=1) 19 | 20 | pipe.enable_vram_management() 21 | 22 | 23 | input_image = Image.open("path/to/your/input_image.png").resize((512, 512)) 24 | video = pipe( 25 | 
prompt = """Your prompt here""" 26 | , 27 | negative_prompt="", 28 | seed=0, tiled=True, 29 | height=512, width=512, 30 | input_image=input_image, 31 | num_frames=193, 32 | ) 33 | save_video(video, "path/to/your/output_video.mp4", fps=15, quality=5) 34 | -------------------------------------------------------------------------------- /config/config_pathfinder.yaml: -------------------------------------------------------------------------------- 1 | # PathFinder 游戏数据集生成配置 2 | 3 | # 游戏类型 4 | game_type: "pathfinder" 5 | 6 | # 输出根目录 7 | output_root: "generated_levels_pathfinder" 8 | 9 | # 皮肤根目录(PathFinder 不需要皮肤,但保留字段以兼容系统) 10 | 11 | skins_root: "skins/pathfinder" 12 | # 难度配置 13 | # 注意:难度通过多个因素综合区分 14 | # - 图片尺寸:图片越大,视觉范围越大 15 | # - 道路宽度:道路越窄,越难控制 16 | # - 节点密度:节点越密集,路径网络越复杂 17 | # - 支路数量:支路越多,干扰项越多 18 | # - 最短路径:要求的路径越长,难度越高 19 | # 实际配置在 games/pathfinder/constants.py 中的 DIFFICULTY_CONFIG 20 | difficulties: 21 | easy: 22 | difficulty: "easy" # 难度名称 23 | # 1024x1024, 60px道路, 稀疏节点(间距30%), 1条支路, 4节点路径 24 | max_attempts: 50 # 最大生成尝试次数 25 | count: 120 # 生成数量 26 | 27 | medium: 28 | difficulty: "medium" 29 | # 1024x1024, 50px道路, 中等节点(间距20%), 2条支路, 6节点路径 30 | max_attempts: 50 31 | count: 120 32 | 33 | hard: 34 | difficulty: "hard" 35 | # 1024x1024, 36px道路, 密集节点(间距15%), 3条支路, 7节点路径 36 | max_attempts: 50 37 | count: 120 38 | 39 | # 视频生成配置 40 | video: 41 | fps: 24 # 视频帧率(会被 constants.py 中的配置覆盖) 42 | add_grid: false # 是否添加网格(PathFinder 不适用) 43 | 44 | # 生成配置 45 | generation: 46 | max_retries: 100 # 单个关卡最大重试次数 47 | timeout_seconds: 30 # 单个关卡生成超时时间 48 | 49 | # 并行配置 50 | parallel: 51 | max_workers: 12 # 并行工作进程数 52 | 53 | -------------------------------------------------------------------------------- /scripts/start_sglang_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # SGLang 服务启动脚本 4 | 5 | MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct" 6 | HOST="0.0.0.0" 7 | PORT=8123 8 | CACHE_DIR="/data/pengyiran/cvpr_v1/huggingface_model" 9 | 10 
| # GPU 配置 11 | # 方式 1: 使用单个 GPU 12 | # GPU_IDS="2" 13 | # TP_SIZE=1 14 | 15 | # 方式 2: 使用多个 GPU (Tensor Parallelism) 16 | GPU_IDS="2,3" 17 | TP_SIZE=2 18 | 19 | echo "启动 SGLang 服务..." 20 | echo "模型: $MODEL_NAME" 21 | echo "地址: http://$HOST:$PORT" 22 | echo "缓存目录: $CACHE_DIR" 23 | echo "GPU: $GPU_IDS" 24 | 25 | # 单 GPU 模式 26 | # CUDA_VISIBLE_DEVICES=$GPU_IDS python -m sglang.launch_server \ 27 | # --model-path $MODEL_NAME \ 28 | # --host $HOST \ 29 | # --port $PORT \ 30 | # --cache-dir $CACHE_DIR \ 31 | # --trust-remote-code 32 | 33 | # 设置 HuggingFace 缓存目录 34 | export HF_HOME=$CACHE_DIR 35 | export TRANSFORMERS_CACHE=$CACHE_DIR 36 | 37 | # 多 GPU 模式 38 | CUDA_VISIBLE_DEVICES=$GPU_IDS python -m sglang.launch_server \ 39 | --model-path $MODEL_NAME \ 40 | --host $HOST \ 41 | --port $PORT \ 42 | --tp $TP_SIZE \ 43 | --download-dir $CACHE_DIR \ 44 | --trust-remote-code \ 45 | --skip-server-warmup 46 | 47 | # 其他可选参数: 48 | # --mem-fraction-static 0.9 # GPU 显存使用比例 49 | # --chat-template qwen # 指定 chat template 50 | # --context-length 8192 # 最大上下文长度 51 | 52 | -------------------------------------------------------------------------------- /scripts/videomodel_evaluate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 批量评估生成的视频(自动匹配所有难度) 3 | # 用法: ./scripts/videomodel_evaluate.sh 4 | 5 | # 设置 CUDA 环境变量(修复 CuPy 编译问题) 6 | export CUDA_PATH=${CUDA_HOME:-/usr/local/cuda} 7 | export CPATH=$CUDA_PATH/include:${CPATH} 8 | export LD_LIBRARY_PATH=$CUDA_PATH/lib64:${LD_LIBRARY_PATH} 9 | 10 | NAME= #maze name 11 | DATASET_DIR=dataset_VR/train/$NAME/1 12 | OUTPUT_DIR=dataset_VR/train/$NAME/1 13 | RESULT_DIR=eval_results/$NAME 14 | WORKERS=4 15 | NUM_SAMPLES=1000 16 | THRESHOLD=0.05 17 | FIDELITY_PIXEL_THRESHOLD=5 18 | FRAME_STEP=1 19 | TRACKER_TYPE=ncc # 追踪器类型: csrt, ncc, optical_flow 20 | SEARCH_MARGIN=50 # NCC追踪器搜索边距(0=全图搜索,>0=局部搜索范围) 21 | USE_GPU=gpu 22 | 23 | # 构建Python命令 24 | CMD="python evaluation/videomodel_eval/batch_evaluate.py \ 25 | 
\"$DATASET_DIR\" \ 26 | \"$OUTPUT_DIR\" \ 27 | \"$RESULT_DIR\" \ 28 | --threshold \"$THRESHOLD\" \ 29 | --num-samples \"$NUM_SAMPLES\" \ 30 | --workers \"$WORKERS\" \ 31 | --fidelity-pixel-threshold \"$FIDELITY_PIXEL_THRESHOLD\" \ 32 | --frame-step \"$FRAME_STEP\" \ 33 | --tracker-type \"$TRACKER_TYPE\" \ 34 | --search-margin \"$SEARCH_MARGIN\"" 35 | 36 | # 如果指定了gpu参数,添加--gpu标志 37 | if [ "$USE_GPU" = "gpu" ] || [ "$USE_GPU" = "GPU" ]; then 38 | CMD="$CMD --gpu" 39 | echo "启用GPU加速模式" 40 | fi 41 | 42 | # 执行命令 43 | eval $CMD 44 | 45 | 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # VR-Bench + AutoEnv Dependencies 2 | # Python >= 3.10 3 | 4 | # Core dependencies 5 | numpy>=1.24.0 6 | opencv-python>=4.8.0 7 | pillow>=10.0.0 8 | pyyaml>=6.0 9 | imageio>=2.31.0 10 | imageio-ffmpeg>=0.4.9 11 | pydantic>=2.11.0 12 | pydantic-core>=2.33.0 13 | 14 | # Data processing 15 | scipy>=1.11.0 16 | scikit-image>=0.21.0 17 | 18 | # Image processing (AutoEnv) 19 | rembg>=2.0.68 20 | onnxruntime-gpu>=1.23.0 21 | 22 | # LLM API 23 | openai>=1.0.0 24 | litellm>=1.75.0 25 | tiktoken>=0.11.0 26 | 27 | # Async support 28 | aiohttp>=3.12.0 29 | aiohappyeyeballs>=2.6.0 30 | aiosignal>=1.4.0 31 | frozenlist>=1.7.0 32 | multidict>=6.6.0 33 | yarl>=1.20.0 34 | propcache>=0.3.0 35 | 36 | # HTTP clients 37 | httpx>=0.28.0 38 | httpcore>=1.0.0 39 | h11>=0.16.0 40 | certifi>=2024.0.0 41 | idna>=3.10 42 | sniffio>=1.3.0 43 | anyio>=4.9.0 44 | requests>=2.32.0 45 | urllib3>=2.5.0 46 | charset-normalizer>=3.4.0 47 | 48 | # Utilities 49 | python-dotenv>=1.0.0 50 | distro>=1.9.0 51 | tenacity>=9.1.0 52 | tqdm>=4.67.0 53 | packaging>=25.0 54 | typing-extensions>=4.14.0 55 | 56 | # Optional: GPU acceleration 57 | # cupy-cuda12x>=12.0.0 # For CUDA 12.x 58 | # cupy-cuda11x>=11.0.0 # For CUDA 11.x 59 | 60 | # Development dependencies (optional) 61 | # pytest>=7.4.0 62 
| # black>=23.0.0 63 | # flake8>=6.0.0 64 | # mypy>=1.5.0 65 | # pylint>=2.17.0 66 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/prompts/sokoban_prompt.py: -------------------------------------------------------------------------------- 1 | # 占位符格式: {player}, {goal}, {box}, {wall}, {floor} 2 | SOKOBAN_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a grid-based Sokoban puzzle. 3 | {wall} tiles represent walls and cannot be crossed. 4 | {floor} tiles represent open floor tiles that can be moved through. 5 | The {player} represents the player or agent. 6 | The {box} represents the box that needs to be pushed. 7 | The {goal} represents the goal destination for the box. 8 | Task: 9 | Infer the complete movement sequence required for the {player} to push the {box} onto the {goal} goal. 10 | The {player} moves in four directions: up, down, left, right. 11 | When the {player} moves into a box, it automatically pushes the box if there is space behind it. 12 | The box and the {player} cannot cross or overlap any walls. 13 | Diagonal movement is not allowed, and the camera remains fixed from a top-down view. 14 | Output Format: 15 | Return the entire movement sequence as a JSON array of directional actions, where each element is one of "up", "down", "left", or "right". 16 | Do not include any explanations or additional text. 17 | Example of expected output: 18 | {{ 19 | "actions": ["right", "right", "down", "left", "down"] 20 | }} 21 | """ 22 | 23 | SOKOBAN_USER_PROMPT_TEMPLATE = """Infer the complete movement sequence required for the {player} to push the {box} onto the {goal} goal. 
24 | """ 25 | 26 | 27 | -------------------------------------------------------------------------------- /games/maze/test_skin.py: -------------------------------------------------------------------------------- 1 | """ 2 | 测试 pymaze 换皮肤功能 3 | 4 | 使用方法(在仓库根目录下运行): 5 | python -m games.maze.test_skin # 使用默认皮肤 6 | python -m games.maze.test_skin custom_assets # 使用自定义皮肤 7 | """ 8 | 9 | import sys 10 | from pathlib import Path 11 | 12 | try: 13 | from . import constants 14 | from .generators import data_gen 15 | from .utils import file_utils 16 | except ImportError: 17 | import constants 18 | from generators import data_gen 19 | from utils import file_utils 20 | 21 | 22 | def main(): 23 | assets_folder = sys.argv[1] if len(sys.argv) > 1 else None 24 | 25 | if assets_folder: 26 | print(f"Using custom skin: {assets_folder}") 27 | else: 28 | print("Using default skin") 29 | 30 | output_dir = Path("test_maze_output") 31 | images_dir = output_dir / "images" 32 | states_dir = output_dir / "states" 33 | video_dir = output_dir / "videos" 34 | 35 | file_utils.setup_output_directories( 36 | str(output_dir), str(images_dir), str(states_dir), str(video_dir) 37 | ) 38 | 39 | print("Generating 1 test maze (9x9)...") 40 | data_gen.generate_data( 41 | 0, 1, 9, str(images_dir), str(states_dir), str(video_dir), 42 | assets_folder=assets_folder 43 | ) 44 | 45 | print(f"Done! 
Check output in: {output_dir}") 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | 51 | -------------------------------------------------------------------------------- /games/maze/templates/turn_count.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import random 4 | from typing import List 5 | 6 | from ..utils import maze_utils 7 | from .base_template import BaseTemplate 8 | 9 | 10 | class TurnCount(BaseTemplate): 11 | def __init__(self, maze: List[List[int]], image_id: int) -> None: 12 | super().__init__(maze, image_id) 13 | 14 | self.question_id = 4 15 | self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}" 16 | self.qa_type = "TransitionPath" 17 | self.question_description = "Count how many turns it takes to reach the finish." 18 | self.qa_level = "Hard" 19 | self.question += ( 20 | "Find the path to the finish and count the number of turns it takes to get there. " 21 | "You only need to provide one number." 22 | ) 23 | 24 | solver_rng = random.Random(image_id) 25 | path_info: List[str] = [] 26 | path = maze_utils.dfs_solve_maze(maze, path_info, rng=solver_rng) 27 | turn_info: List[str] = [] 28 | turns = maze_utils.count_turns(path, turn_info) 29 | 30 | self.answer = str(turns) 31 | self.options = None 32 | 33 | self.analysis = "First," + "".join(path_info) 34 | self.analysis += f"Therefore, the path is: {maze_utils.path_to_string(path)}\n\nThen," 35 | self.analysis += "".join(turn_info) 36 | self.analysis += f"\nIn summary, the total number of turns is {turns}" 37 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/prompts/pathfinder_prompt.py: -------------------------------------------------------------------------------- 1 | # 占位符格式: {start}, {end}, {road} 2 | PATHFINDER_SYSTEM_PROMPT_TEMPLATE = """You are given an image of a pathfinding puzzle. 
3 | The image shows a network of curved paths connecting various waypoints. 4 | Each waypoint (intersection or junction) is labeled with a letter or letter combination (A, B, C, ..., Z, AA, AB, etc.). 5 | The {start} represents the starting point. 6 | The {end} represents the goal or destination. 7 | 8 | Task: 9 | Find the shortest valid path from the {start} starting point to the {end} goal. 10 | The path must follow the visible roads/paths in the image. 11 | You can only move along the connected paths shown in the image. 12 | 13 | Output Format: 14 | You MUST return a JSON object with a "path" field containing an array of waypoint labels. 15 | The array should start with the label closest to the starting point and end with the label closest to the goal. 16 | Do not include any explanations or additional text. 17 | 18 | Required format: 19 | {{ 20 | "path": ["A", "B", "C", "D", "E"] 21 | }} 22 | 23 | For puzzles with more than 26 waypoints, labels may be multi-character (e.g., "AA", "AB"): 24 | {{ 25 | "path": ["A", "Z", "AA", "AB"] 26 | }} 27 | 28 | Important: The "path" field MUST be an array of strings, not a single string. 29 | """ 30 | 31 | PATHFINDER_USER_PROMPT_TEMPLATE = """Find the shortest path from the {start} starting point to the {end} goal by following the labeled waypoints. 
32 | """ 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | pip-wheel-metadata/ 20 | share/python-wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | bak_dataset/ 26 | dataset/ 27 | vlm_eval_results/ 28 | huggingface_model/ 29 | bak/ 30 | # Virtual Environment 31 | venv/ 32 | ENV/ 33 | env/ 34 | .venv 35 | .env 36 | # IDE 37 | .vscode/ 38 | .idea/ 39 | *.swp 40 | *.swo 41 | *~ 42 | .DS_Store 43 | 44 | # Jupyter Notebook 45 | .ipynb_checkpoints 46 | *.ipynb 47 | 48 | # Testing 49 | .pytest_cache/ 50 | .coverage 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | 55 | # Logs 56 | *.log 57 | logs/ 58 | *.out 59 | 60 | # Dataset and Generated Files 61 | dataset/ 62 | dataset_*/ 63 | bak_dataset/ 64 | debug/ 65 | output/ 66 | eval_results/ 67 | *.mp4 68 | *.avi 69 | *.gif 70 | *.png 71 | *.jpg 72 | *.jpeg 73 | 74 | # But include skins folder 75 | !skins/ 76 | !skins/** 77 | 78 | # Temporary Files 79 | tmp/ 80 | temp/ 81 | *.tmp 82 | *.bak 83 | *.swp 84 | 85 | # Model Checkpoints 86 | checkpoints/ 87 | *.pth 88 | *.pt 89 | *.ckpt 90 | *.h5 91 | 92 | # Cache 93 | .cache/ 94 | *.cache 95 | 96 | # OS 97 | Thumbs.db 98 | .DS_Store 99 | 100 | #dataset 101 | dataset_VR 102 | dataset_output/ 103 | 104 | # AutoEnv workspace (generated assets and costs) 105 | AutoEnv/workspace/ 106 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | # 统一数据集生成配置 2 | # 支持多种游戏类型:sokoban, maze, pathfinder, trapfield 3 | 4 | # 游戏类型 (sokoban, maze, pathfinder, trapfield) 5 | game_type: 
"sokoban" 6 | 7 | # 皮肤文件夹根目录 8 | skins_root: "skins/sokoban/new" 9 | 10 | # 输出根目录 11 | output_root: "generated_levels" 12 | 13 | # 难度配置 14 | # Sokoban 难度配置 15 | difficulties: 16 | easy: 17 | board_size: 5 18 | num_boxes: 1 19 | count: 120 20 | 21 | medium: 22 | board_size: 8 23 | num_boxes: 1 24 | count: 120 25 | 26 | hard: 27 | board_size: 12 28 | num_boxes: 1 29 | count: 120 30 | 31 | # Maze 难度配置(切换 game_type 为 maze 时使用) 32 | # game_type: "maze" 33 | # skins_root: "skins/maze" 34 | # difficulties: 35 | # small: 36 | # maze_size: 9 37 | # count: 120 38 | # 39 | # medium: 40 | # maze_size: 11 41 | # count: 120 42 | # 43 | # large: 44 | # maze_size: 13 45 | # count: 120 46 | 47 | # PathFinder 难度配置(切换 game_type 为 pathfinder 时使用) 48 | # game_type: "pathfinder" 49 | # skins_root: "skins/pathfinder" 50 | # difficulties: 51 | # easy: 52 | # difficulty: "easy" 53 | # image_size: 1024 54 | # count: 120 55 | # 56 | # medium: 57 | # difficulty: "medium" 58 | # image_size: 1024 59 | # count: 120 60 | # 61 | # hard: 62 | # difficulty: "hard" 63 | # image_size: 1024 64 | # count: 120 65 | 66 | # 生成配置 67 | generation: 68 | check_solvable: true 69 | max_attempts: 50 70 | fps: 24 # 固定24fps(连续移动动画) 71 | add_grid: false 72 | max_duplicate_retries: 100 73 | 74 | # 并行配置 75 | parallel: 76 | max_workers: 4 77 | 78 | -------------------------------------------------------------------------------- /AutoEnv/base/pipeline/base_node.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import uuid 4 | from abc import ABC, abstractmethod 5 | 6 | from pydantic import BaseModel, Field 7 | 8 | 9 | def _generate_node_id() -> str: 10 | return uuid.uuid4().hex[:8] 11 | 12 | 13 | class NodeContext(BaseModel): 14 | """Base context for node execution. 
Subclasses define specific fields.""" 15 | 16 | model_config = {"arbitrary_types_allowed": True, "extra": "allow"} 17 | 18 | 19 | class BaseNode(BaseModel, ABC): 20 | """Abstract base class for DAG nodes.""" 21 | 22 | node_id: str = Field(default_factory=_generate_node_id) 23 | successors: list["BaseNode"] = Field(default_factory=list) 24 | predecessors: list["BaseNode"] = Field(default_factory=list) 25 | 26 | model_config = {"arbitrary_types_allowed": True} 27 | 28 | def add(self, nodes: BaseNode | list[BaseNode]) -> BaseNode | list[BaseNode]: 29 | """Add successor node(s).""" 30 | node_list = [nodes] if isinstance(nodes, BaseNode) else nodes 31 | for node in node_list: 32 | if node not in self.successors: 33 | self.successors.append(node) 34 | if self not in node.predecessors: 35 | node.predecessors.append(self) 36 | return nodes 37 | 38 | def __rshift__(self, other: BaseNode | list[BaseNode]) -> BaseNode | list[BaseNode]: 39 | """Syntactic sugar for a >> b.""" 40 | return self.add(other) 41 | 42 | @abstractmethod 43 | async def execute(self, ctx: NodeContext) -> None: 44 | """Execute node logic. Read inputs from ctx and write outputs to ctx.""" 45 | -------------------------------------------------------------------------------- /prompts/videomodel_pathfinder_prompt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Pathfinder (irregular_maze) 游戏的视频模型 prompt 模板。 4 | 5 | 占位符: {start}, {end}, {road} 6 | 从 description.json 的 visual_description 中读取。 7 | """ 8 | from string import Template 9 | 10 | PATHFINDER_PROMPT_TEMPLATE = Template("""Create a 2D animation based on the provided image of a maze. The $start slides smoothly along the $road path, stopping perfectly on the $end. The $start never slides or crosses into the black areas of the maze. The camera is a static, top-down view showing the entire maze. 11 | 12 | Maze: 13 | The maze paths are $road, the walls are black. 
14 | The $start moves to the goal position, represented by $end. 15 | The $start slides smoothly along the $road path. 16 | The $start never slides or crosses into the black areas of the maze. 17 | The $start stops perfectly on the $end. 18 | 19 | Scene: 20 | No change in scene composition. 21 | No change in the layout of the maze. 22 | The $start travels along the $road path without speeding up or slowing down. 23 | 24 | Camera: 25 | Static camera. 26 | No zoom. 27 | No pan. 28 | No glitches, noise, or artifacts.""") 29 | 30 | 31 | def get_pathfinder_prompt(visual_description: dict) -> str: 32 | """ 33 | 生成 pathfinder 游戏的动态 prompt。 34 | 35 | Args: 36 | visual_description: 来自 description.json 的 visual_description 字段 37 | - start: 起点描述 (如 "green circle") 38 | - end: 终点描述 (如 "red circle") 39 | - road: 道路描述 (如 "white square") 40 | """ 41 | return PATHFINDER_PROMPT_TEMPLATE.substitute( 42 | start=visual_description.get("start", "green circle"), 43 | end=visual_description.get("end", "red circle"), 44 | road=visual_description.get("road", "white path"), 45 | ) 46 | 47 | -------------------------------------------------------------------------------- /games/maze/templates/player_position.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import List, Set 4 | 5 | from .. import constants 6 | from ..utils import maze_utils 7 | from .base_template import BaseTemplate 8 | 9 | 10 | class PlayerPosition(BaseTemplate): 11 | def __init__(self, maze: List[List[int]], image_id: int) -> None: 12 | super().__init__(maze, image_id) 13 | 14 | self.qa_type = "StateInfo" 15 | self.question_id = 1 16 | self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}" 17 | self.question_description = "Ask for the position of player." 
class GoalPosition(BaseTemplate):
    """Multiple-choice question asking for the (row, col) of the goal cell."""

    def __init__(self, maze: List[List[int]], image_id: int) -> None:
        """Build the question, its lettered options, the answer and the analysis.

        Args:
            maze: Grid of cell codes; the goal is located with
                ``maze_utils.find_position(maze, constants.GOAL_CELL)``.
            image_id: Numeric id used to build ``data_id``.
        """
        super().__init__(maze, image_id)

        self.question_id = 2
        self.qa_type = "StateInfo"
        self.qa_level = "Easy"
        self.question_description = "Ask for the position of goal within the maze."
        self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}"

        goal_row, goal_col = maze_utils.find_position(maze, constants.GOAL_CELL)
        correct = f"({goal_row}, {goal_col})"

        # The correct cell plus its four orthogonal neighbours; a set keeps
        # the candidate strings unique.
        candidates = {correct}
        for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            candidates.add(f"({goal_row + d_row}, {goal_col + d_col})")

        # Options are listed in sorted (string) order and lettered from "A".
        self.options = []
        for offset, coord in enumerate(sorted(candidates)):
            letter = chr(ord("A") + offset)
            self.options.append(f"{letter}. {coord}")
            if coord == correct:
                self.answer = letter

        self.question += (
            "Which of the following are the coordinates of the goal?\n\n**Options:**"
            + "".join(f"\n{option}" for option in self.options)
        )

        self.analysis = (
            "Take a look at the game screen, the green block represents the goal.\n"
            f"The coordinates of goal are {correct}, so the right option is {self.answer}"
        )
# Rendering configuration
DEFAULT_IMAGE_SIZE = 500  # default image size (used when no difficulty is specified)
ROAD_WIDTH = 35  # road width (thinner)
NODE_RADIUS = 20  # radius of the start / end markers
START_COLOR = (255, 0, 0)  # start marker color (red)
END_COLOR = (0, 255, 0)  # end marker color (green)
ROAD_COLOR = (255, 255, 255)  # road color (white)
BG_COLOR = (0, 0, 0)  # background color (black)

# Curve configuration
CURVE_SEGMENTS = 400  # number of segments per curve (smoother)
CURVE_CONTROL_POINTS = 3  # number of control points per curve
CURVE_BEND_FACTOR = 0.25  # how strongly curves bend

# Margin left blank along the image border
MARGIN = 80

# Video configuration
FRAMES_PER_SECOND = 24  # frame rate (kept consistent with the other games)
MOVEMENT_SPEED = 1.0  # movement speed (pixels per frame)
STRONG VISUAL DISTINCTION: This asset must be HIGHLY DISTINGUISHABLE from other game elements: 20 | - Use CONTRASTING colors (different hue, saturation, or brightness) 21 | - Use DISTINCT shapes and visual patterns 22 | - Ensure HIGH CONTRAST and CLEAR VISUAL IDENTITY 23 | - Make it instantly recognizable at a glance 24 | 5. WALL TILE REQUIREMENTS (for wall assets only): 25 | - Wall tiles MUST be COMPLETELY FILLED squares with NO empty or transparent areas 26 | - Wall MUST cover the ENTIRE tile area from edge to edge 27 | - NO irregular shapes, peaks, or protrusions extending beyond the square boundary 28 | - NO gaps, holes, or partial coverage in wall tiles 29 | - Wall and floor MUST have DISTINCTLY DIFFERENT visual appearance (different colors, textures, or patterns) 30 | - Wall should be clearly recognizable as an impassable barrier 31 | 6. Balance: Maintain thematic coherence with the reference while ensuring strong visual differentiation and pixel art aesthetics 32 | """ -------------------------------------------------------------------------------- /prompts/videomodel_maze_prompt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Maze 游戏的视频模型 prompt 模板。 4 | 5 | 占位符: {player}, {goal}, {wall}, {floor} 6 | 从 description.json 的 visual_description 中读取。 7 | """ 8 | from string import Template 9 | 10 | MAZE_PROMPT_TEMPLATE = Template("""Create a 2D animation based on the provided image of a maze. The $player slides smoothly along the $floor path, stopping perfectly on the $goal. The $player never slides or crosses into the $wall areas of the maze. The camera is a static, top-down view showing the entire maze. 11 | 12 | Maze: 13 | The maze paths are $floor, the walls are $wall. 14 | The $player moves to the goal position, represented by $goal. 15 | The $player slides smoothly along the $floor path. 16 | The $player never slides or crosses into the $wall areas of the maze. 
@dataclass
class Entity:
    """A game object described by a pixel position, a bounding box and,
    optionally, the grid cell it occupies."""

    pixel_pos: Tuple[int, int]  # (x, y), serialised as {"x": ..., "y": ...}
    bbox: BBox
    grid_pos: Optional[Position] = None  # None when no grid cell is attached

    def to_dict(self) -> Dict:
        """Serialise to a plain dict; ``grid_pos`` is omitted when it is None."""
        result = {
            "pixel_pos": {"x": self.pixel_pos[0], "y": self.pixel_pos[1]},
            "bbox": self.bbox.to_dict()
        }

        if self.grid_pos is not None:
            result["grid_pos"] = self.grid_pos.to_dict()

        return result

    @classmethod
    def from_dict(cls, data: Dict) -> 'Entity':
        """Rebuild an Entity from the structure produced by :meth:`to_dict`.

        ``grid_pos`` may be absent or explicitly ``None`` (JSON ``null``);
        both yield ``grid_pos=None``.

        Raises:
            KeyError: If ``pixel_pos`` or ``bbox`` is missing.
        """
        pixel_data = data["pixel_pos"]
        # An explicit null must not be forwarded to Position.from_dict,
        # which expects a mapping.
        grid_data = data.get("grid_pos")
        grid_pos = Position.from_dict(grid_data) if grid_data is not None else None

        return cls(
            pixel_pos=(pixel_data["x"], pixel_data["y"]),
            bbox=BBox.from_dict(data["bbox"]),
            grid_pos=grid_pos
        )

    @classmethod
    def from_grid_pos(cls, row: int, col: int, cell_size: int) -> 'Entity':
        """Create an Entity for grid cell (row, col).

        The pixel position is the centre of the cell (integer division) and
        the bounding box is the cell itself.
        """
        pixel_x = col * cell_size + cell_size // 2
        pixel_y = row * cell_size + cell_size // 2

        return cls(
            pixel_pos=(pixel_x, pixel_y),
            bbox=BBox.from_grid_pos(row, col, cell_size),
            grid_pos=Position(row=row, col=col)
        )

    @classmethod
    def from_pixel_pos(cls, x: int, y: int, bbox_size: int) -> 'Entity':
        """Create an Entity from pixel coordinates (for non-grid games).

        The bounding box is a ``bbox_size`` square positioned around (x, y);
        ``grid_pos`` is left as None.
        """
        return cls(
            pixel_pos=(x, y),
            bbox=BBox(
                x=x - bbox_size // 2,
                y=y - bbox_size // 2,
                width=bbox_size,
                height=bbox_size
            ),
            grid_pos=None
        )
class BasePipeline(BaseModel):
    """DAG pipeline that runs nodes level by level, sharing one NodeContext."""

    root: BaseNode = Field(...)

    model_config = {"arbitrary_types_allowed": True}

    def _collect_nodes(self) -> list[BaseNode]:
        """Return every node reachable from ``root`` in depth-first pre-order.

        Nodes are de-duplicated by ``node_id``.
        """
        seen: set[str] = set()
        ordered: list[BaseNode] = []

        # Explicit stack instead of recursion; successors are pushed in
        # reverse so they are popped (visited) in their original order.
        stack: list[BaseNode] = [self.root]
        while stack:
            current = stack.pop()
            if current.node_id in seen:
                continue
            seen.add(current.node_id)
            ordered.append(current)
            stack.extend(list(current.successors)[::-1])

        return ordered

    async def run(self, ctx: NodeContext | None = None) -> NodeContext:
        """Run the pipeline and return the shared context.

        Each round gathers every not-yet-executed node whose remaining
        predecessor count is zero and awaits them together.

        Args:
            ctx: Context passed to every node; a fresh NodeContext is
                created when omitted.

        Raises:
            ValueError: If nodes remain but none is ready to run.
        """
        context = NodeContext() if ctx is None else ctx

        all_nodes = self._collect_nodes()
        by_id = {node.node_id: node for node in all_nodes}
        remaining_preds = {node.node_id: len(node.predecessors) for node in all_nodes}
        done: set[str] = set()

        while len(done) < len(all_nodes):
            level = [
                node_id
                for node_id, remaining in remaining_preds.items()
                if remaining == 0 and node_id not in done
            ]
            if not level:
                raise ValueError("Cycle detected in DAG")

            await asyncio.gather(*[by_id[node_id].execute(context) for node_id in level])

            done.update(level)
            # Release the successors of everything that just finished.
            for node_id in level:
                for successor in by_id[node_id].successors:
                    remaining_preds[successor.node_id] -= 1

        return context
def get_sokoban_prompt(visual_description: dict) -> str:
    """Build the video-model prompt for the sokoban game.

    Args:
        visual_description: The ``visual_description`` field of a skin's
            ``description.json``. Recognised keys:

            - ``player``: description of the player (e.g. "blue circle")
            - ``box``: description of the box (e.g. "yellow square")
            - ``goal``: description of the goal (e.g. "pink square")
            - ``wall``: description of the walls (e.g. "gray square")
            - ``floor``: description of the floor (e.g. "white square")

            Missing keys fall back to "blue ball", "yellow square",
            "red square", "gray wall" and "white floor". ``None`` (or an
            empty mapping) is accepted and yields the all-defaults prompt.

    Returns:
        The prompt text with every placeholder substituted.
    """
    # A skin without a usable description may hand us None; treat it as
    # "no overrides" instead of failing on ``None.get``.
    description = visual_description or {}
    return SOKOBAN_PROMPT_TEMPLATE.substitute(
        player=description.get("player", "blue ball"),
        box=description.get("box", "yellow square"),
        goal=description.get("goal", "red square"),
        wall=description.get("wall", "gray wall"),
        floor=description.get("floor", "white floor"),
    )
17 | - Vertical movements (up, down): Each move spans 3 grid units vertically via a ladder. \ 18 | The ladder must be present at the starting position. 19 | - The sphere cannot move through empty space or overlap any cube structure. 20 | - All movements must follow valid cube surfaces and ladder connections. 21 | 22 | The six valid directions of movement are: 23 | "forward_left" – move diagonally forward and to the left (2 units) within the same layer 24 | "forward_right" – move diagonally forward and to the right (2 units) within the same layer 25 | "backward_left" – move diagonally backward and to the left (2 units) within the same layer 26 | "backward_right" – move diagonally backward and to the right (2 units) within the same layer 27 | "up" – move vertically upward (3 units) via a ladder 28 | "down" – move vertically downward (3 units) via a ladder 29 | 30 | Output Format: 31 | Return the full sequence of movement directions as a JSON array, where each \ 32 | step is one of the six valid directions. 33 | Do not include any explanations, reasoning, or extra text. 34 | 35 | Example of expected output: 36 | {{{{ 37 | "path": ["up", "forward_right", "forward_left", "up", "forward_right"] 38 | }}}} 39 | """ 40 | 41 | 42 | MAZE3D_USER_PROMPT_TEMPLATE = """Infer the shortest valid 3D path for the {ball} \ 43 | to move from its starting position to the {goal_cube}. 44 | """ 45 | 46 | -------------------------------------------------------------------------------- /prompts/videomodel_maze3d_prompt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Maze3D 游戏的视频模型 prompt 模板。 4 | 5 | 占位符: {ball}, {start_cube}, {goal_cube}, {default_cube} 6 | 从 description.json 的 visual_description 中读取。 7 | """ 8 | from string import Template 9 | 10 | MAZE3D_PROMPT_TEMPLATE = Template("""Create a 3D animation based on the provided image of a cube maze. 
A $ball slides smoothly along the $default_cube pathway, climbs up the vertical ladders step by step, and finally stops perfectly on the $goal_cube at the top. The $ball never touches or passes through the $start_cube or any non-$default_cube areas of the maze. The camera remains static in an isometric, top-down angle showing the entire structure. 11 | 12 | Maze: 13 | The maze consists of stacked transparent $default_cube forming a 3D pathway. 14 | The $goal_cube represents the goal position. 15 | The $start_cube marks the starting platform where the $ball begins. 16 | The $ball moves upward along the $default_cube path, climbing vertically via the ladders. 17 | The ball slides smoothly without sudden changes in direction or speed. 18 | The ball stops exactly on top of the $goal_cube at the end. 19 | 20 | Scene: 21 | No structural or color changes during animation. 22 | The maze layout and cube arrangement remain unchanged. 23 | The $ball moves continuously at a constant speed along the 3D path. 24 | 25 | Camera: 26 | Static, isometric camera view. 27 | No zoom or pan. 
@dataclass
class BBox:
    """Axis-aligned box stored as an origin corner (x, y) plus width and height."""

    x: int
    y: int
    width: int
    height: int

    @property
    def center_x(self) -> int:
        """Horizontal centre: ``x`` plus half the width (integer division)."""
        return self.x + self.width // 2

    @property
    def center_y(self) -> int:
        """Vertical centre: ``y`` plus half the height (integer division)."""
        return self.y + self.height // 2

    @property
    def center(self) -> Tuple[int, int]:
        """Centre point as ``(center_x, center_y)``."""
        return (self.center_x, self.center_y)

    def to_dict(self) -> Dict[str, int]:
        """Serialise the four fields plus the derived centre coordinates."""
        exported = ("x", "y", "width", "height", "center_x", "center_y")
        return {name: getattr(self, name) for name in exported}

    def to_tuple(self) -> Tuple[int, int, int, int]:
        """Return ``(x, y, width, height)``."""
        return (self.x, self.y, self.width, self.height)

    @classmethod
    def from_dict(cls, data: Dict[str, int]) -> 'BBox':
        """Build a BBox from a mapping; only x, y, width and height are read."""
        stored = ("x", "y", "width", "height")
        return cls(**{name: data[name] for name in stored})

    @classmethod
    def from_grid_pos(cls, row: int, col: int, cell_size: int) -> 'BBox':
        """Return the box covering grid cell (row, col) for square cells."""
        origin_x = col * cell_size
        origin_y = row * cell_size
        return cls(x=origin_x, y=origin_y, width=cell_size, height=cell_size)
import constants 9 | from .generators import data_gen 10 | from .templates.base_template import BaseTemplate 11 | from .utils import file_utils 12 | except ImportError: 13 | import constants 14 | from generators import data_gen 15 | from templates.base_template import BaseTemplate 16 | from utils import file_utils 17 | 18 | # Configure output directory and maze counts here 19 | DEFAULT_OUTPUT_DIR = "maze_dataset_py" 20 | DEFAULT_COUNTS = { 21 | 9: 1, # number of 9x9 mazes 22 | 11: 1, # number of 11x11 mazes 23 | 13: 1, # number of 13x13 mazes 24 | } 25 | 26 | 27 | def main(assets_folder: Optional[str] = None) -> int: 28 | counts: List[int] = [DEFAULT_COUNTS.get(size, 0) for size in constants.ALLOWED_SIZES] 29 | if any(count < 0 for count in counts): 30 | raise ValueError("Counts must be non-negative integers") 31 | 32 | output_dir = Path(DEFAULT_OUTPUT_DIR) 33 | images_dir = output_dir / constants.IMAGES_DIR 34 | states_dir = output_dir / constants.STATES_DIR 35 | video_dir = output_dir / constants.VIDEOS_DIR 36 | data_file = output_dir / constants.DATA_PATH 37 | 38 | file_utils.setup_output_directories( 39 | str(output_dir), str(images_dir), str(states_dir), str(video_dir) 40 | ) 41 | 42 | start_id = 0 43 | templates: List[BaseTemplate] = [] 44 | 45 | for size, label, count in zip(constants.ALLOWED_SIZES, constants.SIZE_LABELS, counts): 46 | if count <= 0: 47 | continue 48 | print(f"Generating {count} {label} mazes...") 49 | templates.extend( 50 | data_gen.generate_data( 51 | start_id, count, size, str(images_dir), str(states_dir), str(video_dir), 52 | assets_folder=assets_folder 53 | ) 54 | ) 55 | start_id += count 56 | 57 | data_gen.save_data_to_json(templates, str(data_file)) 58 | 59 | print(f"Data generation completed. 
class BaseTemplate:
    """Shared state and serialisation for the maze question templates."""

    data_id: str
    qa_type: str
    question_id: int
    question_description: str
    image: str
    state: str
    plot_level: str
    qa_level: str
    question: str
    answer: str
    options: Optional[List[str]]
    analysis: str

    def __init__(self, maze: Sequence[Sequence[int]], image_id: int) -> None:
        """Initialise the fields common to every question.

        Args:
            maze: Maze grid; only its length is used here, to look up the
                plot level in ``constants.PLOT_LEVELS``.
            image_id: Numeric id, zero-padded to five digits in the image
                and state paths.
        """
        padded_id = f"{image_id:05d}"
        self.image = f"{constants.IMAGES_DIR}/image_{padded_id}.png"
        self.state = f"{constants.STATES_DIR}/state_{padded_id}.json"
        self.plot_level = constants.PLOT_LEVELS.get(len(maze), "Unknown")
        # Every question starts with the shared rules text; subclasses append.
        self.question = _BASE_RULES
        self.answer = ""
        self.analysis = ""
        self.options: Optional[List[str]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the template as a plain dict.

        Attributes that were never set fall back to an empty string
        (``None`` for ``question_id`` and ``options``); ``image`` and
        ``state`` are read directly.
        """

        def read(name: str, fallback: Any = "") -> Any:
            return getattr(self, name, fallback)

        return {
            "data_id": read("data_id"),
            "qa_type": read("qa_type"),
            "question_id": read("question_id", None),
            "question_description": read("question_description"),
            "image": self.image,
            "state": self.state,
            "plot_level": read("plot_level"),
            "qa_level": read("qa_level"),
            "question": read("question"),
            "answer": read("answer"),
            "options": read("options", None),
            "analysis": read("analysis"),
        }
def load_colors_from_skin(skin_folder: str) -> Dict[str, str]:
    """Load the color configuration from a skin folder.

    Args:
        skin_folder: Path of the skin folder; it must contain ``colors.json``.

    Returns:
        The color configuration dictionary read from ``colors.json``.

    Raises:
        FileNotFoundError: If no folder is given, or the folder or its
            ``colors.json`` does not exist.
        ValueError: If ``colors.json`` is not valid JSON, is not a JSON
            object, or lacks one of ``REQUIRED_COLOR_KEYS``.
    """
    if not skin_folder:
        raise FileNotFoundError("No skin folder specified")

    skin_path = Path(skin_folder)
    if not skin_path.exists():
        raise FileNotFoundError(f"Skin folder not found: {skin_folder}")

    colors_path = skin_path / 'colors.json'

    if not colors_path.exists():
        raise FileNotFoundError(f"colors.json not found in {skin_folder}")

    try:
        with open(colors_path, 'r', encoding='utf-8') as f:
            colors = json.load(f)
    except json.JSONDecodeError as e:
        # Chain the cause so the parser's position info stays in the traceback.
        raise ValueError(f"Failed to parse colors.json in {skin_folder}: {e}") from e

    # Valid JSON that is not an object (a number, list, string, ...) would
    # otherwise surface as a TypeError or a misleading "missing keys" error.
    if not isinstance(colors, dict):
        raise ValueError(
            f"colors.json in {skin_folder} must contain a JSON object, "
            f"got {type(colors).__name__}"
        )

    missing_keys = [k for k in REQUIRED_COLOR_KEYS if k not in colors]
    if missing_keys:
        raise ValueError(f"Missing required color keys {missing_keys} in {colors_path}")

    logging.debug(f"Loaded colors from {colors_path}")
    return colors
load_skin_description(skin_folder: str) -> Optional[Dict[str, str]]: 57 | """ 58 | 从皮肤目录加载视觉描述 59 | 60 | Args: 61 | skin_folder: 皮肤目录路径 62 | 63 | Returns: 64 | 视觉描述字典,如果加载失败则返回 None 65 | """ 66 | if not skin_folder: 67 | return None 68 | 69 | desc_path = Path(skin_folder) / 'description.json' 70 | 71 | if not desc_path.exists(): 72 | return None 73 | 74 | try: 75 | with open(desc_path, 'r', encoding='utf-8') as f: 76 | data = json.load(f) 77 | 78 | return data.get('visual_description') 79 | 80 | except Exception as e: 81 | logging.error(f"Failed to load description from {skin_folder}: {e}") 82 | return None 83 | 84 | 85 | -------------------------------------------------------------------------------- /games/maze/templates/find_path_to_goal.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import random 4 | from typing import List, Set 5 | 6 | from ..utils import maze_utils 7 | from .base_template import BaseTemplate 8 | 9 | 10 | class FindPathToGoal(BaseTemplate): 11 | def __init__(self, maze: List[List[int]], image_id: int) -> None: 12 | super().__init__(maze, image_id) 13 | 14 | self.question_id = 3 15 | self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}" 16 | self.qa_type = "TransitionPath" 17 | self.question_description = "Find the path to the goal" 18 | self.qa_level = "Medium" 19 | self.question += "Which sequence of movements will allow the player to reach the destination?\n\n**Options:**" 20 | 21 | solver_rng = random.Random(image_id) 22 | info: List[str] = [] 23 | path = maze_utils.dfs_solve_maze(maze, info, rng=solver_rng) 24 | actions = _path_to_actions(path) 25 | answer_str = ", ".join(actions) 26 | 27 | variant_rng = random.Random(image_id + 1) 28 | variants: Set[str] = {answer_str} 29 | for _ in range(4): 30 | variants.add(_random_path(len(actions), variant_rng)) 31 | 32 | option_list = sorted(variants) 33 | self.options = [] 34 | label_code = ord("A") 35 | for 
class PathFinderBoard:
    """PathFinder game board: road segments plus start, end and solution data."""

    def __init__(
        self,
        segments: List[PathSegment],
        start_point: Tuple[float, float],
        end_point: Tuple[float, float],
        solution_segments: List[int],
        solution_path: List[Tuple[float, float]] = None,  # node path of the solution
        image_size: int = 800,
        road_width: int = 35  # road width
    ):
        """Store the board description.

        Args:
            segments: All road segments on the board.
            start_point: Start point.
            end_point: End point.
            solution_segments: Solution segment list; an empty list marks
                the board as unsolvable (see :meth:`is_solvable`).
            solution_path: Node sequence of the solution; ``None`` is
                stored as an empty list.
            image_size: Image size.
            road_width: Road width.
        """
        self.segments = segments
        self.start_point = start_point
        self.end_point = end_point
        self.solution_segments = solution_segments
        self.solution_path = solution_path or []  # node sequence
        self.image_size = image_size
        self.road_width = road_width  # keep the road width

    def is_solvable(self) -> bool:
        """Return True when at least one solution segment is recorded."""
        return len(self.solution_segments) > 0

    def to_dict(self) -> dict:
        """Serialise the board to plain lists and scalars.

        Every point is emitted as a list, segment control points included,
        so the result is identical before and after a JSON round trip.
        """
        return {
            'segments': [[list(pt) for pt in seg.control_points] for seg in self.segments],
            'start_point': list(self.start_point),
            'end_point': list(self.end_point),
            'solution_segments': self.solution_segments,
            'solution_path': [list(pt) for pt in self.solution_path],
            'image_size': self.image_size,
            'road_width': self.road_width
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'PathFinderBoard':
        """Rebuild a board from the structure produced by :meth:`to_dict`.

        Points are converted back to tuples, segment control points
        included. ``solution_path``, ``image_size`` and ``road_width`` are
        optional and default to ``[]``, 800 and 35.
        """
        segments = [
            PathSegment([tuple(pt) for pt in points])
            for points in data['segments']
        ]
        return cls(
            segments=segments,
            start_point=tuple(data['start_point']),
            end_point=tuple(data['end_point']),
            solution_segments=data['solution_segments'],
            solution_path=[tuple(pt) for pt in data.get('solution_path', [])],
            image_size=data.get('image_size', 800),
            road_width=data.get('road_width', 35)
        )
19 | self.qa_level = "Medium" 20 | 21 | rng = random.Random(image_id) 22 | 23 | directions = maze_utils.get_available_directions(maze) 24 | if not directions: 25 | raise ValueError("Player has no available moves to build question") 26 | direction = rng.choice(directions) 27 | 28 | self.question += f"What are the coordinates of player after moving {direction}?\n\n**Options:**" 29 | 30 | row, col = maze_utils.find_position(maze, constants.PLAYER_CELL) 31 | if direction == "up": 32 | answer_str = f"({row - 1}, {col})" 33 | elif direction == "down": 34 | answer_str = f"({row + 1}, {col})" 35 | elif direction == "left": 36 | answer_str = f"({row}, {col - 1})" 37 | else: 38 | answer_str = f"({row}, {col + 1})" 39 | 40 | choices: Set[str] = { 41 | answer_str, 42 | f"({row + 1}, {col})", 43 | f"({row - 1}, {col})", 44 | f"({row}, {col + 1})", 45 | f"({row}, {col - 1})", 46 | f"({row}, {col})", 47 | } 48 | 49 | option_list = sorted(choices) 50 | self.options = [] 51 | label_code = ord("A") 52 | for entry in option_list: 53 | label = chr(label_code) 54 | self.options.append(f"{label}. {entry}") 55 | if entry == answer_str: 56 | self.answer = label 57 | label_code += 1 58 | 59 | for option in self.options: 60 | self.question += f"\n{option}" 61 | 62 | self.analysis = ( 63 | f"Observe the screen, the position of player is ({row}, {col}). " 64 | f"After moving {direction}, the player is in {answer_str}. " 65 | f"Therefore, the right option is {self.answer}" 66 | ) 67 | -------------------------------------------------------------------------------- /games/maze/generators/data_gen.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from pathlib import Path 5 | from typing import Dict, List, Optional 6 | import random 7 | 8 | from .. 
def generate_data(
    id_begin: int,
    amount: int,
    maze_size: int,
    images_dir: str,
    states_dir: str,
    video_dir: str,
    assets_folder: Optional[str] = None,
) -> List[BaseTemplate]:
    """Generate ``amount`` mazes and all artefacts that belong to each one.

    For every id in ``[id_begin, id_begin + amount)`` a square maze of side
    ``maze_size`` is created, rendered to an image, saved as a state file,
    solved, rendered as a solution video, and turned into question records.

    Args:
        id_begin: First sample id.
        amount: Number of consecutive samples to generate.
        maze_size: Row and column count passed to the maze generator.
        images_dir: Directory receiving ``image_XXXXX.png``.
        states_dir: Directory receiving ``state_XXXXX.json``.
        video_dir: Directory receiving ``video_XXXXX.gif``.
        assets_folder: Optional skin folder forwarded to the renderers.

    Returns:
        The question records of all generated samples, in id order.
    """
    image_root = Path(images_dir)
    state_root = Path(states_dir)
    video_root = Path(video_dir)

    records: List[BaseTemplate] = []
    for sample_id in range(id_begin, id_begin + amount):
        grid = maze_gen.generate_maze(maze_size, maze_size)

        image_file = str(image_root / f"image_{sample_id:05d}.png")
        state_file = str(state_root / f"state_{sample_id:05d}.json")
        video_file = str(video_root / f"video_{sample_id:05d}.gif")

        image_gen.draw_maze(grid, constants.CELL_SIZE, image_file, assets_folder=assets_folder)
        state_gen.save_state(grid, state_file)

        # The solver RNG is seeded with the sample id, so the solution shown
        # in the video is reproducible for a given id.
        solution = maze_utils.dfs_solve_maze(grid, [], rng=random.Random(sample_id))
        video_gen.create_solution_video(
            grid,
            solution,
            constants.CELL_SIZE,
            video_file,
            assets_folder=assets_folder,
        )

        records += generate_json_data(grid, sample_id)
    return records
70 | with open(file_path, "w", encoding="utf-8") as fh: 71 | json.dump(payload, fh, indent=2) 72 | -------------------------------------------------------------------------------- /AutoEnv/autoenv/pipeline/visual/pipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | Maze Mode Skin Generation Pipeline 3 | Simplified DAG-based visual asset generation pipeline 4 | """ 5 | 6 | from pathlib import Path 7 | 8 | from autoenv.pipeline.visual.nodes import ( 9 | AnalyzerNode, 10 | AssetGeneratorNode, 11 | AutoEnvContext, 12 | BackgroundRemovalNode, 13 | StrategistNode, 14 | ) 15 | from base.engine.async_llm import AsyncLLM 16 | from base.pipeline.base_pipeline import BasePipeline 17 | 18 | 19 | class VisualPipeline(BasePipeline): 20 | """ 21 | Visualization pipeline for maze mode. 22 | 23 | DAG structure: 24 | Analyzer → Strategist → AssetGenerator → BackgroundRemoval 25 | """ 26 | 27 | model_config = {"arbitrary_types_allowed": True} 28 | 29 | @classmethod 30 | def create_default( 31 | cls, 32 | image_model: str, 33 | ) -> "VisualPipeline": 34 | """ 35 | Factory method: Create default visualization pipeline. 
@dataclass
class UnifiedState:
    """Game state container that can be saved to and loaded from JSON.

    The serialised layout is::

        {"version", "game_type",
         "entities": {"player", "goal", "boxes"},
         "render", "metadata", ["grid"]}

    ``grid`` is written only when it is not None.
    """

    # Stored under the "version" key.
    version: str
    # Stored under the "game_type" key.
    game_type: str
    # Stored under entities.player.
    player: Entity
    # Stored under entities.goal.
    goal: Entity
    # Stored under the "render" key.
    render: RenderConfig
    # Optional; omitted from the serialised dict when None.
    grid: Optional[Grid] = None
    # Stored under entities.boxes; defaults to an empty list.
    boxes: List[Entity] = field(default_factory=list)
    # Free-form extra data, stored as-is.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the state as a plain dictionary (see class docstring)."""
        entities = {
            "player": self.player.to_dict(),
            "goal": self.goal.to_dict(),
            "boxes": [item.to_dict() for item in self.boxes],
        }
        payload: Dict[str, Any] = {
            "version": self.version,
            "game_type": self.game_type,
            "entities": entities,
            "render": self.render.to_dict(),
            "metadata": self.metadata,
        }
        if self.grid is None:
            return payload
        payload["grid"] = self.grid.to_dict()
        return payload

    def save(self, path: str):
        """Write the state to ``path`` as UTF-8 JSON, creating parent dirs."""
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open('w', encoding='utf-8') as handle:
            json.dump(self.to_dict(), handle, indent=2, ensure_ascii=False)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'UnifiedState':
        """Build a state from a dictionary shaped like ``to_dict`` output.

        ``grid``, ``entities.boxes`` and ``metadata`` are optional; the other
        keys are required and raise ``KeyError`` when missing.
        """
        entity_data = data["entities"]

        grid = None
        if "grid" in data:
            grid = Grid.from_dict(data["grid"])

        return cls(
            version=data["version"],
            game_type=data["game_type"],
            player=Entity.from_dict(entity_data["player"]),
            goal=Entity.from_dict(entity_data["goal"]),
            render=RenderConfig.from_dict(data["render"]),
            grid=grid,
            boxes=[Entity.from_dict(raw) for raw in entity_data.get("boxes", [])],
            metadata=data.get("metadata", {}),
        )

    @classmethod
    def load(cls, path: str) -> 'UnifiedState':
        """Read a UTF-8 JSON file and build a state from its content."""
        with open(path, 'r', encoding='utf-8') as handle:
            return cls.from_dict(json.load(handle))

    def get_player_bbox(self) -> BBox:
        """Return the player's ``bbox`` attribute."""
        return self.player.bbox

    def get_goal_bbox(self) -> BBox:
        """Return the goal's ``bbox`` attribute."""
        return self.goal.bbox

    def get_player_grid_pos(self) -> Optional[Position]:
        """Return the player's ``grid_pos`` attribute."""
        return self.player.grid_pos

    def get_goal_grid_pos(self) -> Optional[Position]:
        """Return the goal's ``grid_pos`` attribute."""
        return self.goal.grid_pos
def _dfs(maze: List[List[int]], row: int, col: int) -> None:
    """Carve passages into ``maze`` with a randomized depth-first search.

    Starting from ``(row, col)``, the search repeatedly jumps two cells in a
    shuffled direction; when the landing cell is still a wall, both the cell
    in between and the landing cell are set to ``constants.EMPTY_CELL`` and
    the search continues from there.

    The traversal uses an explicit stack instead of recursion so that large
    mazes cannot exceed Python's recursion limit. Directions are shuffled
    once per visited cell, at the moment the cell is entered, which is the
    same point at which the recursive formulation shuffled them, so the
    sequence of random draws and carved cells is unchanged.

    Args:
        maze: Grid modified in place.
        row: Row index of the starting cell.
        col: Column index of the starting cell.
    """

    def _shuffled_directions():
        directions = [(-2, 0), (0, 2), (2, 0), (0, -2)]
        _RANDOM.shuffle(directions)
        return iter(directions)

    # Each frame: (cell row, cell col, iterator over its untried directions).
    stack = [(row, col, _shuffled_directions())]
    while stack:
        cur_row, cur_col, pending = stack[-1]
        for d_row, d_col in pending:
            next_row = cur_row + d_row
            next_col = cur_col + d_col
            if _is_in_bounds(maze, next_row, next_col) and maze[next_row][next_col] == constants.WALL_CELL:
                # Open the wall between the two cells, then the target cell.
                maze[cur_row + d_row // 2][cur_col + d_col // 2] = constants.EMPTY_CELL
                maze[next_row][next_col] = constants.EMPTY_CELL
                stack.append((next_row, next_col, _shuffled_directions()))
                break
        else:
            # Every direction of this cell has been tried: backtrack.
            stack.pop()
len(unique_distances) == 2: 66 | target_distance = unique_distances[1] 67 | else: 68 | target_distance = unique_distances[0] 69 | 70 | candidates = [cell for cell, distance in distances if distance == target_distance] 71 | if not candidates: 72 | raise ValueError("No cells found with the target distance to place the goal.") 73 | 74 | goal_row, goal_col = _RANDOM.choice(candidates) 75 | 76 | maze[player_row][player_col] = constants.PLAYER_CELL 77 | maze[goal_row][goal_col] = constants.GOAL_CELL 78 | 79 | 80 | def _is_in_bounds(maze: List[List[int]], row: int, col: int) -> bool: 81 | return 0 < row < len(maze) and 0 < col < len(maze[0]) 82 | -------------------------------------------------------------------------------- /games/maze/templates/available_directions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import random 4 | from typing import List, Set 5 | 6 | from .. import constants 7 | from ..utils import maze_utils 8 | from .base_template import BaseTemplate 9 | 10 | 11 | class AvailableDirections(BaseTemplate): 12 | def __init__(self, maze: List[List[int]], image_id: int) -> None: 13 | super().__init__(maze, image_id) 14 | 15 | self.question_id = 5 16 | self.data_id = f"maze_{image_id:05d}_{self.question_id:02d}" 17 | self.qa_type = "StateInfo" 18 | self.question_description = "Ask for the available directions to move are currently." 
19 | self.qa_level = "Easy" 20 | self.question += "Which directions are available to move now?\n\n**Options:**" 21 | 22 | answers = maze_utils.get_available_directions(maze) 23 | answer_str = ", ".join(answers) 24 | 25 | rng = random.Random(image_id) 26 | option_sets = [ 27 | ["up", "down", "left", "right"], 28 | ["up, down", "up, left", "up, right", "down, left", "down, right", "left, right"], 29 | ["up, down, left", "up, down, right", "up, left, right", "down, left, right"], 30 | ["up, down, left, right"], 31 | ] 32 | counts = [2, 2, 2, 1] 33 | 34 | pool: Set[str] = set() 35 | for choices, count in zip(option_sets, counts): 36 | _add_random_options(pool, choices, count, rng) 37 | pool.add(answer_str) 38 | 39 | option_list = sorted(pool, key=lambda item: (len(item), item)) 40 | self.options = [] 41 | label_code = ord("A") 42 | for entry in option_list: 43 | label = chr(label_code) 44 | self.options.append(f"{label}. {entry}") 45 | if entry == answer_str: 46 | self.answer = label 47 | label_code += 1 48 | 49 | for option in self.options: 50 | self.question += f"\n{option}" 51 | 52 | player_row, player_col = maze_utils.find_position(maze, constants.PLAYER_CELL) 53 | segments = [f"The player is on ({player_row}, {player_col})"] 54 | if "up" in answer_str: 55 | segments.append(f"({player_row - 1}, {player_col}) is empty") 56 | if "down" in answer_str: 57 | segments.append(f"({player_row + 1}, {player_col}) is empty") 58 | if "left" in answer_str: 59 | segments.append(f"({player_row}, {player_col - 1}) is empty") 60 | if "right" in answer_str: 61 | segments.append(f"({player_row}, {player_col + 1}) is empty") 62 | 63 | detail = ", and ".join(segments) 64 | self.analysis = f"{detail}. The player can move {answer_str}. 
Therefore, The option is {self.answer}" 65 | 66 | 67 | def _add_random_options( 68 | bucket: Set[str], 69 | choices: List[str], 70 | count: int, 71 | rng: random.Random, 72 | ) -> None: 73 | target = len(bucket) + count 74 | if not choices: 75 | return 76 | while len(bucket) < target: 77 | bucket.add(rng.choice(choices)) 78 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/MODEL_CONFIG.md: -------------------------------------------------------------------------------- 1 | # VLM 模型配置说明 2 | 3 | ## 模型类型 4 | 5 | 系统支持两种类型的 VLM 模型: 6 | 7 | ### 1. API 模型 (type: api) 8 | 9 | 通过 API 调用的远程模型,如 GPT-4、Gemini 等。 10 | 11 | **配置示例:** 12 | ```yaml 13 | models: 14 | - name: gpt-5 15 | type: api 16 | base_url: https://newapi.deepwisdom.ai/v1 17 | max_tokens: 60000 18 | temperature: 1.0 19 | ``` 20 | 21 | **参数说明:** 22 | - `name`: 模型名称 23 | - `type`: 必须为 `api` 24 | - `base_url`: API 端点地址 25 | - `max_tokens`: 最大生成 token 数 26 | - `temperature`: 生成温度(0.0-2.0) 27 | 28 | **环境变量:** 29 | 需要设置 `OPENAI_API_KEY` 环境变量(在 `.env` 文件中) 30 | 31 | ### 2. Local 模型 (type: local) 32 | 33 | 本地加载的 HuggingFace 模型。 34 | 35 | **配置示例:** 36 | ```yaml 37 | models: 38 | - name: Qwen/Qwen2-VL-7B-Instruct 39 | type: local 40 | device: cuda:0 41 | max_tokens: 10000 42 | temperature: 0.0 43 | 44 | - name: llava-hf/llava-v1.6-mistral-7b-hf 45 | type: local 46 | device: cuda:1 47 | max_tokens: 10000 48 | temperature: 0.0 49 | ``` 50 | 51 | **参数说明:** 52 | - `name`: HuggingFace 模型名称或路径 53 | - `type`: 必须为 `local` 54 | - `device`: 运行设备(如 `cuda:0`, `cuda:1`, `cpu`) 55 | - `max_tokens`: 最大生成 token 数 56 | - `temperature`: 生成温度(0.0-2.0) 57 | 58 | **模型加载逻辑:** 59 | 1. 首先尝试从本地缓存加载模型 60 | 2. 如果失败,自动从 HuggingFace 下载到 `/huggingface_model` 目录 61 | 3. 
模型会被加载到指定的 GPU 设备上 62 | 63 | **依赖安装:** 64 | ```bash 65 | pip install transformers torch pillow accelerate 66 | 67 | # 如果使用 Qwen2.5-VL 模型,还需要安装: 68 | pip install qwen-vl-utils 69 | ``` 70 | 71 | ## 完整配置示例 72 | 73 | ```yaml 74 | game: maze 75 | dataset: dataset/maze/1 76 | output: vlm_eval_results/maze 77 | 78 | models: 79 | # API 模型 80 | - name: gpt-5 81 | type: api 82 | base_url: https://newapi.deepwisdom.ai/v1 83 | max_tokens: 60000 84 | temperature: 1.0 85 | 86 | # Local 模型 - GPU 0 87 | - name: Qwen/Qwen2-VL-7B-Instruct 88 | type: local 89 | device: cuda:0 90 | max_tokens: 10000 91 | temperature: 0.0 92 | 93 | # Local 模型 - GPU 1 94 | - name: llava-hf/llava-v1.6-mistral-7b-hf 95 | type: local 96 | device: cuda:1 97 | max_tokens: 10000 98 | temperature: 0.0 99 | 100 | workers: 10 101 | max_levels: -1 102 | assets_folder: skins/maze/1 103 | ``` 104 | 105 | ## 多 GPU 使用 106 | 107 | 可以配置多个 local 模型在不同的 GPU 上运行: 108 | 109 | ```yaml 110 | models: 111 | - name: model-1 112 | type: local 113 | device: cuda:0 # 第一张 GPU 114 | 115 | - name: model-2 116 | type: local 117 | device: cuda:1 # 第二张 GPU 118 | 119 | - name: model-3 120 | type: local 121 | device: cuda:2 # 第三张 GPU 122 | ``` 123 | 124 | ## 注意事项 125 | 126 | 1. **API 模型**:需要确保网络连接正常,API key 有效 127 | 2. **Local 模型**: 128 | - 首次运行会下载模型,可能需要较长时间 129 | - 确保有足够的磁盘空间(`/huggingface_model` 目录) 130 | - 确保 GPU 显存足够(7B 模型约需 14GB 显存) 131 | - 可以使用 `device: cpu` 在 CPU 上运行(速度较慢) 132 | 3. **并行执行**:不同模型会并行评估,注意资源分配 133 | 4. 
**每个难度只测试后 24 个 case** 134 | 135 | ## 支持的模型示例 136 | 137 | ### Local 模型 138 | - `Qwen/Qwen2-VL-7B-Instruct` 139 | - `Qwen/Qwen2-VL-2B-Instruct` 140 | - `llava-hf/llava-v1.6-mistral-7b-hf` 141 | - `llava-hf/llava-1.5-7b-hf` 142 | - 其他支持 `AutoModelForVision2Seq` 的模型 143 | 144 | ### API 模型 145 | - GPT-4o, GPT-4V 146 | - Gemini Pro Vision 147 | - Claude 3 Vision 148 | - 其他兼容 OpenAI API 的模型 149 | 150 | -------------------------------------------------------------------------------- /prompts/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Video Model Prompts 模块。 4 | 5 | 提供基于皮肤配置的动态 prompt 生成功能。 6 | """ 7 | import json 8 | from pathlib import Path 9 | from typing import Optional 10 | 11 | from .videomodel_maze_prompt import get_maze_prompt 12 | from .videomodel_maze3d_prompt import get_maze3d_prompt 13 | from .videomodel_sokoban_prompt import get_sokoban_prompt 14 | from .videomodel_trapfield_prompt import get_trapfield_prompt 15 | from .videomodel_pathfinder_prompt import get_pathfinder_prompt 16 | 17 | # 游戏类型别名映射 18 | GAME_ALIASES = { 19 | "irregular_maze": "pathfinder", 20 | "regular_maze": "maze", 21 | "3d_maze": "maze3d", 22 | } 23 | 24 | # 游戏类型到 prompt 生成函数的映射 25 | PROMPT_GENERATORS = { 26 | "maze": get_maze_prompt, 27 | "maze3d": get_maze3d_prompt, 28 | "sokoban": get_sokoban_prompt, 29 | "trapfield": get_trapfield_prompt, 30 | "pathfinder": get_pathfinder_prompt, 31 | } 32 | 33 | 34 | def load_skin_description(skins_root: Path, game_type: str, skin_id: str) -> Optional[dict]: 35 | """ 36 | 加载皮肤的 description.json 文件。 37 | 38 | Args: 39 | skins_root: skins 目录的根路径 40 | game_type: 游戏类型 (maze, maze3d, sokoban, trapfield, pathfinder) 41 | skin_id: 皮肤 ID (1, 2, 3, ...) 
def get_dynamic_prompt(
    game_type: str,
    skin_id: str,
    skins_root: Optional[Path] = None,
) -> str:
    """Build the prompt for a game type using a skin's visual description.

    Args:
        game_type: Game type or one of its aliases (maze, maze3d, sokoban,
            trapfield, pathfinder, irregular_maze, regular_maze, ...).
        skin_id: Skin identifier, i.e. the sub-folder name under the game
            folder.
        skins_root: Root of the skins directory. Defaults to the ``skins``
            folder two levels above this file.

    Returns:
        The prompt string produced by the game's generator function.

    Raises:
        ValueError: If the game type is unsupported, the skin's
            description.json cannot be found, or its ``visual_description``
            is missing or empty.
    """
    # Aliases are resolved once; the canonical name drives every lookup.
    resolved_type = GAME_ALIASES.get(game_type, game_type)
    if resolved_type not in PROMPT_GENERATORS:
        raise ValueError(f"Unsupported game type: {game_type}")

    root = skins_root
    if root is None:
        # Default location: the "skins" folder next to this package's parent.
        root = Path(__file__).parent.parent / "skins"

    skin_data = load_skin_description(root, resolved_type, skin_id)
    if skin_data is None:
        raise ValueError(
            f"Skin description not found: skins/{resolved_type}/{skin_id}/description.json"
        )

    visuals = skin_data.get("visual_description", {})
    if not visuals:
        raise ValueError(
            f"visual_description is empty in skins/{resolved_type}/{skin_id}/description.json"
        )

    build_prompt = PROMPT_GENERATORS[resolved_type]
    return build_prompt(visuals)
class GameAdapter(ABC):
    """Base class for game adapters.

    Defines the common interface a game implements to plug into the
    concurrent level-generation system.
    """

    @abstractmethod
    def get_game_name(self) -> str:
        """Return the name of the game."""

    @abstractmethod
    def generate_level(
        self,
        difficulty_config: Dict[str, Any],
        assets_folder: str,
        **kwargs
    ) -> Optional[Any]:
        """Generate one level.

        Args:
            difficulty_config: Difficulty configuration dictionary.
            assets_folder: Path of the assets folder.
            **kwargs: Additional, adapter-specific options.

        Returns:
            The generated level object, or None on failure.
        """

    @abstractmethod
    def save_level(
        self,
        level: Any,
        output_dir: Path,
        level_id: int,
        difficulty_name: str,
        **kwargs
    ) -> Dict[str, Optional[str]]:
        """Save a level, including its video, image and other artefacts.

        Args:
            level: Level object.
            output_dir: Output directory.
            level_id: Level id.
            difficulty_name: Name of the difficulty.
            **kwargs: Additional options (for example ``fps``).

        Returns:
            A dictionary describing the written files, for example::

                {
                    'video': 'video_0001.mp4',
                    'image': 'image_0001.png',
                    'state': 'state_0001.json'
                }

            When a file could not be produced its value is None.
        """

    @abstractmethod
    def get_level_hash(self, level: Any) -> str:
        """Return the hash of a level (used for de-duplication).

        Args:
            level: Level object.

        Returns:
            The hash string of the level.
        """

    @abstractmethod
    def is_duplicate(self, level: Any, existing_hashes: set) -> bool:
        """Check whether a level is a duplicate.

        Args:
            level: Level object.
            existing_hashes: Set of hashes that already exist.

        Returns:
            True if the level is a duplicate, False otherwise.
        """

    def validate_difficulty_config(self, difficulty_config: Dict[str, Any]) -> bool:
        """Check whether a difficulty configuration is valid.

        The default implementation only requires a ``count`` key.

        Args:
            difficulty_config: Difficulty configuration dictionary.

        Returns:
            True if the configuration is valid, False otherwise.
        """
        has_count = 'count' in difficulty_config
        return has_count

    def get_required_texture_files(self) -> list:
        """Return the texture files the game needs.

        Returns:
            List of texture file names (without extension); empty by default.
        """
        return list()

    def cleanup(self):
        """Release resources (optional hook; does nothing by default)."""
        return None
def load_skin_description(assets_folder: str) -> Dict[str, str]:
    """Load ``description.json`` from a skin folder and return its
    ``visual_description`` mapping.

    Args:
        assets_folder: Path of the skin assets folder.

    Returns:
        The ``visual_description`` dictionary.

    Raises:
        FileNotFoundError: ``description.json`` does not exist.
        ValueError: The file is not valid JSON, its root is not an object,
            or ``visual_description`` is missing, empty or not an object.
    """
    description_path = Path(assets_folder) / "description.json"

    if not description_path.exists():
        raise FileNotFoundError(f"description.json not found in {assets_folder}")

    try:
        with open(description_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse description.json in {assets_folder}: {e}") from e

    # A JSON array/scalar at the root has no ``.get``; report it through the
    # documented ValueError instead of leaking an AttributeError.
    if not isinstance(data, dict):
        raise ValueError(f"visual_description not found in {description_path}")

    visual_desc = data.get("visual_description")
    if not visual_desc:
        raise ValueError(f"visual_description not found in {description_path}")

    # Callers expand the result with ``**``, so it has to be a mapping.
    if not isinstance(visual_desc, dict):
        raise ValueError(f"visual_description must be an object in {description_path}")

    return visual_desc
PROMPT_TEMPLATES[game_name][prompt_type] 96 | visual_desc = load_skin_description(assets_folder) 97 | 98 | try: 99 | return template.format(**visual_desc) 100 | except KeyError as e: 101 | raise ValueError(f"Missing key in visual_description: {e}") 102 | 103 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/action_metrics.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Any, Union 2 | 3 | 4 | def calculate_sr(is_win: bool) -> float: 5 | return 1.0 if is_win else 0.0 6 | 7 | 8 | def calculate_pr( 9 | pred_actions: List[Dict[str, Any]], 10 | opt_actions: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]] 11 | ) -> float: 12 | if isinstance(opt_actions, list) and opt_actions and isinstance(opt_actions[0], list): 13 | return max(_calculate_pr_single(pred_actions, opt) for opt in opt_actions) 14 | return _calculate_pr_single(pred_actions, opt_actions) 15 | 16 | 17 | def _calculate_pr_single( 18 | pred_actions: List[Dict[str, Any]], 19 | opt_actions: List[Dict[str, Any]] 20 | ) -> float: 21 | if not opt_actions: 22 | return 1.0 if not pred_actions else 0.0 23 | 24 | # 检查是否是 PathFinder 游戏(通过检查 action 类型) 25 | if pred_actions and pred_actions[0].get('action') == 'path': 26 | # PathFinder: 计算字母数组的连续匹配数 27 | pred_path = pred_actions[0].get('path', []) 28 | opt_path = opt_actions[0].get('path', []) 29 | 30 | if not opt_path: 31 | return 1.0 if not pred_path else 0.0 32 | 33 | # 计算从头开始连续匹配的节点数 34 | matched = 0 35 | for p_node, o_node in zip(pred_path, opt_path): 36 | if p_node == o_node: 37 | matched += 1 38 | else: 39 | break 40 | 41 | return matched / len(opt_path) 42 | else: 43 | # 其他游戏: 计算动作序列的连续匹配数 44 | matched = 0 45 | for p, o in zip(pred_actions, opt_actions): 46 | if p == o: 47 | matched += 1 48 | else: 49 | break 50 | 51 | return matched / len(opt_actions) 52 | 53 | 54 | def calculate_mr( 55 | pred_actions: List[Dict[str, Any]], 56 | 
def calculate_step(
    pred_actions: List[Dict[str, Any]],
    opt_actions: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
    is_win: bool = False
) -> Union[float, None]:
    """Calculate the step metric: ``predicted_length / optimal_length - 1``.

    Only successful cases (``is_win=True``) are scored; failed cases return
    None.

    ``opt_actions`` is either one optimal solution or a list of alternative
    solutions; with alternatives the shortest one is the reference. For the
    PathFinder game (first predicted action has ``action == 'path'``) the
    length is the length of the ``path`` list of the first action instead
    of the number of actions.

    Returns:
        None when ``is_win`` is false, 0.0 when the optimal length is 0
        (including an empty optimal list or an empty alternative), otherwise
        the relative number of extra steps.
    """
    if not is_win:
        return None

    has_alternatives = (
        isinstance(opt_actions, list)
        and bool(opt_actions)
        and isinstance(opt_actions[0], list)
    )
    alternatives = opt_actions if has_alternatives else [opt_actions]

    if pred_actions and pred_actions[0].get('action') == 'path':
        # PathFinder: measure the node sequence stored in the first action.
        # An empty action list counts as length 0 rather than raising
        # IndexError, matching the generic branch's handling of empty input.
        def _length(actions):
            return len(actions[0].get('path', [])) if actions else 0
    else:
        # Other games: measure the number of actions.
        _length = len

    pred_length = _length(pred_actions)
    opt_length = min(_length(option) for option in alternatives)

    if opt_length == 0:
        return 0.0

    return pred_length / opt_length - 1.0
def _safe_extract(archive_path, output_path: Path) -> None:
    """Extract a gzip-compressed tar archive into *output_path* without path escapes.

    Uses tarfile's ``'data'`` extraction filter when the interpreter provides
    it; otherwise every member path is checked to resolve inside
    *output_path* before anything is extracted.

    Raises:
        tarfile.TarError: a member would be written outside *output_path*.
    """
    with tarfile.open(archive_path, 'r:gz') as tar:
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(output_path, filter='data')
            return

        # Fallback for interpreters without extraction filters.
        root = output_path.resolve()
        for member in tar.getmembers():
            target = (root / member.name).resolve()
            if target != root and root not in target.parents:
                raise tarfile.TarError(f"Unsafe path in archive: {member.name}")
        tar.extractall(output_path)


def _download_and_extract_split(repo_id: str, split: str, output_path: Path, token) -> None:
    """Download ``<split>.tar.gz`` from the dataset repo and extract it into *output_path*."""
    filename = f"{split}.tar.gz"

    logging.info(f"\n下载 {filename}...")
    archive_file = hf_hub_download(
        repo_id=repo_id,
        repo_type="dataset",
        filename=filename,
        token=token
    )

    logging.info(f"解压 {filename}...")
    _safe_extract(archive_file, output_path)
    logging.info(f"✓ {split} 解压完成")


def download_and_extract(
    repo_id: str = "amagipeng/VR-Bench",
    output_dir: str = "./dataset_VR",
    token: str = None
):
    """
    Download the dataset archives and extract them.

    Args:
        repo_id: Hugging Face repository ID
        output_dir: output directory
        token: Hugging Face token (optional; falls back to the HF_TOKEN
            environment variable)
    """
    output_path = Path(output_dir)

    # Ask before writing into a directory that already has content.
    if output_path.exists() and any(output_path.iterdir()):
        logging.warning(f"目录 {output_dir} 已存在且非空")
        response = input("是否继续并覆盖? (y/n): ")
        if response.lower() != 'y':
            logging.info("取消下载")
            return

    output_path.mkdir(parents=True, exist_ok=True)

    if token is None:
        token = os.getenv("HF_TOKEN")

    logging.info(f"开始下载数据集: {repo_id}")

    # Same order as before: train first, then eval.
    for split in ('train', 'eval'):
        _download_and_extract_split(repo_id, split, output_path, token)

    # The README is optional: a failure here is logged and does not abort the run.
    try:
        logging.info("\n下载 README.md...")
        readme_file = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename="README.md",
            token=token
        )
        import shutil
        shutil.copy(readme_file, output_path / "README.md")
        logging.info("✓ README.md 下载完成")
    except Exception as e:
        logging.warning(f"README.md 下载失败: {e}")

    logging.info("\n✓ 数据集下载并解压完成!")
    logging.info(f"数据集位置: {output_path.absolute()}")

    # Show the extracted dataset structure.
    logging.info("\n数据集结构:")
    for split in ['train', 'eval']:
        split_dir = output_path / split
        if split_dir.exists():
            games = [d.name for d in split_dir.iterdir() if d.is_dir()]
            logging.info(f" {split}/: {', '.join(games)}")


def main():
    """Parse command-line arguments and run the download."""
    parser = argparse.ArgumentParser(
        description='从 Hugging Face 下载并解压 VR-Bench 数据集'
    )
    parser.add_argument(
        '--repo-id',
        type=str,
        default='amagipeng/VR-Bench',
        help='Hugging Face 仓库 ID (默认: amagipeng/VR-Bench)'
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        default='./dataset_VR',
        help='输出目录 (默认: ./dataset_VR)'
    )
    parser.add_argument(
        '--token',
        type=str,
        default=None,
        help='Hugging Face token (默认: 从 .env 文件读取 HF_TOKEN)'
    )

    args = parser.parse_args()

    download_and_extract(
        repo_id=args.repo_id,
        output_dir=args.output_dir,
        token=args.token
    )


if __name__ == '__main__':
    main()
7 | print(f"Total cost: ${monitor.total_cost:.4f}") 8 | monitor.save() # saves to workspace/costs/.json 9 | """ 10 | 11 | import contextvars 12 | import json 13 | from dataclasses import dataclass, field 14 | from datetime import datetime 15 | from pathlib import Path 16 | from typing import TYPE_CHECKING 17 | 18 | if TYPE_CHECKING: 19 | from base.engine.cost_monitor import CostMonitor 20 | 21 | _current_monitor: contextvars.ContextVar["CostMonitor | None"] = contextvars.ContextVar( 22 | "cost_monitor", default=None 23 | ) 24 | 25 | 26 | @dataclass 27 | class CostRecord: 28 | """Single LLM call cost record.""" 29 | 30 | model: str 31 | input_tokens: int 32 | output_tokens: int 33 | cost: float 34 | 35 | 36 | @dataclass 37 | class CostMonitor: 38 | """Aggregates LLM costs during a context scope.""" 39 | 40 | records: list[CostRecord] = field(default_factory=list) 41 | _token: contextvars.Token | None = field(default=None, repr=False) 42 | 43 | @property 44 | def total_cost(self) -> float: 45 | return sum(r.cost for r in self.records) 46 | 47 | @property 48 | def total_input_tokens(self) -> int: 49 | return sum(r.input_tokens for r in self.records) 50 | 51 | @property 52 | def total_output_tokens(self) -> int: 53 | return sum(r.output_tokens for r in self.records) 54 | 55 | @property 56 | def call_count(self) -> int: 57 | return len(self.records) 58 | 59 | def record(self, model: str, input_tokens: int, output_tokens: int, cost: float) -> None: 60 | """Record a single LLM call's cost.""" 61 | self.records.append(CostRecord(model, input_tokens, output_tokens, cost)) 62 | 63 | def summary(self) -> dict: 64 | """Get aggregated summary.""" 65 | return { 66 | "total_cost": self.total_cost, 67 | "total_input_tokens": self.total_input_tokens, 68 | "total_output_tokens": self.total_output_tokens, 69 | "call_count": self.call_count, 70 | "by_model": self._group_by_model(), 71 | } 72 | 73 | def _group_by_model(self) -> dict[str, dict]: 74 | result: dict[str, dict] = {} 75 | 
for r in self.records: 76 | if r.model not in result: 77 | result[r.model] = {"cost": 0.0, "input_tokens": 0, "output_tokens": 0, "calls": 0} 78 | result[r.model]["cost"] += r.cost 79 | result[r.model]["input_tokens"] += r.input_tokens 80 | result[r.model]["output_tokens"] += r.output_tokens 81 | result[r.model]["calls"] += 1 82 | return result 83 | 84 | def save(self, save_dir: str = "workspace/costs") -> Path: 85 | """Save cost summary to JSON file.""" 86 | cost_dir = Path(save_dir) 87 | cost_dir.mkdir(parents=True, exist_ok=True) 88 | ts = datetime.now().strftime("%Y%m%d_%H%M%S") 89 | path = cost_dir / f"{ts}.json" 90 | path.write_text( 91 | json.dumps(self.summary(), indent=2, ensure_ascii=False), 92 | encoding="utf-8", 93 | ) 94 | return path 95 | 96 | def __enter__(self) -> "CostMonitor": 97 | self._token = _current_monitor.set(self) 98 | return self 99 | 100 | def __exit__(self, *args) -> None: 101 | if self._token is not None: 102 | _current_monitor.reset(self._token) 103 | 104 | 105 | def get_current_monitor() -> "CostMonitor | None": 106 | """Get the current cost monitor from context, if any.""" 107 | return _current_monitor.get() 108 | 109 | 110 | def record_cost(model: str, input_tokens: int, output_tokens: int, cost: float) -> None: 111 | """Record cost to the current monitor if one is active.""" 112 | monitor = get_current_monitor() 113 | if monitor is not None: 114 | monitor.record(model, input_tokens, output_tokens, cost) 115 | 116 | -------------------------------------------------------------------------------- /evaluation/vlm_eval/executors/maze_executor.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List, Dict, Any, Tuple 3 | from pathlib import Path 4 | 5 | from core.schema import UnifiedState 6 | from evaluation.vlm_eval.game_executor import GameExecutor 7 | from evaluation.vlm_eval.prompts import get_dynamic_prompt 8 | from games.maze import constants 9 | from 
class MazeExecutor(GameExecutor):
    """Game executor for the grid maze: applies moves, checks wins, renders states."""

    # Directions accepted by execute_action.
    _DIRECTIONS = ('up', 'down', 'left', 'right')

    def __init__(self, assets_folder: str = None):
        # Skin folder passed on to rendering and prompt generation.
        self.assets_folder = assets_folder

    def load_state(self, state_path: str) -> UnifiedState:
        """Load a game state from *state_path*."""
        return UnifiedState.load(state_path)

    def get_optimal_solution(self, state: UnifiedState) -> List[List[Dict[str, Any]]]:
        """Return every path found by ``find_maze_paths`` as a list of move actions."""
        all_paths = find_maze_paths(state)
        return [self._path_to_actions(path) for path in all_paths]

    def execute_action(self, state: UnifiedState, action: Dict[str, Any]) -> Tuple[UnifiedState, bool, str]:
        """Apply one move action to *state*.

        Returns:
            ``(new_state, True, "OK")`` on success, or ``(state, False, reason)``
            with the unchanged input state when the action is invalid, leaves
            the grid, or hits a wall.
        """
        if action.get('action') != 'move':
            return state, False, f"Invalid action type: {action.get('action')}"

        direction = action.get('direction')
        if direction not in self._DIRECTIONS:
            return state, False, f"Invalid direction: {direction}"

        current_pos = state.player.grid_pos
        new_pos = self._calculate_new_position(current_pos, direction)

        maze = state.grid.data
        rows = len(maze)
        cols = len(maze[0]) if maze else 0

        if not (0 <= new_pos.row < rows and 0 <= new_pos.col < cols):
            return state, False, "Out of bounds"

        if maze[new_pos.row][new_pos.col] == constants.WALL_CELL:
            return state, False, "Hit wall"

        # Work on a copy so the caller's state is never mutated.
        new_state = copy.deepcopy(state)

        from core.schema.entity import Entity

        cell_size = state.render.cell_size
        new_state.player = Entity.from_grid_pos(new_pos.row, new_pos.col, cell_size)

        new_maze = [list(row) for row in maze]
        new_maze[current_pos.row][current_pos.col] = constants.EMPTY_CELL
        # The player marker overwrites the destination cell, goal included;
        # check_win compares entity positions rather than grid cells.
        new_maze[new_pos.row][new_pos.col] = constants.PLAYER_CELL

        new_state.grid.data = new_maze

        return new_state, True, "OK"

    def check_win(self, state: UnifiedState) -> bool:
        """Return True when the player and the goal occupy the same grid cell."""
        player_pos = state.player.grid_pos
        goal_pos = state.goal.grid_pos
        return player_pos.row == goal_pos.row and player_pos.col == goal_pos.col

    def render_state(self, state: UnifiedState, output_path: str) -> None:
        """Draw the maze grid to *output_path*, creating parent directories first."""
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        draw_maze(state.grid.data, state.render.cell_size, output_path, assets_folder=self.assets_folder)

    def get_system_prompt(self) -> str:
        return get_dynamic_prompt('maze', 'system', self.assets_folder)

    def get_user_prompt(self) -> str:
        return get_dynamic_prompt('maze', 'user', self.assets_folder)

    def get_game_type(self) -> str:
        return 'maze'

    def _path_to_actions(self, path: List[Tuple[int, int]]) -> List[Dict[str, Any]]:
        """Convert consecutive ``(row, col)`` cells into move actions.

        A row change takes precedence over a column change; a step with no
        row change and no decrease in column maps to ``'right'``.
        """
        actions = []
        for (curr_row, curr_col), (next_row, next_col) in zip(path, path[1:]):
            if next_row < curr_row:
                direction = 'up'
            elif next_row > curr_row:
                direction = 'down'
            elif next_col < curr_col:
                direction = 'left'
            else:
                direction = 'right'

            actions.append({'action': 'move', 'direction': direction})

        return actions

    def _calculate_new_position(self, pos, direction: str):
        """Return the position one cell away from *pos*; unknown directions move right."""
        from core.schema.position import Position

        if direction == 'up':
            return Position(row=pos.row - 1, col=pos.col)
        if direction == 'down':
            return Position(row=pos.row + 1, col=pos.col)
        if direction == 'left':
            return Position(row=pos.row, col=pos.col - 1)
        return Position(row=pos.row, col=pos.col + 1)
class VideoMetadataError(RuntimeError):
    """Raised when video metadata cannot be extracted."""
    pass


def get_video_metadata(path: Path) -> Tuple[float, float, float]:
    """
    Get video metadata, trying ffprobe first and OpenCV second.

    Args:
        path: path of the video file

    Returns:
        (duration_s, frame_count, fps)

    Raises:
        VideoMetadataError: the metadata could not be extracted
    """
    try:
        return _metadata_with_ffprobe(path)
    except FileNotFoundError:
        # ffprobe is not installed: fall through to the OpenCV backend.
        pass
    except VideoMetadataError:
        raise
    except Exception as exc:
        raise VideoMetadataError(f"ffprobe error for {path}: {exc}") from exc

    try:
        return _metadata_with_cv2(path)
    except ImportError as exc:
        raise VideoMetadataError(
            "Neither ffprobe (from ffmpeg) nor OpenCV (cv2) is available"
        ) from exc
    except Exception as exc:
        raise VideoMetadataError(f"OpenCV error for {path}: {exc}") from exc


def _metadata_with_ffprobe(path: Path) -> Tuple[float, float, float]:
    """Extract metadata with ffprobe.

    Raises:
        FileNotFoundError: the ffprobe executable is not on PATH.
        VideoMetadataError: ffprobe failed or its output lacks a usable
            duration, frame rate or frame count.
    """
    if not shutil.which("ffprobe"):
        raise FileNotFoundError("ffprobe not found")

    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "format=duration",
        "-show_entries", "stream=nb_frames,avg_frame_rate",
        "-of", "json",
        str(path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    if result.returncode != 0:
        raise VideoMetadataError(result.stderr.strip() or "ffprobe failed")

    data = json.loads(result.stdout)
    try:
        duration_s = float(data["format"]["duration"])
    except (KeyError, TypeError, ValueError) as exc:
        raise VideoMetadataError("Duration unavailable") from exc

    # An absent or empty stream list is treated as a stream with no fields, so
    # the frame-rate check below reports the problem instead of an IndexError.
    streams = data.get("streams") or [{}]
    stream = streams[0]
    raw_frames = stream.get("nb_frames")
    avg_frame_rate = stream.get("avg_frame_rate")
    fps = _fps_from_rate(avg_frame_rate)

    if raw_frames in (None, "N/A"):
        # No frame count reported: estimate it from duration and frame rate.
        frames = duration_s * fps
    else:
        try:
            frames = float(raw_frames)
        except ValueError as exc:
            raise VideoMetadataError("Invalid frame count") from exc

    return duration_s, frames, fps


def _metadata_with_cv2(path: Path) -> Tuple[float, float, float]:
    """Extract metadata with OpenCV.

    Raises:
        ImportError: cv2 is not installed.
        VideoMetadataError: the video cannot be opened or has no usable frame rate.
    """
    import cv2

    capture = cv2.VideoCapture(str(path))
    try:
        if not capture.isOpened():
            raise VideoMetadataError("Unable to open video with OpenCV")

        fps = float(capture.get(cv2.CAP_PROP_FPS))
        frame_count = float(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        if not math.isfinite(fps) or fps <= 0:
            raise VideoMetadataError("Frame rate unavailable via OpenCV")

        if not math.isfinite(frame_count) or frame_count <= 0:
            # The reported count is unusable: count frames by reading them.
            frame_count = float(_count_frames_with_cv2(capture))
    finally:
        # Release the capture on every path, including errors while reading.
        capture.release()

    frames = frame_count
    duration_s = frames / fps
    return duration_s, frames, fps


def _count_frames_with_cv2(capture) -> int:
    """Count frames by reading from *capture* until a read fails."""
    frames = 0
    while True:
        ok, _ = capture.read()
        if not ok:
            break
        frames += 1
    return frames


def _fps_from_rate(avg_frame_rate: str | None) -> float:
    """Parse a frame-rate string such as ``"30000/1001"`` or ``"25"`` into FPS.

    Raises:
        VideoMetadataError: the value is missing, zero, or not a valid fraction.
    """
    if not avg_frame_rate or avg_frame_rate in ("0/0", "0"):
        raise VideoMetadataError("Frame rate unavailable")

    try:
        rate = Fraction(avg_frame_rate)
    except (ZeroDivisionError, ValueError) as exc:
        raise VideoMetadataError("Invalid frame rate") from exc

    return float(rate)
# Default colour configuration (RGB) for each texture.
DEFAULT_COLORS = {
    'floor': (173, 216, 230),  # light blue
    'wall': (255, 255, 255),  # white
    'player': (0, 128, 0),  # green
    'target': (255, 0, 0),  # red
}


def create_default_texture(color: tuple, size: int = 64, border: bool = True) -> Image.Image:
    """
    Build a square, single-colour texture.

    Args:
        color: RGB colour tuple
        size: texture edge length in pixels
        border: whether to draw a 1-pixel light-grey outline

    Returns:
        PIL Image object
    """
    texture = Image.new('RGB', (size, size), color)

    if not border:
        return texture

    # Light-grey outline along the outer edge.
    outline_color = (200, 200, 200)
    last = size - 1
    ImageDraw.Draw(texture).rectangle([0, 0, last, last], outline=outline_color, width=1)
    return texture


def create_player_texture(size: int = 64) -> Image.Image:
    """
    Build the player texture: a green square on a transparent background.

    Args:
        size: texture edge length in pixels

    Returns:
        PIL Image object
    """
    # Fully transparent canvas.
    canvas = Image.new('RGBA', (size, size), (0, 0, 0, 0))

    # Centred square inset by 20% of the size on every side.
    inset = int(size * 0.2)
    far_edge = size - inset - 1
    pen = ImageDraw.Draw(canvas)
    pen.rectangle(
        [inset, inset, far_edge, far_edge],
        fill=DEFAULT_COLORS['player'],
        outline=(0, 100, 0),  # dark green outline
        width=2
    )
    return canvas


def create_target_texture(size: int = 64) -> Image.Image:
    """
    Build the target texture: a red dot on a transparent background.

    Args:
        size: texture edge length in pixels

    Returns:
        PIL Image object
    """
    # Fully transparent canvas.
    canvas = Image.new('RGBA', (size, size), (0, 0, 0, 0))

    # Centred dot inset by 25% of the size on every side.
    inset = int(size * 0.25)
    far_edge = size - inset - 1
    pen = ImageDraw.Draw(canvas)
    pen.ellipse(
        [inset, inset, far_edge, far_edge],
        fill=DEFAULT_COLORS['target'],
        outline=(200, 0, 0),  # dark red outline
        width=2
    )
    return canvas


def generate_default_textures(output_dir: str | Path, size: int = 64):
    """
    Create all four default textures and save them as PNG files.

    Args:
        output_dir: destination directory (created when missing)
        size: texture edge length in pixels
    """
    destination = Path(output_dir)
    destination.mkdir(parents=True, exist_ok=True)

    logging.info(f"Generating default maze textures to {destination}")

    # Each texture is built and saved in turn: floor, wall, player, target.
    builders = [
        ('floor.png', lambda: create_default_texture(DEFAULT_COLORS['floor'], size, border=True)),
        ('wall.png', lambda: create_default_texture(DEFAULT_COLORS['wall'], size, border=True)),
        ('player.png', lambda: create_player_texture(size)),
        ('target.png', lambda: create_target_texture(size)),
    ]
    for file_name, build in builders:
        build().save(destination / file_name)

    logging.info("✓ Generated 4 default textures: floor, wall, player, target")
class TrapFieldExecutor(GameExecutor):
    """Game executor for TrapField: applies moves, checks wins, renders states."""

    # Directions accepted by execute_action.
    _DIRECTIONS = ('up', 'down', 'left', 'right')

    def __init__(self, assets_folder: str = None):
        # Skin folder passed on to rendering and prompt generation.
        self.assets_folder = assets_folder

    def load_state(self, state_path: str) -> UnifiedState:
        """Load a game state from *state_path*."""
        return UnifiedState.load(state_path)

    def get_optimal_solution(self, state: UnifiedState) -> List[List[Dict[str, Any]]]:
        """Return every path found by ``find_trapfield_paths`` as a list of move actions."""
        all_paths = find_trapfield_paths(state)
        return [self._path_to_actions(path) for path in all_paths]

    def execute_action(self, state: UnifiedState, action: Dict[str, Any]) -> Tuple[UnifiedState, bool, str]:
        """Apply one move action to *state*.

        Returns:
            ``(new_state, True, "OK")`` on success, or ``(state, False, reason)``
            with the unchanged input state when the action is invalid, leaves
            the grid, or steps on a trap.
        """
        if action.get('action') != 'move':
            return state, False, f"Invalid action type: {action.get('action')}"

        direction = action.get('direction')
        if direction not in self._DIRECTIONS:
            return state, False, f"Invalid direction: {direction}"

        current_pos = state.player.grid_pos
        new_pos = self._calculate_new_position(current_pos, direction)

        grid = state.grid.data
        rows = len(grid)
        cols = len(grid[0]) if grid else 0

        if not (0 <= new_pos.row < rows and 0 <= new_pos.col < cols):
            return state, False, "Out of bounds"

        if grid[new_pos.row][new_pos.col] == constants.TRAP_CELL:
            return state, False, "Hit trap"

        # Work on a copy so the caller's state is never mutated.
        new_state = copy.deepcopy(state)

        from core.schema.entity import Entity
        cell_size = state.render.cell_size
        new_state.player = Entity.from_grid_pos(new_pos.row, new_pos.col, cell_size)

        new_grid = [list(row) for row in grid]
        new_grid[current_pos.row][current_pos.col] = constants.EMPTY_CELL
        # The player marker overwrites the destination cell, goal included;
        # check_win compares entity positions rather than grid cells.
        new_grid[new_pos.row][new_pos.col] = constants.PLAYER_CELL

        new_state.grid.data = new_grid

        return new_state, True, "OK"

    def check_win(self, state: UnifiedState) -> bool:
        """Return True when the player and the goal occupy the same grid cell."""
        player_pos = state.player.grid_pos
        goal_pos = state.goal.grid_pos
        return player_pos.row == goal_pos.row and player_pos.col == goal_pos.col

    def render_state(self, state: UnifiedState, output_path: str) -> None:
        """Render the grid to *output_path*, creating parent directories first."""
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        renderer = TrapFieldRenderer(cell_size=state.render.cell_size, assets_folder=self.assets_folder)
        renderer.render_grid(state.grid.data, output_path)

    def get_system_prompt(self) -> str:
        return get_dynamic_prompt('trapfield', 'system', self.assets_folder)

    def get_user_prompt(self) -> str:
        return get_dynamic_prompt('trapfield', 'user', self.assets_folder)

    def get_game_type(self) -> str:
        return 'trapfield'

    def _path_to_actions(self, path: List[Tuple[int, int]]) -> List[Dict[str, Any]]:
        """Convert consecutive ``(row, col)`` cells into move actions.

        A row change takes precedence over a column change; a step with no
        row change and no decrease in column maps to ``'right'``.
        """
        actions = []
        for (curr_row, curr_col), (next_row, next_col) in zip(path, path[1:]):
            if next_row < curr_row:
                direction = 'up'
            elif next_row > curr_row:
                direction = 'down'
            elif next_col < curr_col:
                direction = 'left'
            else:
                direction = 'right'

            actions.append({'action': 'move', 'direction': direction})

        return actions

    def _calculate_new_position(self, pos, direction: str):
        """Return the position one cell away from *pos*; unknown directions move right."""
        from core.schema.position import Position

        if direction == 'up':
            return Position(row=pos.row - 1, col=pos.col)
        if direction == 'down':
            return Position(row=pos.row + 1, col=pos.col)
        if direction == 'left':
            return Position(row=pos.row, col=pos.col - 1)
        return Position(row=pos.row, col=pos.col + 1)
def evaluate_single_model(game: str, dataset: str, model_config: Dict[str, Any],
                          output_base: str, workers: int, max_levels: int,
                          assets_folder: str = None) -> Dict[str, Any]:
    """Evaluate one model on a dataset and return its summary.

    Args:
        game: game name understood by ``create_executor``
        dataset: dataset directory passed to the evaluator
        model_config: model settings; ``name`` is required, ``base_url``,
            ``max_tokens`` and ``temperature`` are optional
        output_base: base output directory; results go to ``<output_base>/<name>``
        workers: worker count passed to the evaluator
        max_levels: level limit passed to the evaluator
        assets_folder: optional skin folder passed to the executor

    Returns:
        ``{'model': <name>, 'summary': <evaluator summary>}``
    """
    # Reload environment variables in the worker process, forcing the values
    # from the project's .env file.
    load_dotenv(override=True)

    model_name = model_config['name']
    output_dir = Path(output_base) / model_name

    logging.info(f"[{model_name}] Starting evaluation")

    vlm_client = VLMClient(
        model=model_name,
        base_url=model_config.get('base_url'),
        max_tokens=model_config.get('max_tokens', 10000),
        temperature=model_config.get('temperature', 0.0)
    )

    executor = create_executor(game, assets_folder)
    evaluator = VLMEvaluator(vlm_client, executor)

    summary = evaluator.evaluate_dataset(
        dataset_dir=dataset,
        output_dir=str(output_dir),
        max_workers=workers,
        max_levels=max_levels
    )

    logging.info(f"[{model_name}] Complete - SR: {summary['avg_sr']:.4f}, PR: {summary['avg_pr']:.4f}, MR: {summary['avg_mr']:.4f}")

    return {
        'model': model_name,
        'summary': summary
    }


def main():
    """Read the YAML config and evaluate every configured model in parallel."""
    parser = argparse.ArgumentParser(description='VLM Game Evaluation')
    parser.add_argument('config', type=str, help='Config file path')
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    # Explicit encoding so the config parses the same on every platform.
    with open(args.config, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    game = config['game']
    dataset = config['dataset']
    output_base = config['output']
    models = config['models']
    workers = config.get('workers', 10)
    max_levels = config.get('max_levels', -1)
    assets_folder = config.get('assets_folder')

    if not models:
        # ProcessPoolExecutor rejects max_workers=0, so stop with a clear message.
        logging.error("No models configured; nothing to evaluate")
        return

    logging.info(f"Game: {game}")
    logging.info(f"Dataset: {dataset}")
    logging.info(f"Models: {[m['name'] for m in models]}")
    logging.info(f"Workers per model: {workers}")
    logging.info(f"Total parallel tasks: {len(models) * workers}")

    results = []
    with ProcessPoolExecutor(max_workers=len(models)) as pool:
        # Map each future to its model name so failures can be attributed.
        future_to_model = {}
        for model_config in models:
            future = pool.submit(
                evaluate_single_model,
                game, dataset, model_config, output_base,
                workers, max_levels, assets_folder
            )
            future_to_model[future] = model_config['name']

        for future in as_completed(future_to_model):
            try:
                results.append(future.result())
            except Exception as e:
                # One model failing must not abort the others.
                logging.error(f"Model evaluation failed [{future_to_model[future]}]: {e}")

    logging.info("\n=== Final Results ===")
    for result in results:
        model = result['model']
        summary = result['summary']
        logging.info(f"{model}: SR={summary['avg_sr']:.5f}, PR={summary['avg_pr']:.5f}, MR={summary['avg_mr']:.5f}, Step={summary['avg_step']:.5f}")


if __name__ == '__main__':
    main()
def recalculate_summary(summary_path: Path) -> Dict[str, Any]:
    """Recompute ``avg_step`` for one summary.json using successful cases only.

    Args:
        summary_path: path of the summary.json file to read

    Returns:
        A dict with the file path, the old and new ``avg_step`` and
        ``successful_cases`` values, a ``changed`` flag, and the updated
        summary under ``updated_summary``. Nothing is written to disk.
    """
    with open(summary_path, 'r', encoding='utf-8') as f:
        summary = json.load(f)

    results = summary.get('results', [])

    # Only cases with SR == 1.0 and a non-None step contribute.
    successful_steps = []
    for result in results:
        metrics = result.get('metrics', {})
        sr = metrics.get('sr', 0.0)
        step = metrics.get('step')

        if sr == 1.0 and step is not None:
            successful_steps.append(step)

    # A stored None is treated as 0.0 so the comparison below cannot fail.
    old_avg_step = summary.get('avg_step') or 0.0
    old_successful_cases = summary.get('successful_cases', 0)

    new_avg_step = sum(successful_steps) / len(successful_steps) if successful_steps else 0.0
    new_successful_cases = len(successful_steps)

    # Update the in-memory summary.
    summary['avg_step'] = new_avg_step
    summary['successful_cases'] = new_successful_cases

    return {
        'path': str(summary_path),
        'old_avg_step': old_avg_step,
        'new_avg_step': new_avg_step,
        'old_successful_cases': old_successful_cases,
        'new_successful_cases': new_successful_cases,
        'changed': abs(old_avg_step - new_avg_step) > 1e-6 or old_successful_cases != new_successful_cases,
        'updated_summary': summary
    }


def process_directory(base_dir: Path) -> List[Dict[str, Any]]:
    """Recalculate every summary.json found recursively under *base_dir*.

    Files that fail are reported with a traceback and skipped.

    Returns:
        One ``recalculate_summary`` result per file processed successfully.
    """
    results = []

    # Find all summary.json files.
    summary_files = list(base_dir.rglob('summary.json'))

    if not summary_files:
        print(f" ⚠ 未找到 summary.json 文件")
        return results

    print(f" 找到 {len(summary_files)} 个 summary.json 文件")

    for summary_file in sorted(summary_files):
        try:
            result = recalculate_summary(summary_file)
            results.append(result)

            # Show the path relative to the base directory.
            rel_path = summary_file.relative_to(base_dir)

            if result['changed']:
                print(f" ✓ {rel_path}")
                print(f" 旧: avg_step={result['old_avg_step']:.4f}, successful_cases={result['old_successful_cases']}")
                print(f" 新: avg_step={result['new_avg_step']:.4f}, successful_cases={result['new_successful_cases']}")
            else:
                print(f" - {rel_path} (无变化)")

        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f" ✗ 处理 {summary_file} 时出错: {e}")
            import traceback
            traceback.print_exc()

    return results


def save_updated_summaries(results: List[Dict[str, Any]], dry_run: bool = False):
    """Write the updated summaries back to their summary.json files.

    Args:
        results: entries produced by ``recalculate_summary``
        dry_run: when True, only report how many files would be updated
    """
    changed_count = sum(1 for r in results if r['changed'])

    if changed_count == 0:
        print("\n没有需要更新的文件")
        return

    if dry_run:
        print(f"\n[DRY RUN] 将更新 {changed_count} 个文件(实际未保存)")
        return

    print(f"\n正在保存 {changed_count} 个更新的文件...")

    for result in results:
        if result['changed']:
            summary_path = Path(result['path'])
            with open(summary_path, 'w', encoding='utf-8') as f:
                json.dump(result['updated_summary'], f, indent=2)
            print(f" ✓ 已保存: {summary_path}")

    print(f"\n✅ 成功更新 {changed_count} 个文件")
148 | all_results.extend(results) 149 | print() 150 | 151 | # 统计 152 | total_files = len(all_results) 153 | changed_files = sum(1 for r in all_results if r['changed']) 154 | 155 | print("=" * 70) 156 | print(f"统计: 共处理 {total_files} 个文件,其中 {changed_files} 个需要更新") 157 | print("=" * 70) 158 | 159 | if args.dry_run: 160 | print("\n[DRY RUN 模式] 未实际保存文件") 161 | print("如需保存,请去掉 --dry-run 参数重新运行") 162 | else: 163 | # 保存更新 164 | save_updated_summaries(all_results, dry_run=False) 165 | 166 | 167 | if __name__ == '__main__': 168 | main() 169 | 170 | -------------------------------------------------------------------------------- /prompts/METADATA_USAGE.md: -------------------------------------------------------------------------------- 1 | # Metadata Generation Tool 2 | 3 | ## Overview 4 | 5 | Generate metadata.csv files for VR-Bench dataset with dynamic prompts based on skin configurations. 6 | 7 | **Features:** 8 | - Dynamic prompt generation from skin descriptions 9 | - Flexible filtering by game type, skin, and difficulty 10 | - Separate or merged output modes 11 | - Support for both train and eval splits 12 | 13 | ## Quick Start 14 | 15 | ```bash 16 | # Generate all metadata files 17 | python test_dynamic_metadata.py 18 | 19 | # Generate for specific game type 20 | python test_dynamic_metadata.py --games maze 21 | 22 | # Generate for specific skins and difficulties 23 | python test_dynamic_metadata.py --games maze --skins 1 2 --difficulties easy 24 | ``` 25 | 26 | ## Command-Line Arguments 27 | 28 | ### `--games` 29 | Select game types (multiple allowed) 30 | 31 | **Options:** `maze`, `irregular_maze`, `maze3d`, `sokoban`, `trapfield` 32 | 33 | ```bash 34 | python test_dynamic_metadata.py --games maze sokoban 35 | ``` 36 | 37 | ### `--skins` 38 | Select skin IDs (multiple allowed) 39 | 40 | **Options:** `1`, `2`, `3`, `4`, `5` (varies by game type) 41 | 42 | **Skin counts:** 43 | - maze: 5 skins 44 | - irregular_maze: 4 skins 45 | - maze3d: 4 skins 46 | - sokoban: 5 
skins 47 | - trapfield: 4 skins 48 | 49 | ```bash 50 | python test_dynamic_metadata.py --skins 1 2 3 51 | ``` 52 | 53 | ### `--difficulties` 54 | Select difficulty levels (multiple allowed) 55 | 56 | **Options:** `easy`, `medium`, `hard` 57 | 58 | ```bash 59 | python test_dynamic_metadata.py --difficulties easy hard 60 | ``` 61 | 62 | ### `--splits` 63 | Select dataset splits (default: train eval) 64 | 65 | **Options:** `train`, `eval` 66 | 67 | ```bash 68 | python test_dynamic_metadata.py --splits train 69 | ``` 70 | 71 | ### `--merge` 72 | Merge all matching data into a single metadata.csv 73 | 74 | ```bash 75 | python test_dynamic_metadata.py --games maze --skins 1 2 --merge 76 | ``` 77 | 78 | ### `--dataset-root` 79 | Specify dataset root directory (default: project_root/downloaded_dataset) 80 | 81 | ```bash 82 | python test_dynamic_metadata.py --dataset-root /path/to/dataset 83 | ``` 84 | 85 | ### `--skins-root` 86 | Specify skins configuration directory (default: project_root/skins) 87 | 88 | ```bash 89 | python test_dynamic_metadata.py --skins-root /path/to/skins 90 | ``` 91 | 92 | ## Usage Examples 93 | 94 | ### Generate all data 95 | ```bash 96 | python test_dynamic_metadata.py 97 | ``` 98 | **Output:** 132 metadata.csv files (66 train + 66 eval) 99 | 100 | ### Generate specific game 101 | ```bash 102 | python test_dynamic_metadata.py --games maze 103 | ``` 104 | **Output:** 30 files (5 skins × 3 difficulties × 2 splits) 105 | 106 | ### Generate specific combination 107 | ```bash 108 | python test_dynamic_metadata.py --games maze --skins 1 --difficulties easy 109 | ``` 110 | **Output:** 2 files (train/maze_1_easy and eval/maze_1_easy) 111 | 112 | ### Merge multiple games 113 | ```bash 114 | python test_dynamic_metadata.py --games maze irregular_maze --merge 115 | ``` 116 | **Output:** 2 merged files (one for train, one for eval) 117 | 118 | ### Cross-skin training 119 | ```bash 120 | python test_dynamic_metadata.py --games maze --skins 1 2 3 --merge 
--splits train 121 | ``` 122 | **Output:** 1 merged file containing all train data for maze skins 1, 2, 3 123 | 124 | ### Regenerate specific skins 125 | ```bash 126 | python test_dynamic_metadata.py --games irregular_maze --skins 1 2 3 127 | ``` 128 | **Output:** 18 files (3 skins × 3 difficulties × 2 splits) 129 | 130 | ## Output Structure 131 | 132 | ### Separate Mode (default) 133 | ``` 134 | downloaded_dataset/ 135 | └── metadata/ 136 | ├── train/ 137 | │ ├── maze_1_easy/ 138 | │ │ └── metadata.csv 139 | │ ├── maze_1_medium/ 140 | │ │ └── metadata.csv 141 | │ └── ... 142 | └── eval/ 143 | ├── maze_1_easy/ 144 | │ └── metadata.csv 145 | └── ... 146 | ``` 147 | 148 | ### Merge Mode 149 | ``` 150 | downloaded_dataset/ 151 | └── metadata/ 152 | ├── train/ 153 | │ └── maze_sokoban_1_2_easy/ 154 | │ └── metadata.csv 155 | └── eval/ 156 | └── maze_sokoban_1_2_easy/ 157 | └── metadata.csv 158 | ``` 159 | 160 | ## Metadata CSV Format 161 | 162 | Each CSV file contains 3 columns: 163 | 164 | | Column | Description | Example | 165 | |--------|-------------|---------| 166 | | `video` | Video file path (relative to downloaded_dataset/) | `train/maze/1/easy/videos/easy_0001_0.mp4` | 167 | | `prompt` | Dynamically generated prompt | `Create a 2D animation...` | 168 | | `input_image` | Input image path (relative to downloaded_dataset/) | `train/maze/1/easy/images/easy_0001.png` | 169 | 170 | ## Dynamic Prompt System 171 | 172 | Prompts are automatically generated based on skin configurations in `skins/{game_type}/{skin_id}/description.json`. 173 | 174 | **Example:** For maze skin 1: 175 | ```json 176 | { 177 | "visual_description": { 178 | "player": "red circle", 179 | "goal": "green square", 180 | "wall": "light blue square", 181 | "floor": "white square" 182 | } 183 | } 184 | ``` 185 | 186 | **Generated prompt:** 187 | ``` 188 | Create a 2D animation based on the provided image of a maze. 
# Errors that mean "this extraction strategy did not apply". json.JSONDecodeError
# and the helpers' own failures are ValueError; json.loads / re.search raise
# TypeError for non-string input; RecursionError covers pathologically nested JSON.
_PARSE_ERRORS = (ValueError, TypeError, RecursionError)


def _decode_candidates(response, extractors):
    """Yield the value of each extractor that succeeds on ``response``, in order."""
    for extract in extractors:
        try:
            yield extract(response)
        except _PARSE_ERRORS:
            continue


def _first_list_field(response, key, is_valid=None):
    """Return the first list stored under ``key`` in a JSON object found in ``response``.

    The response is tried as raw JSON, then as a markdown code fence, then as
    the outermost ``{...}`` span. A candidate is skipped when it is not a
    dict, when ``key`` does not hold a list, or when ``is_valid`` rejects the
    list. Returns None when no candidate qualifies.
    """
    extractors = (json.loads, _parse_markdown_json, _extract_json_object)
    for candidate in _decode_candidates(response, extractors):
        if not isinstance(candidate, dict):
            continue
        value = candidate.get(key)
        if isinstance(value, list) and (is_valid is None or is_valid(value)):
            return value
    return None


def parse_actions(response: str, game_type: str = 'default') -> List[Dict[str, Any]]:
    """Parse a model response into a list of action dicts.

    Args:
        response: Raw text returned by the model.
        game_type: Selects the expected format. 'maze'/'trapfield' use a
            ``path`` list of directions, 'sokoban' an ``actions`` list,
            'pathfinder' a ``path`` list of strings, and 'maze3d'/'3dmaze' a
            ``path`` list of 3D directions. Any other value expects a JSON
            array, which is returned as-is.

    Returns:
        The parsed list of actions.

    Raises:
        ValueError: If no supported representation can be extracted.
    """
    if game_type in ('maze', 'trapfield'):
        return _parse_path_actions(response)
    elif game_type == 'sokoban':
        return _parse_sokoban_actions(response)
    elif game_type == 'pathfinder':
        return _parse_pathfinder_actions(response)
    elif game_type in ('maze3d', '3dmaze'):
        return _parse_maze3d_actions(response)

    extractors = (json.loads, _parse_markdown_json, _extract_json_array)
    for candidate in _decode_candidates(response, extractors):
        if isinstance(candidate, list):
            return candidate

    # str() keeps the documented ValueError even when response is not a string.
    raise ValueError(f"Failed to parse JSON from response: {str(response)[:200]}")


def _parse_markdown_json(text: str) -> List[Dict[str, Any]]:
    """Decode the JSON inside the first ``` or ```json fence of ``text``.

    Raises:
        ValueError: If there is no fence or its content is not valid JSON.
    """
    match = re.search(r'```(?:json)?\s*(.*?)\s*```', text, re.DOTALL)
    if match:
        return json.loads(match.group(1))
    raise ValueError("No markdown JSON found")


def _extract_json_array(text: str) -> List[Dict[str, Any]]:
    """Decode the span from the first '[' to the last ']' of ``text`` as JSON.

    Raises:
        ValueError: If there is no such span or it is not valid JSON.
    """
    match = re.search(r'\[.*\]', text, re.DOTALL)
    if match:
        return json.loads(match.group(0))
    raise ValueError("No JSON array found")


def _parse_path_actions(response: str) -> List[Dict[str, Any]]:
    """Parse ``{"path": [...]}`` into one move action per path element.

    Elements are passed through as the ``direction`` value without validation.

    Raises:
        ValueError: If no object with a ``path`` list is found.
    """
    path = _first_list_field(response, 'path')
    if path is not None:
        return [{'action': 'move', 'direction': d} for d in path]

    raise ValueError(f"Failed to parse path from response: {str(response)[:200]}")


def _parse_sokoban_actions(response: str) -> List[Dict[str, Any]]:
    """Parse Sokoban actions - only move actions (up/down/left/right).

    Expected format: {"actions": [...]}. Every element becomes a move action;
    pushes are not separate actions because they happen automatically.

    Raises:
        ValueError: If no object with an ``actions`` list is found.
    """
    actions = _first_list_field(response, 'actions')
    if actions is not None:
        return [{'action': 'move', 'direction': a} for a in actions]

    raise ValueError(f"Failed to parse sokoban actions from response: {str(response)[:200]}")


def _parse_pathfinder_actions(response: str) -> List[Dict[str, Any]]:
    """Parse PathFinder actions - letter array representing the path.

    Expected format: {"path": ["A", "C", "D"]}

    Returns:
        A single-element list holding one ``path`` action with the whole path.

    Raises:
        ValueError: If no object with an all-string ``path`` list is found.
    """
    path = _first_list_field(
        response, 'path',
        is_valid=lambda items: all(isinstance(item, str) for item in items),
    )
    if path is not None:
        return [{'action': 'path', 'path': path}]

    raise ValueError(f"Failed to parse pathfinder actions from response: {str(response)[:200]}")


def _parse_maze3d_actions(response: str) -> List[Dict[str, Any]]:
    """Parse 3D Maze actions - direction array.

    Expected format: {"path": ["up", "forward_right", "forward_left", ...]}
    Valid directions: forward_left, forward_right, backward_left, backward_right, up, down

    Raises:
        ValueError: If no object whose ``path`` holds only valid directions is found.
    """
    valid_directions = {
        'forward_left', 'forward_right', 'backward_left', 'backward_right', 'up', 'down'
    }

    path = _first_list_field(
        response, 'path',
        # isinstance first: a non-string (possibly unhashable) item must not reach the set lookup.
        is_valid=lambda items: all(
            isinstance(item, str) and item in valid_directions for item in items
        ),
    )
    if path is not None:
        return [{'action': 'move', 'direction': direction} for direction in path]

    raise ValueError(f"Failed to parse maze3d actions from response: {str(response)[:200]}")


def _extract_json_object(text: str) -> Dict[str, Any]:
    """Decode the span from the first '{' to the last '}' of ``text`` as JSON.

    Raises:
        ValueError: If there is no such span or it is not valid JSON.
    """
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if match:
        return json.loads(match.group(0))
    raise ValueError("No JSON object found")
class BaseTextureHandler:
    """Base texture handler shared by all games.

    Loads image files from an assets folder, fits each one into a square
    ``cell_size`` x ``cell_size`` RGBA canvas and keeps the results by name.
    """

    def __init__(self, assets_folder: Optional[str] = None, cell_size: int = DEFAULT_CELL_SIZE):
        """
        Initialize texture handler.

        Args:
            assets_folder: Path to assets folder. If None, uses the "assets"
                directory two levels above this file.
            cell_size: Size of each cell in pixels.

        Side effects:
            Creates the assets directory (including parents) if it is missing.
        """
        if assets_folder is None:
            assets_folder = Path(__file__).parent.parent / "assets"

        self.assets_path = Path(assets_folder)
        self.cell_size = cell_size
        self.textures: Dict[str, Image.Image] = {}

        # Ensure assets directory exists
        self.assets_path.mkdir(parents=True, exist_ok=True)

    def load_textures(self, texture_names: list):
        """
        Load specified textures.

        Names whose file is missing or cannot be read are skipped.

        Args:
            texture_names: List of texture names to load (e.g., ['floor', 'wall', 'player'])
        """
        for name in texture_names:
            texture = self._load_texture(name)
            if texture is not None:
                self.textures[name] = texture

    def _load_texture(self, name: str) -> Optional[Image.Image]:
        """
        Load a single texture from file.

        The first existing file among the supported extensions is used.

        Args:
            name: Texture name (without extension)

        Returns:
            Loaded and resized texture, or None if not found or unreadable
        """
        file_path = None
        for ext in SUPPORTED_IMAGE_FORMATS:
            candidate = self.assets_path / f"{name}{ext}"
            if candidate.exists():
                file_path = candidate
                break

        if file_path is None:
            return None

        try:
            # The context manager closes the file as soon as the pixels have
            # been copied by convert(), instead of waiting for garbage collection.
            with Image.open(file_path) as source:
                img = source.convert("RGBA")
            return self._resize_keep_aspect_ratio(img, self.cell_size)
        except Exception as e:
            # Best effort: a broken texture is reported and treated as missing.
            print(f"Failed to load texture {name}: {e}")
            return None

    def _resize_keep_aspect_ratio(self, img: Image.Image, target_size: int) -> Image.Image:
        """
        Resize image while maintaining aspect ratio, centered on transparent canvas.

        Args:
            img: Source image
            target_size: Target size (width and height)

        Returns:
            Resized image on transparent canvas
        """
        original_width, original_height = img.size

        # Calculate scaling ratio
        ratio = min(target_size / original_width, target_size / original_height)
        # Clamp to one pixel: for very elongated images int() would round the
        # short side down to 0, which resize() cannot handle.
        new_width = max(1, int(original_width * ratio))
        new_height = max(1, int(original_height * ratio))

        # Resize with high quality
        resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Create transparent canvas
        result = Image.new("RGBA", (target_size, target_size), (0, 0, 0, 0))

        # Center the resized image; its own alpha channel is the paste mask.
        paste_x = (target_size - new_width) // 2
        paste_y = (target_size - new_height) // 2
        result.paste(resized_img, (paste_x, paste_y), resized_img if resized_img.mode == 'RGBA' else None)

        return result

    def get_texture(self, name: str) -> Optional[Image.Image]:
        """
        Get a loaded texture by name.

        Args:
            name: Texture name

        Returns:
            Texture image or None if not loaded
        """
        return self.textures.get(name)

    def has_texture(self, name: str) -> bool:
        """
        Check if a texture is loaded.

        Args:
            name: Texture name

        Returns:
            True if texture is loaded
        """
        return name in self.textures


# Global texture handler cache
_texture_handlers: Dict[str, BaseTextureHandler] = {}


def get_texture_handler(assets_folder: Optional[str] = None,
                        cell_size: int = DEFAULT_CELL_SIZE,
                        texture_names: Optional[list] = None) -> BaseTextureHandler:
    """
    Get or create a cached texture handler.

    Handlers are cached per (assets_folder, cell_size). Requested textures
    that the handler has not loaded yet are loaded on every call, so a later
    call asking for additional names is honored even when the handler
    already exists.

    Args:
        assets_folder: Path to assets folder
        cell_size: Size of each cell
        texture_names: List of textures to load

    Returns:
        Cached or new texture handler
    """
    if assets_folder is None:
        assets_folder = str(Path(__file__).parent.parent / "assets")

    cache_key = f"{assets_folder}:{cell_size}"

    handler = _texture_handlers.get(cache_key)
    if handler is None:
        handler = BaseTextureHandler(assets_folder, cell_size)
        _texture_handlers[cache_key] = handler

    if texture_names:
        missing = [name for name in texture_names if not handler.has_texture(name)]
        if missing:
            handler.load_textures(missing)

    return handler


def clear_texture_cache():
    """Clear the texture handler cache."""
    # Mutates the module-level dict in place, so no `global` statement is needed.
    _texture_handlers.clear()
class TrajectoryEvaluator:
    """
    Trajectory evaluator - based on the "GUI path-tracking consistency
    metrics" definition document v1.1.

    Runs a fixed, ordered list of metrics on a ground-truth and a generated
    trajectory; each metric can read the values produced by earlier ones.
    """

    def __init__(self, eps_ratio: float = 0.01, num_samples: int = 1000,
                 pr_threshold: float = 0.98, step_threshold: float = 0.1,
                 fidelity_frame_step: int = 5, fidelity_pixel_threshold: int = 5):
        """
        Args:
            eps_ratio: Matching threshold (relative to the diagonal), default 0.01 (1%)
            num_samples: Number of sample points, default 1000
            pr_threshold: PR threshold for Exact Match, default 0.98
            step_threshold: Step threshold for Exact Match, default 0.1
            fidelity_frame_step: Frame sampling stride for the fidelity metric, default 5
            fidelity_pixel_threshold: Pixel difference threshold for the fidelity
                metric, default 5 (+/-5 gray levels)
        """
        self.eps_ratio = eps_ratio
        self.num_samples = num_samples
        self.pr_threshold = pr_threshold
        self.step_threshold = step_threshold
        # Kept on the instance like the other settings so they can be inspected.
        self.fidelity_frame_step = fidelity_frame_step
        self.fidelity_pixel_threshold = fidelity_pixel_threshold

        # Order matters: evaluate() feeds each metric's outputs to the ones after it.
        self.metrics = [
            PrecisionRateMetric(eps_ratio, num_samples),
            SuccessRateMetric(),
            StepMetric(),
            ExactMatchMetric(pr_threshold, step_threshold),
            FidelityMetric(frame_step=fidelity_frame_step, pixel_threshold=fidelity_pixel_threshold),
        ]

    def evaluate(self,
                 gt_traj: np.ndarray,
                 gen_traj: np.ndarray,
                 video_width: int,
                 video_height: int,
                 # Quoted so the annotation is not evaluated when the class is defined.
                 state: Optional["UnifiedState"] = None,
                 gen_box_traj: Optional[np.ndarray] = None,
                 **kwargs) -> Dict[str, Any]:
        """
        Evaluate two trajectories.

        Args:
            gt_traj: Ground truth trajectory (N, 2), pixel coordinates (player trajectory)
            gen_traj: Generated trajectory (M, 2), pixel coordinates (player trajectory)
            video_width: Video width
            video_height: Video height
            state: UnifiedState object (optional)
            gen_box_traj: Generated box trajectory (Sokoban only, used for SR)
            **kwargs: Extra keyword arguments forwarded to every metric

        Returns:
            result: One entry per metric name plus ``gt_length``, ``gen_length``,
            ``is_perfect``, ``gt_resampled``, ``gen_resampled`` and ``distances``.
            If either trajectory has fewer than 2 points, the empty result
            is returned instead.
        """
        if len(gt_traj) < 2 or len(gen_traj) < 2:
            return self._empty_result()

        # Normalize trajectories to [0,1]x[0,1]
        gt_traj_norm = normalize_trajectory(gt_traj, video_width, video_height)
        gen_traj_norm = normalize_trajectory(gen_traj, video_width, video_height)

        result = {}
        shared_data = {
            'state': state,
            'video_width': video_width,
            'video_height': video_height,
        }

        # With a state, extract the goal bbox and normalize it
        if state is not None:
            goal_bbox_pixel = state.goal.bbox
            # Normalize with the image size recorded in the state: the bbox is
            # expressed in the original image, not in the generated video size.
            state_width = state.render.image_width
            state_height = state.render.image_height
            shared_data['goal_bbox'] = (
                goal_bbox_pixel.x / state_width,
                goal_bbox_pixel.y / state_height,
                goal_bbox_pixel.width / state_width,
                goal_bbox_pixel.height / state_height
            )

        # Sokoban: normalize the box trajectory for the SR computation
        if gen_box_traj is not None:
            shared_data['gen_box_traj'] = normalize_trajectory(gen_box_traj, video_width, video_height)

        # Compute all metrics in order
        for metric in self.metrics:
            # Merge into one dict: a key present in both kwargs and shared_data
            # would otherwise raise "multiple values for keyword argument".
            # Values computed here take precedence over caller-supplied ones.
            value, extra = metric.compute(gt_traj_norm, gen_traj_norm, **{**kwargs, **shared_data})
            result[metric.name] = value
            shared_data.update(extra)
            # Expose the metric value to the metrics that follow
            shared_data[metric.name] = value

        # Extra data (for visualization and debugging)
        result.update({
            'gt_length': shared_data.get('gt_length', 0.0),
            'gen_length': shared_data.get('gen_length', 0.0),
            'is_perfect': shared_data.get('is_perfect', False),
            'gt_resampled': shared_data.get('gt_resampled'),
            'gen_resampled': shared_data.get('gen_resampled'),
            'distances': shared_data.get('distances')
        })

        return result

    def _empty_result(self) -> Dict[str, Any]:
        """Return the result used when a trajectory is too short to evaluate.

        The keys pr/sd/em/sr/mf presumably mirror the ``name`` of the five
        metrics - TODO confirm against the metric classes.
        """
        return {
            'pr': 0.0,
            'sd': 0.0,
            'em': 0.0,
            'sr': 0.0,
            'mf': 0.0,
            'gt_length': 0.0,
            'gen_length': 0.0,
            'is_perfect': False,
            # Same debug keys as a normal result, so callers can index both alike.
            'gt_resampled': None,
            'gen_resampled': None,
            'distances': None
        }
class PathFinderTextureHandler:
    """PathFinder texture handler.

    Supports the start icon, the end icon and the road texture.
    """

    # Supported image formats
    SUPPORTED_FORMATS = ['.png', '.jpg', '.jpeg']

    # Required textures
    REQUIRED_TEXTURES = ['start', 'end', 'road']

    def __init__(self, assets_folder: Optional[str] = None):
        """
        Initialize the texture handler.

        Args:
            assets_folder: Texture folder path; if None, the "assets"
                directory next to this file is used.

        Side effects:
            Creates the assets directory (including parents) if it is missing.
        """
        if assets_folder is None:
            # Default: games/pathfinder/assets
            assets_folder = Path(__file__).parent / "assets"

        self.assets_path = Path(assets_folder)
        self.textures = {}

        # Make sure the assets folder exists
        self.assets_path.mkdir(parents=True, exist_ok=True)

    def load_textures(self):
        """Load every required texture; missing or unreadable ones are skipped."""
        for texture_name in self.REQUIRED_TEXTURES:
            texture = self._load_texture(texture_name)
            if texture is not None:
                self.textures[texture_name] = texture

    def _load_texture(self, name: str) -> Optional[Image.Image]:
        """
        Load a single texture.

        The first existing file among the supported extensions is used.

        Args:
            name: Texture name (without extension)

        Returns:
            The loaded RGBA image, or None if no file exists or it cannot be read
        """
        for ext in self.SUPPORTED_FORMATS:
            file_path = self.assets_path / f"{name}{ext}"
            if file_path.exists():
                try:
                    # The context manager closes the file as soon as convert()
                    # has copied the pixels.
                    with Image.open(file_path) as source:
                        return source.convert("RGBA")
                except Exception as e:
                    # Best effort: a broken texture is reported and treated as missing.
                    print(f"Failed to load texture {name}: {e}")
                    return None

        return None

    def get_texture(self, name: str) -> Optional[Image.Image]:
        """
        Get a texture.

        Args:
            name: Texture name

        Returns:
            The texture image, or None if it is not loaded
        """
        return self.textures.get(name)

    def has_texture(self, name: str) -> bool:
        """
        Check whether a texture is loaded.

        Args:
            name: Texture name

        Returns:
            True if the texture is loaded
        """
        return name in self.textures

    def get_start_icon(self, size: int) -> Optional[Image.Image]:
        """
        Get the start icon, resized to the given size.

        Args:
            size: Icon size (diameter)

        Returns:
            The resized start icon, or None if the texture is not loaded
        """
        texture = self.get_texture('start')
        if texture is not None:
            return self._resize_keep_aspect_ratio(texture, size)
        return None

    def get_end_icon(self, size: int) -> Optional[Image.Image]:
        """
        Get the end icon, resized to the given size.

        Args:
            size: Icon size (diameter)

        Returns:
            The resized end icon, or None if the texture is not loaded
        """
        texture = self.get_texture('end')
        if texture is not None:
            return self._resize_keep_aspect_ratio(texture, size)
        return None

    def get_road_texture(self) -> Optional[Image.Image]:
        """
        Get the road texture at its original size.

        Returns:
            The road texture image, or None if it is not loaded
        """
        return self.get_texture('road')

    @staticmethod
    def _resize_keep_aspect_ratio(img: Image.Image, target_size: int) -> Image.Image:
        """
        Resize an image while keeping its aspect ratio.

        Args:
            img: Source image
            target_size: Target size (side length of the square)

        Returns:
            The resized image, centered on a transparent square canvas
        """
        width, height = img.size

        # The longer side becomes target_size. The shorter side is clamped to
        # one pixel because int() would round it down to 0 for very elongated
        # images, which resize() cannot handle.
        if width > height:
            new_width = target_size
            new_height = max(1, int(height * target_size / width))
        else:
            new_height = target_size
            new_width = max(1, int(width * target_size / height))

        resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Square transparent canvas with the image centered; the image itself is
        # the paste mask.
        canvas = Image.new('RGBA', (target_size, target_size), (0, 0, 0, 0))
        offset_x = (target_size - new_width) // 2
        offset_y = (target_size - new_height) // 2
        canvas.paste(resized, (offset_x, offset_y), resized)

        return canvas

    def validate_textures(self) -> tuple[bool, list]:
        """
        Check that every required texture is loaded.

        Returns:
            (True if none is missing, list of missing texture names)
        """
        missing = [name for name in self.REQUIRED_TEXTURES if not self.has_texture(name)]
        return len(missing) == 0, missing


# Global cache
_texture_handlers = {}


def get_texture_handler(assets_folder: Optional[str] = None) -> PathFinderTextureHandler:
    """
    Get or create a cached texture handler.

    A new handler loads its textures once, when it is created; later calls
    for the same folder return the same instance.

    Args:
        assets_folder: Texture folder path

    Returns:
        The texture handler instance
    """
    if assets_folder is None:
        assets_folder = str(Path(__file__).parent / "assets")

    cache_key = str(assets_folder)

    handler = _texture_handlers.get(cache_key)
    if handler is None:
        handler = PathFinderTextureHandler(assets_folder)
        handler.load_textures()
        _texture_handlers[cache_key] = handler

    return handler