├── xenoverse ├── mazeworld │ ├── demo │ │ ├── __init__.py │ │ ├── keyboard_play_demo.py │ │ └── agent_play_demo.py │ ├── tests │ │ ├── __init__.py │ │ ├── test.py │ │ └── test_agent.py │ ├── agents │ │ ├── __init__.py │ │ ├── oracle_agent.py │ │ └── agent_base.py │ ├── envs │ │ ├── img │ │ │ ├── .DS_Store │ │ │ ├── wall_brick_4_diff_1k.jpg │ │ │ ├── wall_brick_4_rough_1k.jpg │ │ │ ├── wall_slab_tiles_diff_1k.jpg │ │ │ ├── ceiling_asphalt_03_diff_1k.jpg │ │ │ ├── ground_asphalt_02_diff_1k.jpg │ │ │ ├── ground_asphalt_02_rough_1k.jpg │ │ │ ├── ground_leafy_grass_diff_1k.jpg │ │ │ ├── ground_stone_tiles_diff_1k.jpg │ │ │ ├── wall_aerial_mud_1_diff_1k.jpg │ │ │ ├── wall_aerial_mud_1_rough_1k.jpg │ │ │ ├── wall_brick_wall_09_ao_1k.jpg │ │ │ ├── wall_brick_wall_09_diff_1k.jpg │ │ │ ├── wall_painted_brick_diff_1k.jpg │ │ │ ├── wall_pavement_02_diff_1k.jpg │ │ │ ├── wall_rocky_terrain_diff_1k.jpg │ │ │ ├── wall_stone_wall_04_diff_1k.jpg │ │ │ ├── ceiling_factory_wall_diff_1k.jpg │ │ │ ├── ceiling_grey_plaster_diff_1k.jpg │ │ │ ├── ground_grey_cartago_02_ao_1k.jpg │ │ │ ├── ground_rubber_tiles_diff_1k.jpg │ │ │ ├── ground_winter_leaves_diff_1k.jpg │ │ │ ├── wall_aerial_rocks_02_diff_1k.jpg │ │ │ ├── wall_brick_wall_005_diff_1k.jpg │ │ │ ├── ceiling_factory_wall_rough_1k.jpg │ │ │ ├── ceiling_grey_plaster_02_diff_1k.jpg │ │ │ ├── ceiling_grey_plaster_03_diff_1k.jpg │ │ │ ├── ceiling_grey_plaster_rough_1k.jpg │ │ │ ├── ceiling_metal_plate_02_diff_1k.jpg │ │ │ ├── ceiling_scuffed_cement_diff_1k.jpg │ │ │ ├── ground_asphalt_pit_lane_diff_1k.jpg │ │ │ ├── ground_concrete_pavers_diff_1k.jpg │ │ │ ├── ground_cracked_concrete_diff_1k.jpg │ │ │ ├── ground_floor_tiles_06_diff_1k.jpg │ │ │ ├── ground_floor_tiles_06_rough_1k.jpg │ │ │ ├── ground_grey_cartago_02_diff_1k.jpg │ │ │ ├── ground_long_white_tiles_ao_1k.jpg │ │ │ ├── ground_long_white_tiles_diff_1k.jpg │ │ │ ├── ground_old_wood_floor_diff_1k.jpg │ │ │ ├── ground_painted_concrete_diff_1k.jpg │ │ │ ├── ground_slate_floor_02_diff_1k.jpg │ │ │ ├── ground_stone_tiles_03_diff_1k.jpg │ │ │ ├── ground_weathered_planks_diff_1k.jpg │ │ │ ├── wall_aerial_rocks_02_rough_1k.jpg │ │ │ ├── wall_brick_pavement_02_diff_1k.jpg │ │ │ ├── wall_denmin_fabric_02_diff_1k.jpg │ │ │ ├── wall_green_metal_rust_diff_1k.jpg │ │ │ ├── wall_green_metal_rust_rough_1k.jpg │ │ │ ├── wall_metal_grate_rusty_diff_1k.jpg │ │ │ ├── wall_rock_pitted_mossy_diff_1k.jpg │ │ │ ├── wall_rough_block_wall_diff_1k.jpg │ │ │ ├── wall_rough_block_wall_rough_1k.jpg │ │ │ ├── ceiling_concrete_floor_01_diff_1k.jpg │ │ │ ├── ceiling_concrete_floor_02_diff_1k.jpg │ │ │ ├── ceiling_grey_plaster_02_rough_1k.jpg │ │ │ ├── ceiling_grey_plaster_03_rough_1k.jpg │ │ │ ├── ceiling_painted_concrete_diff_1k.jpg │ │ │ ├── ceiling_white_plaster_02_diff_1k.jpg │ │ │ ├── ceiling_white_plaster_02_rough_1k.jpg │ │ │ ├── ground_dark_wooden_planks_diff_1k.jpg │ │ │ ├── ground_dry_riverbed_rock_diff_1k.jpg │ │ │ ├── ground_laminate_floor_02_diff_1k.jpg │ │ │ ├── ground_rock_tile_floor_02_diff_1k.jpg │ │ │ ├── ground_t_brick_floor_002_rough_1k.jpg │ │ │ ├── wall_preconcrete_wall_001_diff_1k.jpg │ │ │ ├── wall_recycled_brick_floor_diff_1k.jpg │ │ │ ├── wall_rough_plaster_brick_diff_1k.jpg │ │ │ ├── wall_rough_plaster_brick_rough_1k.jpg │ │ │ ├── wall_stone_brick_wall_001_diff_1k.jpg │ │ │ ├── ceiling_concrete_floor_01_rough_1k.jpg │ │ │ ├── ceiling_concrete_floor_02_rough_1k.jpg │ │ │ ├── ceiling_worn_corrugated_iron_diff_1k.jpg │ │ │ ├── ground_t_brick_floor_002_diffuse_1k.jpg │ │ │ ├── 
wall_castle_wall_varriation_diff_1k.jpg │ │ │ ├── wall_castle_wall_varriation_rough_1k.jpg │ │ │ ├── wall_cracked_concrete_wall_diff_1k.jpg │ │ │ ├── wall_patterned_brick_wall_03_diff_1k.jpg │ │ │ ├── wall_preconcrete_wall_001_rough_1k.jpg │ │ │ ├── wall_rough_plaster_brick_02_diff_1k.jpg │ │ │ ├── wall_rough_plaster_brick_02_rough_1k.jpg │ │ │ ├── wall_stone_brick_wall_001_rough_1k.jpg │ │ │ ├── ground_concrete_floor_painted_diff_1k.jpg │ │ │ ├── ground_concrete_floor_painted_rough_1k.jpg │ │ │ ├── wall_concrete_brick_wall_001_rough_1k.jpg │ │ │ ├── ceiling_concrete_floor_worn_001_diff_1k.jpg │ │ │ ├── ceiling_concrete_floor_worn_001_rough_1k.jpg │ │ │ ├── ground_patterned_cobblestone_02_diff_1k.jpg │ │ │ ├── wall_concrete_brick_wall_001_diffuse_1k.jpg │ │ │ └── .gitattributes │ │ ├── __init__.py │ │ ├── maze_continuous_3d.py │ │ ├── maze_env.py │ │ └── dynamics.py │ └── __init__.py ├── metacontrol │ ├── tests │ │ ├── __init__.py │ │ └── test.py │ ├── __init__.py │ ├── random_humanoid.py │ ├── random_cartpole.py │ └── random_acrobot.py ├── linds │ ├── inspect_pkl.py │ ├── test.py │ ├── __init__.py │ ├── README.md │ ├── test_ppo.py │ ├── visualizer.py │ ├── task_sampler.py │ └── solver.py ├── utils │ ├── __init__.py │ ├── tools.py │ └── grid_ops.py ├── __init__.py ├── anyhvac │ ├── __init__.py │ ├── README.md │ ├── test_file.py │ ├── test.py │ ├── anyhvac_sampler.py │ ├── anyhvac_env_vis.py │ └── anyhvac_solver.py ├── metalang │ ├── __init__.py │ ├── metalangv3.py │ ├── README.md │ ├── task_sampler.py │ ├── metalangv2.py │ ├── metalangv1.py │ └── generator.py └── anymdp │ ├── __init__.py │ ├── anymdp_solver_opt.py │ ├── test.py │ ├── anymdp_solver_q.py │ ├── test_utils.py │ ├── anymdp_solver_mbrl.py │ ├── README.md │ ├── test_ppo.py │ ├── task_sampler.py │ ├── visualizer.py │ ├── solver.py │ └── anymdp_env.py ├── requirements.txt ├── setup.py ├── .gitignore └── README.md /xenoverse/mazeworld/demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xenoverse/mazeworld/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /xenoverse/metacontrol/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gymnasium>=1.0.0 2 | numpy>=1.24.4 3 | Pillow>=6.2.2 4 | six>=1.12.0 5 | pygame>=2.6.0 6 | numba>=0.58.1 -------------------------------------------------------------------------------- /xenoverse/mazeworld/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .smart_slam_agent import SmartSLAMAgent 2 | from .oracle_agent import OracleAgent 3 | -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/.DS_Store -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_brick_4_diff_1k.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_brick_4_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_brick_4_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_brick_4_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_slab_tiles_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_slab_tiles_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/linds/inspect_pkl.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | if __name__=='__main__': 4 | with open('task.pkl', 'rb') as f: 5 | task = pickle.load(f) 6 | print(task) -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_asphalt_03_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_asphalt_03_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_asphalt_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_asphalt_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_asphalt_02_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_asphalt_02_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_leafy_grass_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_leafy_grass_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_stone_tiles_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_stone_tiles_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_aerial_mud_1_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_aerial_mud_1_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_aerial_mud_1_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_aerial_mud_1_rough_1k.jpg 
-------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_brick_wall_09_ao_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_brick_wall_09_ao_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_brick_wall_09_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_brick_wall_09_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_painted_brick_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_painted_brick_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_pavement_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_pavement_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rocky_terrain_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rocky_terrain_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_stone_wall_04_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_stone_wall_04_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_factory_wall_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_factory_wall_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_grey_plaster_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_grey_plaster_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_grey_cartago_02_ao_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_grey_cartago_02_ao_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_rubber_tiles_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_rubber_tiles_diff_1k.jpg -------------------------------------------------------------------------------- 
/xenoverse/mazeworld/envs/img/ground_winter_leaves_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_winter_leaves_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_aerial_rocks_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_aerial_rocks_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_brick_wall_005_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_brick_wall_005_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_factory_wall_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_factory_wall_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_grey_plaster_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_grey_plaster_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_grey_plaster_03_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_grey_plaster_03_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_grey_plaster_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_grey_plaster_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_metal_plate_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_metal_plate_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_scuffed_cement_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_scuffed_cement_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_asphalt_pit_lane_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_asphalt_pit_lane_diff_1k.jpg -------------------------------------------------------------------------------- 
/xenoverse/mazeworld/envs/img/ground_concrete_pavers_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_concrete_pavers_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_cracked_concrete_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_cracked_concrete_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_floor_tiles_06_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_floor_tiles_06_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_floor_tiles_06_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_floor_tiles_06_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_grey_cartago_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_grey_cartago_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_long_white_tiles_ao_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_long_white_tiles_ao_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_long_white_tiles_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_long_white_tiles_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_old_wood_floor_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_old_wood_floor_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_painted_concrete_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_painted_concrete_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_slate_floor_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_slate_floor_02_diff_1k.jpg -------------------------------------------------------------------------------- 
/xenoverse/mazeworld/envs/img/ground_stone_tiles_03_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_stone_tiles_03_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_weathered_planks_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_weathered_planks_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_aerial_rocks_02_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_aerial_rocks_02_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_brick_pavement_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_brick_pavement_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_denmin_fabric_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_denmin_fabric_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_green_metal_rust_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_green_metal_rust_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_green_metal_rust_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_green_metal_rust_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_metal_grate_rusty_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_metal_grate_rusty_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rock_pitted_mossy_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rock_pitted_mossy_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rough_block_wall_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rough_block_wall_diff_1k.jpg -------------------------------------------------------------------------------- 
/xenoverse/mazeworld/envs/img/wall_rough_block_wall_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rough_block_wall_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_concrete_floor_01_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_concrete_floor_01_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_concrete_floor_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_concrete_floor_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_grey_plaster_02_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_grey_plaster_02_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_grey_plaster_03_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_grey_plaster_03_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_painted_concrete_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_painted_concrete_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_white_plaster_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_white_plaster_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_white_plaster_02_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_white_plaster_02_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_dark_wooden_planks_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_dark_wooden_planks_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_dry_riverbed_rock_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_dry_riverbed_rock_diff_1k.jpg -------------------------------------------------------------------------------- 
/xenoverse/mazeworld/envs/img/ground_laminate_floor_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_laminate_floor_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_rock_tile_floor_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_rock_tile_floor_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_t_brick_floor_002_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_t_brick_floor_002_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_preconcrete_wall_001_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_preconcrete_wall_001_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_recycled_brick_floor_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_recycled_brick_floor_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_stone_brick_wall_001_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_stone_brick_wall_001_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_concrete_floor_01_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_concrete_floor_01_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_concrete_floor_02_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_concrete_floor_02_rough_1k.jpg 
-------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_worn_corrugated_iron_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_worn_corrugated_iron_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_t_brick_floor_002_diffuse_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_t_brick_floor_002_diffuse_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_castle_wall_varriation_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_castle_wall_varriation_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_castle_wall_varriation_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_castle_wall_varriation_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_cracked_concrete_wall_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_cracked_concrete_wall_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_patterned_brick_wall_03_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_patterned_brick_wall_03_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_preconcrete_wall_001_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_preconcrete_wall_001_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_02_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_rough_plaster_brick_02_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_stone_brick_wall_001_rough_1k.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_stone_brick_wall_001_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_concrete_floor_painted_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_concrete_floor_painted_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_concrete_floor_painted_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_concrete_floor_painted_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_concrete_brick_wall_001_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_concrete_brick_wall_001_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from xenoverse.mazeworld.envs.maze_env import MazeWorldContinuous3D 2 | from xenoverse.mazeworld.envs.task_sampler import MAZE_TASK_MANAGER, MazeTaskSampler, Resampler -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_concrete_floor_worn_001_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_concrete_floor_worn_001_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ceiling_concrete_floor_worn_001_rough_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ceiling_concrete_floor_worn_001_rough_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/ground_patterned_cobblestone_02_diff_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/ground_patterned_cobblestone_02_diff_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/wall_concrete_brick_wall_001_diffuse_1k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FutureAGI/Xenoverse/HEAD/xenoverse/mazeworld/envs/img/wall_concrete_brick_wall_001_diffuse_1k.jpg -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/img/.gitattributes: -------------------------------------------------------------------------------- 1 | SURVIVAL-1-demo.gif filter=lfs diff=lfs merge=lfs -text 2 | NAVIGATION-1-demo.gif filter=lfs diff=lfs merge=lfs -text 3 | NAVIGATION-2-demo.gif filter=lfs diff=lfs merge=lfs -text 4 | -------------------------------------------------------------------------------- 
/xenoverse/mazeworld/agents/oracle_agent.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import math 3 | import pygame 4 | from .smart_slam_agent import SmartSLAMAgent 5 | 6 | class OracleAgent(SmartSLAMAgent): 7 | def __init__(self, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | self._long_term_memory = numpy.ones_like(self._long_term_memory) -------------------------------------------------------------------------------- /xenoverse/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .random_nn import pseudo_random_seed 2 | from .random_nn import weights_and_biases 3 | from .random_nn import RandomMLP, RandomFourier, RandomGoal 4 | from .random_nn import RandomRNN, RandomLM 5 | from .tools import conv2d_numpy 6 | from .grid_ops import genmaze_by_primwall, genmaze_largeroom 7 | from .tools import random_partition, versatile_sample, generate_secure_strings 8 | from .tools import dump_task, load_task -------------------------------------------------------------------------------- /xenoverse/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __version__ = '0.1.10.0' -------------------------------------------------------------------------------- /xenoverse/anyhvac/__init__.py: -------------------------------------------------------------------------------- 1 | from gymnasium.envs.registration import register 2 | from xenoverse.anyhvac.anyhvac_env import HVACEnv 3 | from xenoverse.anyhvac.anyhvac_env_vis import HVACEnvVisible 4 | 5 | register( 6 | id='anyhvac-v1', 7 | entry_point='xenoverse.anyhvac.anyhvac_env:HVACEnv', 8 | kwargs={"max_steps": 5040, 9 | "failure_upperbound": 80, 10 | "iter_per_step": 600, 11 | "set_lower_bound": 16, 12 | "set_upper_bound": 32, 13 | "verbose": False }, 14 | ) 15 | 16 | register( 17 | id='anyhvac-visualizer-v1', 18 | entry_point='xenoverse.anyhvac.anyhvac_env:HVACEnvVisible', 19 | kwargs={"max_steps": 5040, 20 | "failure_upperbound": 80, 21 | "iter_per_step": 600, 22 | "set_lower_bound": 16, 23 | "set_upper_bound": 32, 24 | "verbose": False }, 25 | ) -------------------------------------------------------------------------------- /xenoverse/metacontrol/tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # File: test.py 4 | import gymnasium as gym 5 | import sys 6 | import xenoverse.metacontrol 7 | from xenoverse.metacontrol import sample_humanoid, get_humanoid_tasks 8 | 9 | def test_humanoid(max_steps=1000): 10 | env = gym.make("random-humanoid-v0") 11 | task = sample_humanoid() 12 | env.set_task(task) 13 | 14 | env.reset() 15 | terminated, truncated=False, False 16 | sum_reward = 0 17 | while not terminated and not truncated: 18 | state, reward, terminated, truncated, _ = env.step(env.action_space.sample()) 19 | sum_reward += reward 20 | print("...Test Finishes. Get score %f, steps = %s\n\n---------\n\n"%(sum_reward, max_steps)) 21 | 22 | if __name__=="__main__": 23 | for _ in range(10): 24 | test_humanoid() 25 | print("\n\nCongratulations!!!\n\nAll Tests Have Been Passed\n\n") -------------------------------------------------------------------------------- /xenoverse/mazeworld/tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # File: test.py 4 | import gymnasium as gym 5 | import sys 6 | import xenoverse.mazeworld 7 | from xenoverse.mazeworld import MazeTaskSampler 8 | from numpy import random 9 | 10 | def test_maze(max_steps=1000): 11 | maze_env = gym.make("mazeworld-v2", enable_render=False, max_steps=max_steps) 12 | task = MazeTaskSampler(verbose=True) 13 | maze_env.set_task(task) 14 | 15 | maze_env.reset() 16 | terminated, truncated=False, False 17 | sum_reward = 0 18 | while not terminated and not truncated: 19 | state, reward, terminated, truncated, _ = maze_env.step(maze_env.action_space.sample()) 20 | sum_reward += reward 21 | print("...Test Finishes. Get score %f, steps = %s\n\n---------\n\n"%(sum_reward, max_steps)) 22 | 23 | if __name__=="__main__": 24 | for _ in range(10): 25 | test_maze() 26 | print("\n\nCongratulations!!!\n\nAll Tests Have Been Passed\n\n") 27 | -------------------------------------------------------------------------------- /xenoverse/metalang/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from gymnasium.envs.registration import register 16 | from xenoverse.metalang.metalangv1 import MetaLangV1 17 | from xenoverse.metalang.metalangv2 import MetaLangV2 18 | from xenoverse.metalang.metalangv3 import MetaLMV3Env 19 | from xenoverse.metalang.task_sampler import TaskSamplerV1, TaskSamplerV2, TaskSamplerV3 20 | from xenoverse.metalang.generator import metalang_generator 21 | from xenoverse.metalang.generator_v3 import metalang_generator_v3 22 | 23 | register( 24 | id='meta-language-v3', 25 | entry_point='xenoverse.metalang:MetaLMV3Env', 26 | kwargs={} 27 | ) -------------------------------------------------------------------------------- /xenoverse/mazeworld/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from gymnasium.envs.registration import register 16 | from xenoverse.mazeworld.envs import MazeWorldContinuous3D 17 | from xenoverse.mazeworld.envs import MazeTaskSampler, Resampler 18 | 19 | register( 20 | id='mazeworld-v2', 21 | entry_point='xenoverse.mazeworld:MazeWorldContinuous3D', 22 | order_enforce=False, 23 | disable_env_checker=True, 24 | kwargs={ 25 | "enable_render": True, 26 | "render_scale": 480, 27 | "resolution": (256, 256), 28 | "max_steps": 5000, 29 | "visibility_3D": 12.0, 30 | "command_in_observation": False, 31 | "action_space_type": "Discrete16" , 32 | } 33 | ) 34 | -------------------------------------------------------------------------------- /xenoverse/linds/test.py: -------------------------------------------------------------------------------- 1 | if __name__=="__main__": 2 | import gymnasium as gym 3 | import numpy 4 | from xenoverse.linds import LinearDSSamplerRandomDim 5 | 6 | task = LinearDSSamplerRandomDim() 7 | max_steps = 5000 8 | prt_freq = 100 9 | 10 | # Test Random Policy 11 | env = gym.make("linear-dynamics-v0-visualizer") 12 | env.set_task(task) 13 | state = env.reset() 14 | acc_reward = 0 15 | epoch_reward = 0 16 | done = False 17 | obs_arr = [] 18 | act_arr = [] 19 | state_arr = [] 20 | step_lst = [] 21 | 22 | steps = 0 23 | episode_steps = 0 24 | while steps < max_steps: 25 | action = env.action_space.sample() 26 | state, reward, terminated, truncated, info = env.step(action) 27 | acc_reward += reward 28 | epoch_reward += reward 29 | steps += 1 30 | episode_steps += 1 31 | if(steps % prt_freq == 0 and steps > 0): 32 | print("Step:{}\tEpoch Reward: {}".format(steps, epoch_reward)) 33 | epoch_reward = 0 34 | if(terminated or truncated): 35 | step_lst.append(episode_steps) 36 | episode_steps = 0 37 | state, info = env.reset() 38 | print(f"Random Policy Summary: {acc_reward}, Average Episode Length:{numpy.mean(step_lst)}") 39 | env.visualize_and_save() 40 | 41 | print("Test Passed") -------------------------------------------------------------------------------- /xenoverse/mazeworld/tests/test_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf8 3 | # File: test.py 4 | import gymnasium as gym 5 | import sys 6 | import xenoverse.mazeworld 7 | import time 8 | from xenoverse.mazeworld import MazeTaskSampler, Resampler 9 | from xenoverse.mazeworld.agents import SmartSLAMAgent 10 | from numpy import random 11 | 12 | def test_agent_maze(max_steps=1000): 13 | maze_env = gym.make("mazeworld-v2", enable_render=False, max_steps=max_steps) 14 | task = MazeTaskSampler(verbose=True) 15 | maze_env.set_task(Resampler(task)) 16 | 17 | # Must intialize agent after reset 18 | agent = SmartSLAMAgent(maze_env=maze_env, memory_keep_ratio=0.25, render=False) 19 | 20 | terminated, truncated=False, False 21 | observation = maze_env.reset() 22 | sum_reward = 0 23 | reward = 0 24 | while not terminated and not truncated: 25 | action = agent.step(observation, reward) 26 | observation, reward, terminated, truncated, _ = maze_env.step(action) 27 | loc_map = maze_env.get_local_map() 28 | global_map = maze_env.get_global_map() 29 | sum_reward += reward 30 | print("...Test Finishes. 
Get score %f, steps = %s\n\n---------\n\n"%(sum_reward, max_steps)) 31 | 32 | if __name__=="__main__": 33 | for _ in range(10): 34 | test_agent_maze(max_steps=100) 35 | print("\n\nCongratulations!!!\n\nAll Tests Have Been Passed\n\n") 36 | -------------------------------------------------------------------------------- /xenoverse/anymdp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from gymnasium.envs.registration import register 16 | from xenoverse.anymdp.anymdp_env import AnyMDPEnv 17 | from xenoverse.anymdp.anymdp_solver_opt import AnyMDPSolverOpt 18 | from xenoverse.anymdp.anymdp_solver_mbrl import AnyMDPSolverMBRL 19 | from xenoverse.anymdp.anymdp_solver_q import AnyMDPSolverQ 20 | from xenoverse.anymdp.task_sampler import AnyMDPTaskSampler, GarnetTaskSampler 21 | from xenoverse.anymdp.task_sampler import AnyPOMDPTaskSampler, MultiTokensAnyPOMDPTaskSampler 22 | from xenoverse.anymdp.visualizer import anymdp_task_visualizer 23 | 24 | register( 25 | id='anymdp-v0', 26 | entry_point='xenoverse.anymdp:AnyMDPEnv', 27 | order_enforce=False, 28 | disable_env_checker=True, 29 | kwargs={"max_steps": 5000}, 30 | ) -------------------------------------------------------------------------------- /xenoverse/linds/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from gymnasium.envs.registration import register 16 | from xenoverse.linds.linds_env import LinearDSEnv 17 | from xenoverse.linds.task_sampler import LinearDSSampler, LinearDSSamplerRandomDim, dump_linds_task, load_linds_task 18 | from xenoverse.linds.visualizer import LinearDSVisualizer 19 | from xenoverse.linds.solver import LTISystemMPC 20 | 21 | register( 22 | id='linear-dynamics-v0', 23 | entry_point='xenoverse.linds:LinearDSEnv', 24 | order_enforce=False, 25 | disable_env_checker=True, 26 | kwargs={"max_steps": 5000}, 27 | ) 28 | 29 | register( 30 | id='linear-dynamics-v0-visualizer', 31 | entry_point='xenoverse.linds:LinearDSVisualizer', 32 | order_enforce=False, 33 | disable_env_checker=True, 34 | kwargs={"max_steps": 5000}, 35 | ) -------------------------------------------------------------------------------- /xenoverse/mazeworld/demo/keyboard_play_demo.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import sys 3 | import argparse 4 | import xenoverse.mazeworld 5 | from xenoverse.mazeworld import MazeTaskSampler 6 | 7 | if __name__=='__main__': 8 | parser = argparse.ArgumentParser(description='Playing the maze world demo with your keyboard') 9 | parser.add_argument('--max_steps', type=int, default=1000000) 10 | parser.add_argument('--visibility_3D', type=float, default=12, help="3D vision range, Only valid in 3D mode") 11 | parser.add_argument('--save_replay', type=str, default=None, help="Save the replay trajectory in file") 12 | 13 | args = parser.parse_args() 14 | 15 | # Create the environment 16 | maze_env = gym.make("mazeworld-v2", 17 | enable_render=True, 18 | max_steps=args.max_steps, 19 | visibility_3D=args.visibility_3D, 20 | command_in_observation=False, 21 | render_scale=320) 22 | 23 | # Sample and set the task 24 | task = MazeTaskSampler() 25 | maze_env.set_task(task) 26 | 27 | maze_env.reset() 28 | terminated, truncated = False, False 29 | sum_reward = 0 30 | 31 | while (not terminated) and (not truncated): 32 | maze_env.render() 33 | state, reward, terminated, truncated, _ = maze_env.step(None) 34 | sum_reward += reward 35 | print("Instant r = %.2f, Accumulate r = %.2f" % (reward, sum_reward)) 36 | if(maze_env.key_done): 37 | break 38 | print("Episode is over! You got %.2f score."%sum_reward) 39 | 40 | if(args.save_replay is not None): 41 | maze_env.save_trajectory(args.save_replay) 42 | -------------------------------------------------------------------------------- /xenoverse/metacontrol/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Xenoverse. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from gymnasium.envs.registration import register 16 | from xenoverse.metacontrol.random_cartpole import sample_cartpole,RandomCartPoleEnv 17 | from xenoverse.metacontrol.random_acrobot import sample_acrobot, RandomAcrobotEnv 18 | from xenoverse.metacontrol.random_humanoid import RandomHumanoidEnv, sample_humanoid, get_humanoid_tasks 19 | 20 | register( 21 | id='random-cartpole-v0', 22 | entry_point='xenoverse.metacontrol.random_cartpole:RandomCartPoleEnv', 23 | order_enforce=False, 24 | disable_env_checker=True, 25 | kwargs={"frameskip":1, "reset_bounds_scale":[0.45, 0.90, 0.13, 1.0]} 26 | ) 27 | register( 28 | id='random-acrobot-v0', 29 | entry_point='xenoverse.metacontrol.random_acrobot:RandomAcrobotEnv', 30 | order_enforce=False, 31 | disable_env_checker=True, 32 | kwargs={"frameskip":1, "reset_bounds_scale":0.10} 33 | ) 34 | register( 35 | id='random-humanoid-v0', 36 | entry_point='xenoverse.metacontrol.random_humanoid:RandomHumanoidEnv', 37 | order_enforce=False, 38 | disable_env_checker=True, 39 | kwargs={} 40 | ) 41 | -------------------------------------------------------------------------------- /xenoverse/metalang/metalangv3.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import numpy 3 | 4 | class MetaLMV3Env(gym.Env): 5 | def __init__(self, max_steps=10000): 6 | super().__init__() 7 | self.action_space = gym.spaces.Sequence(gym.spaces.Discrete(16)) 8 | self.observation_space = gym.spaces.Sequence(gym.spaces.Discrete(16)) 9 | self.max_steps = max_steps 10 | 11 | def set_task(self, task): 12 | self.vocabulary = task["vocabulary"] 13 | self.embedding = task["embedding"] 14 | self.hidden = task["hidden"] 15 | self.function_vocabulary = task["function_vocabulary"] 16 | self.lm = task["lm"] 17 | self.action_space = gym.spaces.Sequence(gym.spaces.Discrete(self.vocabulary)) 18 | self.observation_space = gym.spaces.Sequence(gym.spaces.Discrete(self.vocabulary)) 19 | self.task_set = True 20 | 21 | def reset(self, *args, **kwargs): 22 | if(self.task_set == False): 23 | raise Exception("Task not set") 24 | self.cached_query = self.lm.generate_query() 25 | self.steps = 0 26 | return self.cached_query 27 | 28 | def step(self, action, cached=False): 29 | label, ppl = self.lm.label_answer(list(action)) 30 | _, ppl_min = self.lm.generate_answer_greedy() 31 | _, ppl_max = self.lm.generate_answer_low() 32 | r = (ppl_max - ppl_min) / max(ppl - ppl_min + 0.1, 1.0e-3) - 2.0 33 | if(not cached): 34 | s = self.lm.generate_query() 35 | else: 36 | s = self.cached_query 37 | self.steps += 1 38 | return tuple(s), r, False, (self.steps < self.max_steps), {"label": label} 39 | 40 | def policy(self, T=1.0): 41 | ans, _ = self.lm.generate_answer_softmax(T=T) 42 | return list(ans) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 DeepEvolution Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import io 16 | from xenoverse import __version__ 17 | from setuptools import setup, find_packages 18 | 19 | with io.open('README.md', 'r', encoding='utf-8') as fh: 20 | long_description = fh.read() 21 | 22 | setup( 23 | name='xenoverse', 24 | version=__version__, 25 | author='WorldEditors', 26 | author_email='', 27 | description=('Collection of xeno-world environments for meta-training of general-purpose learning agents (GLAs)'), 28 | long_description=long_description, 29 | long_description_content_type='text/markdown', 30 | url='https://github.com/FutureAGI/xenoverse', 31 | license="Apache", 32 | packages=[package for package in find_packages() 33 | if package.startswith('xenoverse')], 34 | package_data={'xenoverse': [ 35 | './mazeworld/envs/img/*', 36 | ] 37 | }, 38 | python_requires='>=3.8', 39 | tests_require=['pytest', 'mock'], 40 | include_package_data=True, 41 | install_requires=[ 42 | 'gymnasium>=1.0.0', 43 | 'numpy>=1.24.4', 44 | 'Pillow>=6.2.2', 45 | 'six>=1.12.0', 46 | 'pygame>=2.6.0', 47 | 'numba>=0.58.1', 48 | 'scipy' 49 | ], 50 | extras_require={}, 51 | zip_safe=False, 52 | ) 53 | -------------------------------------------------------------------------------- /xenoverse/anymdp/anymdp_solver_opt.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import random 3 | from numba import njit 4 | from xenoverse.anymdp.solver import update_value_matrix 5 | 6 | 7 | class AnyMDPSolverOpt(object): 8 | """ 9 | Solver for AnyMDPEnv with Bellman Equation and Value Iteration 10 | Suppose to know the ground truth of the environment 11 | """ 12 | def __init__(self, env, gamma=0.99): 13 | self.na = env.na 14 | self.ns = env.ns 15 | self.env= env 16 | self.task_type = env.task_type 17 | if(self.task_type != 'MTMDP'): 18 | self.da = 1 19 | else: 20 | self.da = env.da 21 | 22 | self.transition_matrix = env.transition 23 | self.reward_matrix = env.reward 24 | self.state_mapping = env.state_mapping 25 | self.value_matrix = numpy.zeros((len(env.state_mapping), self.na)) 26 | self.gamma = gamma 27 | self.inverse_state_mapping = dict() 28 | for i,state in enumerate(self.state_mapping): 29 | self.inverse_state_mapping[state] = i 30 | self.q_solver(gamma=gamma) 31 | 32 | def q_solver(self, gamma=0.99): 33 | self.value_matrix = update_value_matrix(self.transition_matrix, self.reward_matrix, gamma, self.value_matrix) 34 | 35 | def learner(self, *args, **kwargs): 36 | pass 37 | 38 | def policy(self, *args, **kwargs): 39 | # optimal solver directly utilize the inner states 40 | state_dist = numpy.zeros((self.ns,)) 41 | state_dist[self.env.inner_state] = 1.0 42 | toks = [] 43 | for i in range(self.da): 44 | value_dist = self.value_matrix.T @ state_dist 45 | toks.append(numpy.argmax(value_dist)) 46 | state_dist = self.transition_matrix[:, toks[-1], :] @ state_dist 47 | if(len(toks) == 1): 48 | toks = toks[0] 49 | else: 50 | toks = numpy.array(toks, dtype=int) 51 | return toks -------------------------------------------------------------------------------- /xenoverse/linds/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Scalable procedurally generated Linear Time Invariant system by randomizing the transition and rewards. 
4 | 5 | # Implementation 6 | 7 | Linear Time Invariant (LTI) systems represent a class of systems where the transition dynamics and reward function are linear functions. The state-space representation for an LTI system is: 8 | 9 | $dx/dt = A x + B u + X$ 10 | 11 | $o(t) = C x(t) + Y$ 12 | 13 | In this implementation, we randomly generate the matrices $A$, $B$, $C$ as well as the bias vectors $X$ and $Y$. 14 | 15 | The reward function is also procedurally generated, with a base reward and a factor that scales the distance between the current observation and the command. 16 | 17 | $r(t)=r_0 - \alpha |w \cdot (c(t) - o(t)) |^2 - \beta|u(t)|^2$ 18 | 19 | The initial states are randomly generated and the command is also randomly chosen. 20 | 21 | # Usage 22 | 23 | ```python 24 | import numpy as np 25 | from xenoverse.linds import LinearDSEnv, LinearDSSamplerRandomDim 26 | 27 | # Create a task sampler with 10-dimensional state and action spaces 28 | task = LinearDSSampler(observation_dim=16, action_dim=8) 29 | 30 | # LinearDSSamplerRandomDim samples tasks with random dimensions between 1 and the specified max values 31 | task = LinearDSSamplerRandomDim(max_state_dim=16, max_action_dim=8) 32 | 33 | # Create an LINDS environment from the sampled task 34 | env = LinearDSEnv(pad_observation_dim=16, pad_action_dim=8, pad_command_dim=16) 35 | # pad_observation_dim, pad_action_dim and pad_command_dim are used to ensure the environment's observation space constant 36 | # the observation and action space of the actual task might be smaller than these values, but the environment will pad them with zeros to match these dimensions 37 | # e.g., if the actual observation space is 3-dimensional and pad_observation_dim=5, 38 | # then the observation returned by env will be [o1, o2, o3, 0, 0] 39 | env.set_task(task, use_pad_dim=True) # use_pad_dim=True ensures that the environment uses padded dimensions 40 | 41 | # Reset the environment 42 | observation, info = env.reset() 43 | 44 | # Take a step in the environment 45 | action = np.random.randn(env.action_dim) 46 | next_observation, reward, done, truncated, info = env.step(action) 47 | ``` -------------------------------------------------------------------------------- /xenoverse/anyhvac/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Scalable simulation environments for HVAC in IDC, with casual models of second law of thermodynamics 4 | 5 | ![AnyHVACVisualizer](https://github.com/FutureAGI/DataPack/blob/main/demo/anyhvac/hvac_video.gif) 6 | 7 | # Install 8 | 9 | ```bash 10 | pip install xenoverse[anyhvac] 11 | ``` 12 | 13 | #### For local installation, execute following commands: 14 | 15 | ```bash 16 | git clone https://github.com/FutureAGI/xenoverse 17 | cd xenoverse 18 | pip install .[anyhvac] 19 | ``` 20 | 21 | # Quick Start 22 | 23 | ## Import 24 | 25 | Import and create the AnyHVAC environment: 26 | 27 | ```python 28 | import gymnasium as gym 29 | import xenoverse.anyhvac 30 | 31 | env = gym.make("anyhvac-visualizer-v1", # use `anyhvac-v0` for non-visualizer version 32 | max_steps=86400, # max time in seconds 33 | target_temperature=28, # target temperature in Celsius, highest reward at this position 34 | upper_limit=80, # upper limit of temperature in Celsius, failure at this position 35 | iter_per_step=600, 36 | set_lower_bound=16, # lower limit of ac set temperature 37 | set_upper_bound=32, # upper limit of ac set temperature 38 | tolerance=1, # temperature tolerance for reward 
calculation 39 | ) # number of iterations per step, actual time elapsed=iter_per_step * 0.2 40 | 41 | 42 | ``` 43 | 44 | ## Sampling an HVAC control task 45 | An HVAC task includes a random number of **sensors**, **coolers**, and **equipment** arranged at random in the building; there may also be random interior walls. The task can be sampled by: 46 | 47 | ```python 48 | from xenoverse.anyhvac import HVACTaskSampler 49 | 50 | task = HVACTaskSampler() 51 | env.set_task(task) 52 | observation, info = env.reset() 53 | ``` 54 | 55 | ## Running the built-in PID solver based on sensor-actuator correlation 56 | ```python 57 | from xenoverse.anyhvac import HVACSolverGTPID 58 | 59 | solver = HVACSolverGTPID(env) # PID controller built on the ground-truth convective heat transfer coefficients (CHTC) 60 | state, info = env.reset() 61 | terminated, truncated = False, False 62 | while (not terminated) and (not truncated): 63 | action = solver.policy() 64 | state, reward, terminated, truncated, info = env.step(action) 65 | ``` -------------------------------------------------------------------------------- /xenoverse/mazeworld/demo/agent_play_demo.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import sys 3 | import argparse 4 | import time 5 | import xenoverse.mazeworld 6 | from xenoverse.mazeworld import MazeTaskSampler 7 | from xenoverse.mazeworld.agents import SmartSLAMAgent, OracleAgent 8 | 9 | if __name__=='__main__': 10 | parser = argparse.ArgumentParser(description='Let an agent play the maze world demo') 11 | parser.add_argument('--max_steps', type=int, default=1000000) 12 | parser.add_argument('--visibility_3D', type=float, default=12, help="3D vision range, Only valid in 3D mode") 13 | parser.add_argument('--save_replay', type=str, default=None, help="Save the replay trajectory in file") 14 | parser.add_argument('--memory_keep_ratio', type=float, default=1.0, 15 | help="Keep ratio of memory when the agent switches from short-term to long-term memory. 1.0 means perfect memory, 0.0 means no memory") 16 | parser.add_argument('--oracle', type=bool, default=False) 17 | parser.add_argument('--verbose', type=bool, default=False) 18 | 19 | args = parser.parse_args() 20 | 21 | # create the environment 22 | maze_env = gym.make("mazeworld-v2", enable_render=False, max_steps=args.max_steps, visibility_3D=args.visibility_3D, 23 | command_in_observation=True) 24 | 25 | # sample the task 26 | task = MazeTaskSampler(verbose=True) 27 | maze_env.set_task(task) 28 | 29 | # create a smart SLAM agent (or an oracle agent) 30 | if(args.oracle): 31 | agent = OracleAgent(maze_env=maze_env, render=True) 32 | else: 33 | agent = SmartSLAMAgent(maze_env=maze_env, memory_keep_ratio=args.memory_keep_ratio, render=True) 34 | 35 | observation, _ = maze_env.reset() 36 | terminated, truncated = False, False 37 | sum_reward = 0 38 | reward = 0 39 | 40 | while (not terminated) and (not truncated): 41 | action = agent.step(observation, reward) 42 | observation, reward, terminated, truncated, _ = maze_env.step(action) 43 | sum_reward += reward 44 | if(args.verbose): 45 | print("Instant r = %.2f, Accumulate r = %.2f" % (reward, sum_reward)) 46 | if(maze_env.key_done): 47 | break 48 | print("Episode is over! 
You got %.2f score."%sum_reward) 49 | 50 | if(args.save_replay is not None): 51 | maze_env.save_trajectory(args.save_replay) -------------------------------------------------------------------------------- /xenoverse/anyhvac/test_file.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import numpy 3 | from xenoverse.anyhvac.anyhvac_env_vis import HVACEnvVisible, HVACEnv 4 | from xenoverse.anyhvac.anyhvac_sampler import HVACTaskSampler 5 | from xenoverse.anyhvac.anyhvac_solver import HVACSolverGTPID 6 | import pickle 7 | 8 | env = HVACEnv() 9 | TASK_CONFIG_PATH = "./hvac_task_config.pkl" 10 | try: 11 | with open(TASK_CONFIG_PATH, "rb") as f: 12 | task = pickle.load(f) 13 | print(f"Loaded existing task config from {TASK_CONFIG_PATH}") 14 | 15 | except FileNotFoundError: 16 | print("Sampling new HVAC tasks...") 17 | task = HVACTaskSampler() 18 | with open(TASK_CONFIG_PATH, "wb") as f: 19 | pickle.dump(task, f) 20 | print(f"... Saved new task config to {TASK_CONFIG_PATH}") 21 | env.set_task(task) 22 | terminated, truncated = False,False 23 | obs = env.reset() 24 | max_steps = 10000 25 | current_stage = [] 26 | steps = 0 27 | while steps < max_steps: 28 | action = env.sample_action(mode="pid") 29 | obs, reward, terminated, truncated, info = env.step(action) 30 | current_stage.append(reward) 31 | if steps < 1: 32 | info_sums = {key: 0.0 for key in info.keys()} 33 | info_counts = {key: 0 for key in info.keys()} 34 | for key, value in info.items(): 35 | if isinstance(value, (int, float)): 36 | info_sums[key] += value 37 | info_counts[key] += 1 38 | 39 | steps += 1 40 | # print("sensors - ", obs, "\nactions - ", action, "\nrewards - ", reward, "ambient temperature - ", env.ambient_temp) 41 | if steps % 100 == 0: 42 | mean_reward = numpy.mean(current_stage) 43 | 44 | # 计算各信息字段均值 45 | info_means = { 46 | key: info_sums[key] / info_counts[key] 47 | for key in info_sums 48 | } 49 | 50 | # 格式化输出 51 | info_str = " | ".join([f"{k}:{v:.4f}" for k,v in info_means.items()]) 52 | print(f"Step {steps} | Reward: {mean_reward:.2f} | {info_str}", flush=True) 53 | 54 | # 重置统计量 55 | current_stage = [] 56 | info_sums = {k:0.0 for k in info_sums} 57 | info_counts = {k:0 for k in info_counts} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .qodo 131 | -------------------------------------------------------------------------------- /xenoverse/anyhvac/test.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import numpy as np 3 | import pickle 4 | import time 5 | from xenoverse.anyhvac.anyhvac_env_vis import HVACEnvVisible, HVACEnv 6 | from xenoverse.anyhvac.anyhvac_sampler import HVACTaskSampler 7 | from xenoverse.anyhvac.anyhvac_solver import HVACSolverGTPID 8 | 9 | pid_type = "HVACSolverGTPID" #"temperarure" , "HVACSolverGTPID" 10 | env = HVACEnvVisible(verbose=True) 11 | print("Sampling hvac tasks...") 12 | 13 | TASK_CONFIG_PATH = "./gen_env/hvac_task.pkl" 14 | 15 | 16 | try: 17 | with open(TASK_CONFIG_PATH, "rb") as f: 18 | task = pickle.load(f) 19 | print(f"Loaded existing task config from {TASK_CONFIG_PATH}") 20 | 21 | except FileNotFoundError: 22 | print("Sampling new HVAC tasks...") 23 | task = HVACTaskSampler(control_type='Temperature') 24 | with open(TASK_CONFIG_PATH, "wb") as f: 25 | pickle.dump(task, f) 26 | print(f"... Saved new task config to {TASK_CONFIG_PATH}") 27 | 28 | print("... 
Finished Sampling") 29 | env.set_task(task) 30 | terminated, truncated = False,False 31 | obs, info = env.reset() 32 | agent = HVACSolverGTPID(env) 33 | while (not terminated) and (not truncated): 34 | 35 | if pid_type == "temperarure": 36 | action = env._pid_action() 37 | elif pid_type == "HVACSolverGTPID": 38 | action = agent.policy(obs) 39 | if action.shape != env.action_space.shape: 40 | print(f"Warning: Action shape from agent ({action.shape}) does not match env action space shape ({env.action_space.shape}).") 41 | 42 | if action.ndim == 2 and action.shape[0] == 1 and action.shape[1] == env.action_space.shape[0]: 43 | action = action.squeeze(0) 44 | elif action.size != env.action_space.shape[0] * env.action_space.shape[1] if len(env.action_space.shape) > 1 else env.action_space.shape[0] : 45 | print(f"Action size mismatch: {action.size} vs {env.action_space.shape}") 46 | obs, reward, terminated, truncated, info = env.step(action) 47 | cool_power = round(np.mean(info.get("cool_power", 0)),4) 48 | heat_power = round(np.mean(info.get("heat_power", 0)),4) 49 | info_total = f"energy_cost: {round(info.get('energy_cost', 0),4)}, target_cost: {round(info.get('target_cost', 0),4)}, switch_cost: {round(info.get('switch_cost', 0),4)},cool_power: {cool_power}, heat_power: {heat_power}" 50 | -------------------------------------------------------------------------------- /xenoverse/anymdp/test.py: -------------------------------------------------------------------------------- 1 | if __name__=="__main__": 2 | import gymnasium as gym 3 | import numpy 4 | import xenoverse.anymdp 5 | from xenoverse.anymdp import AnyMDPTaskSampler, GarnetTaskSampler, AnyPOMDPTaskSampler, MultiTokensAnyPOMDPTaskSampler 6 | from xenoverse.anymdp.test_utils import train 7 | from xenoverse.utils import dump_task, load_task 8 | 9 | """ 10 | # Test MDP Task Sampler 11 | task = AnyMDPTaskSampler(state_space=16, 12 | action_space=5, 13 | min_state_space=None, 14 | verbose=True) 15 | # Test Garnet Task Sampler 16 | task = GarnetTaskSampler(state_space=16, 17 | action_space=5, 18 | min_state_space=None, 19 | verbose=True) 20 | # Test POMDP Task Sampler 21 | task = AnyPOMDPTaskSampler(state_space=16, 22 | action_space=5, 23 | min_state_space=None, 24 | observation_space=16, 25 | density = 0.1, 26 | verbose=True) 27 | 28 | task = MultiTokensAnyPOMDPTaskSampler(state_space=128, 29 | action_space=5, 30 | min_state_space=None, 31 | observation_space=32, 32 | observation_tokens=4, 33 | action_tokens=1, 34 | density = 0.1, 35 | verbose=True) 36 | """ 37 | 38 | task = MultiTokensAnyPOMDPTaskSampler(state_space=128, 39 | action_space=5, 40 | min_state_space=None, 41 | observation_space=32, 42 | observation_tokens=4, 43 | action_tokens=1, 44 | density = 0.1, 45 | verbose=True) 46 | dump_task("./task.pkl", task) 47 | 48 | env = gym.make("anymdp-v0") 49 | env.set_task(task) 50 | 51 | train_rewards, test_rewards, train_steps = train(env, max_epochs=100, gamma=0.99, solver_type='random', lr=0.20) 52 | train_rewards, test_rewards, train_steps = train(env, max_epochs=100, gamma=0.99, solver_type='opt', lr=0.20) 53 | train_rewards, test_rewards, train_steps = train(env, max_epochs=10000, gamma=0.99, solver_type='q', lr=0.20) 54 | 55 | print("Test Passed") -------------------------------------------------------------------------------- /xenoverse/metacontrol/random_humanoid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | import gymnasium as gym 4 | import pygame 5 | import 
xml.etree.ElementTree as ET 6 | from numpy import random 7 | from numba import njit 8 | from gymnasium import spaces 9 | from xenoverse.utils import pseudo_random_seed, versatile_sample, generate_secure_strings 10 | from gymnasium.envs.mujoco.humanoid_v5 import HumanoidEnv 11 | from xenoverse.metacontrol.humanoid_xml_sampler import humanoid_xml_sampler 12 | 13 | def sample_humanoid(root_path=None, noise_scale=1.0): 14 | # Sample a random humanoid task 15 | if(root_path is None): 16 | root_path = os.path.dirname(os.path.abspath(__file__)) 17 | root_path = os.path.abspath(os.path.join(root_path, 'assets')) 18 | if(os.path.exists(root_path) is False): 19 | os.makedirs(root_path) 20 | file_id = generate_secure_strings(1, length=8)[0] 21 | file_path = os.path.join(root_path, f'random_humanoid_{file_id}.xml') 22 | humanoid_xml_sampler(file_path, noise_scale=noise_scale) 23 | return file_path 24 | 25 | def get_humanoid_tasks(directory): 26 | # Acquire a list of tasks from the specified directory 27 | xml_files = [f for f in os.listdir(directory) if f.endswith('.xml')] 28 | xml_lists = [] 29 | for xml_file in xml_files: 30 | if 'random_humanoid' in xml_file: 31 | xml_lists.append(os.path.join(directory, xml_file)) 32 | if(len(xml_lists) == 0): 33 | raise ValueError(f"No random_humanoid XML files found in directory: {directory}") 34 | return xml_lists 35 | 36 | class RandomHumanoidEnv(HumanoidEnv): 37 | """ 38 | Randomly sampled humanoid environment from mujoco-py 39 | """ 40 | def __init__(self, seed=None, **kwargs): 41 | self.kwargs = kwargs 42 | super().__init__(**self.kwargs) 43 | self.seed(seed) 44 | 45 | def seed(self, seed=None): 46 | if(seed is None): 47 | pseudo_random_seed(0) 48 | else: 49 | pseudo_random_seed(seed) 50 | 51 | def set_task(self, task): 52 | tree = ET.parse(task) 53 | root = tree.getroot() 54 | 55 | for body in root.findall('.//body'): 56 | if(body.get('name') == 'torso'): 57 | size = body.get('pos', '0 0 0').split() 58 | torso_height = float(size[2]) 59 | max_height = torso_height * 2 60 | min_height = torso_height / 2 61 | 62 | super().__init__(xml_file=task, 63 | healthy_z_range = (min_height, max_height), 64 | **self.kwargs) -------------------------------------------------------------------------------- /xenoverse/metalang/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Generating Randomized Pseudo Meta-Language for Benchmarking Long-Term Dependency In-Context Learning 4 | 5 | - MetaLangv1: Generate by repeated random sequences 6 | - MetaLangv2: Generate from randomized n-gram neural network models 7 | 8 | # Usage 9 | 10 | ### PYTHON APIs 11 | 12 | ```python 13 | import gym 14 | import xenoverse.metalang 15 | from xenoverse.metalang import TaskSamplerV2 16 | 17 | # Initialize the generator 18 | generator = gym.make("meta-language-v2") 19 | 20 | # Sample a task 21 | task = TaskSamplerV2(n_gram=3, 22 | n_embedding=16, 23 | _lambda=5.0, 24 | ...) 25 | 26 | # Set the task 27 | generator.set_task(task) 28 | 29 | # Generate sequences from the task 30 | batch_sample = generator.batch_generator(batch_size=8) 31 | ``` 32 | 33 | ### COMMAND LINES 34 | 35 | ```bash 36 | # Sample 100 tasks first 37 | python -m xenoverse.metalang.generator --sample_type tasks --samples 100 --output tasks.pkl ... 38 | 39 | # Sample 1000 sequences from the 100 tasks 40 | python -m xenoverse.metalang.generator --sample_type sequences --task_file tasks.pkl --samples 1000 --output sequences.txt --output_type txt ... 
41 | 42 | # Or generate 1000 sequences by randomly sample tasks on the fly 43 | python -m xenoverse.metalang.generator --sample_type sequences --samples 1000 --output sequences.txt --output_type txt ... 44 | ``` 45 | 46 | #### show all the options 47 | 48 | ```bash 49 | python -m xenoverse.metalang.generator --help 50 | 51 | Generating Meta Language Tasks or Sequences 52 | 53 | optional arguments: 54 | -h, --help show this help message and exit 55 | 56 | --version {v1,v2} 57 | 58 | --sample_type {tasks,sequences} 59 | Generate tasks or sequences 60 | 61 | --task_file TASK_FILE 62 | Specify task file to generate from if the sample_type is sequences. Default will generate task on the fly. 63 | 64 | --vocab_size VOCAB_SIZE 65 | 66 | --embedding_size EMBEDDING_SIZE 67 | 68 | --hidden_size HIDDEN_SIZE 69 | 70 | --patterns_number PATTERNS_NUMBER 71 | 72 | --error_rate ERROR_RATE 73 | 74 | --n_gram N_GRAM 75 | 76 | --lambda_weight LAMBDA_WEIGHT 77 | Lambda weight multiplied for softmax sampling in MetaLangV2 78 | 79 | --batch_size BATCH_SIZE 80 | 81 | --sequence_length SEQUENCE_LENGTH 82 | 83 | --samples SAMPLES number of sequences / tasks to generate 84 | 85 | --output_type {txt,npy} 86 | 87 | --output OUTPUT 88 | ``` 89 | 90 | -------------------------------------------------------------------------------- /xenoverse/anymdp/anymdp_solver_q.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import random 3 | 4 | class AnyMDPSolverQ(object): 5 | """ 6 | Solver for AnyMDPEnv with Q-Learning 7 | """ 8 | def __init__(self, env, gamma=0.99, alpha=0.50, max_steps=4000): 9 | """ 10 | The constructor for the class AnyMDPSolverQ 11 | The exploration strategy is controlled by UCB-H with c as its hyperparameter. 
Increasing c will encourage exploration 12 | Simulation of the ideal policy when the ground truth is not known 13 | """ 14 | self.env = env 15 | self.na = env.na 16 | self.ns = env.ns 17 | assert env.task_type=="MDP", "The solver only works for MDP" 18 | self.value_matrix = numpy.zeros((self.ns, self.na)) + 1.0/(1.0 - gamma) 19 | self.sa_visitied = numpy.ones((self.ns, self.na)) 20 | self.s_visitied = numpy.ones((self.ns,)) 21 | self.gamma = gamma 22 | self.alpha = alpha 23 | self.max_steps = max_steps 24 | self.avg_r = 0.0 25 | self.avg_r2 = 0.0 26 | self.r_std = 0.01 27 | self.r_cnt = 0 28 | self.lr = numpy.ones((self.ns, self.na)) 29 | 30 | def learner(self, s, a, ns, r, terminated, truncated): 31 | 32 | self.avg_r = (self.avg_r * self.r_cnt + r) / (self.r_cnt + 1) 33 | self.avg_r2 = (self.avg_r2 * self.r_cnt + r**2) / (self.r_cnt + 1) 34 | self.r_cnt = min(self.r_cnt + 1, 10000) 35 | self.r_std = numpy.sqrt(max(self.avg_r2 - self.avg_r**2, 1.0e-4)) 36 | 37 | # Learning rate decay 38 | self.lr[s,a] = numpy.sqrt(max((self.max_steps + 1) / (self.max_steps + self.sa_visitied[s,a]), 1.0e-3)) 39 | 40 | if(terminated): 41 | target = r 42 | self.value_matrix[ns] = 0.0 43 | else: 44 | target = r + self.gamma * max(self.value_matrix[ns]) 45 | 46 | error = target - self.value_matrix[s][a] 47 | self.value_matrix[s][a] += self.alpha * self.lr[s,a] * error 48 | self.sa_visitied[s][a] += 1 49 | self.s_visitied[s] += 1 50 | 51 | def policy(self, state, is_test=False): 52 | if(is_test): 53 | return numpy.argmax(self.value_matrix[state]) 54 | 55 | value = self.value_matrix[state] - numpy.max(self.value_matrix[state]) 56 | stiffness = min((self.max_steps + self.s_visitied[state]) / (self.max_steps + 1), 10.0) 57 | value = value / max(numpy.std(value), 1.0e-2) * stiffness 58 | value = numpy.exp(value) / numpy.sum(numpy.exp(value)) 59 | return random.choice(range(len(value)), p=value) -------------------------------------------------------------------------------- /xenoverse/utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numba import njit 3 | from numpy import random 4 | import secrets 5 | import string 6 | import pickle 7 | 8 | @njit(cache=True) 9 | def conv2d_numpy(input_data:numpy.ndarray, 10 | kernel:numpy.ndarray, 11 | stride=(1,1), padding=0): 12 | input_height, input_width = input_data.shape 13 | kernel_height, kernel_width = kernel.shape 14 | output_height = (input_height - kernel_height + 2 * padding) // stride[0] + 1 15 | output_width = (input_width - kernel_width + 2 * padding) // stride[1] + 1 16 | 17 | output_data = numpy.zeros((output_height, output_width)) 18 | ni = 0 19 | for i in range(-padding, input_height - kernel_height + padding + 1, stride[0]): 20 | ib = max(0, i) 21 | ie = min(input_height, i + kernel_height) 22 | _ib = ib - i 23 | _ie = ie - i 24 | nj = 0 25 | for j in range(-padding, input_width - kernel_width + padding + 1, stride[1]): 26 | jb = max(0, j) 27 | je = min(input_width, j + kernel_width) 28 | _jb = jb - j 29 | _je = je - j 30 | output_data[ni, nj] = numpy.sum(input_data[ib:ie, jb:je] * kernel[_ib:_ie, _jb:_je]) 31 | nj += 1 32 | ni += 1 33 | 34 | return output_data 35 | 36 | def random_partition(num_parts:int): 37 | # Generate a random partition of 1 into num_parts parts 38 | if num_parts <= 0: 39 | raise ValueError("Number of parts must be greater than 0") 40 | if num_parts == 1: 41 | return [sum_value] 42 | partitions = numpy.random.random(num_parts - 1) 43 | partitions.sort() 44 | 
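    # padding the sorted cut points with the interval endpoints 0 and 1 and
    # taking successive differences yields num_parts non-negative parts that sum to 1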
partitions = numpy.concatenate(([0], partitions, [1])) 45 | return partitions[1:] - partitions[:-1] 46 | 47 | def versatile_sample(setting, default_range, default_value): 48 | if(isinstance(setting, tuple) or isinstance(setting, list)): 49 | assert len(setting) == 2, f"Setting must be a tuple or list of length 2, got {len(setting)}" 50 | return random.uniform(setting[0], setting[1]) 51 | elif(setting): 52 | return random.uniform(default_range[0], default_range[1]) 53 | else: 54 | return default_value 55 | 56 | def generate_secure_strings(count, length=16): 57 | alphabet = string.ascii_letters + string.digits # 62个字符 58 | return [''.join(secrets.choice(alphabet) for _ in range(length)) 59 | for _ in range(count)] 60 | 61 | 62 | def dump_task(file, task): 63 | with open(file, 'wb') as f: 64 | pickle.dump(task, f) 65 | 66 | def load_task(file): 67 | with open(file, 'rb') as f: 68 | task = pickle.load(f) 69 | return task -------------------------------------------------------------------------------- /xenoverse/metacontrol/random_cartpole.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gym Environment For Any MDP 3 | """ 4 | import numpy 5 | import gymnasium as gym 6 | import pygame 7 | from numpy import random 8 | from numba import njit 9 | from gymnasium import spaces 10 | from xenoverse.utils import pseudo_random_seed, versatile_sample 11 | from gymnasium.envs.classic_control.cartpole import CartPoleEnv 12 | 13 | def sample_cartpole(gravity_scope=True, 14 | masscart_scope=True, 15 | masspole_scope=True, 16 | length_scope=True): 17 | # Sample a random cartpole task 18 | pseudo_random_seed(0) 19 | gravity = versatile_sample(gravity_scope, (1, 11), 9.8) 20 | masscart = versatile_sample(masscart_scope, (0.5, 2.0), 1.0) 21 | masspole = versatile_sample(masspole_scope, (0.05, 0.20), 0.1) 22 | length = versatile_sample(length_scope, (0.25, 1.0), 0.5) # actually half the pole's length 23 | 24 | return { 25 | "gravity": gravity, 26 | "masscart": masscart, 27 | "masspole": masspole, 28 | "length": length 29 | } 30 | 31 | class RandomCartPoleEnv(CartPoleEnv): 32 | 33 | def __init__(self, *args, **kwargs): 34 | """ 35 | Pay Attention max_steps might be reseted by task settings 36 | """ 37 | self.frameskip = kwargs.get("frameskip", 5) 38 | self.reset_bounds_scale = kwargs.get("reset_bounds_scale", numpy.array([0.45, 0.90, 0.13, 1.0])) 39 | if(isinstance(self.reset_bounds_scale, list)): 40 | assert len(self.reset_bounds_scale) == 4, "reset_bounds_scale should be a list of 4 elements" 41 | self.reset_bounds_scale = numpy.array(self.reset_bounds_scale) 42 | kwargs.pop("frameskip", None) 43 | kwargs.pop("reset_bounds_scale", None) 44 | super().__init__(*args, **kwargs) 45 | 46 | def set_task(self, task_config): 47 | for key, value in task_config.items(): 48 | setattr(self, key, value) 49 | self.polemass_length = self.masspole * self.length 50 | self.total_mass = self.masspole + self.masscart 51 | 52 | def step(self, action): 53 | total_reward = 0 54 | terminated = False 55 | truncated = False 56 | for _ in range(self.frameskip): 57 | obs, reward, terminated, truncated, info = super().step(action) 58 | total_reward += reward 59 | if terminated or truncated: 60 | break 61 | return obs, total_reward, terminated, truncated, info 62 | 63 | def reset( 64 | self, 65 | *, 66 | seed: int | None = None, 67 | options: dict | None = None): 68 | # Note that if you use custom reset bounds, it may lead to out-of-bound 69 | # state/observations. 
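        # each of the 4 state dimensions (x, x_dot, theta, theta_dot) is drawn
        # uniformly from [-1, 1] and scaled by the corresponding entry of
        # reset_bounds_scale supplied at construction time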
70 | self.state = random.uniform(low=-1, high=1, size=(4,)) * self.reset_bounds_scale 71 | self.steps_beyond_terminated = None 72 | 73 | if self.render_mode == "human": 74 | super().render() 75 | return numpy.array(self.state, dtype=numpy.float32), {} -------------------------------------------------------------------------------- /xenoverse/anymdp/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from xenoverse.anymdp import AnyMDPSolverOpt, AnyMDPSolverMBRL, AnyMDPSolverQ 3 | 4 | class RandomAgent: 5 | def __init__(self, env): 6 | self.na = env.na 7 | self.ns = env.ns 8 | self.action_space = env.action_space 9 | 10 | def policy(self, state, is_test=False): 11 | return self.action_space.sample() 12 | 13 | def learner(self, s, a, ns, r, terminated, truncated): 14 | pass 15 | 16 | def train(env, max_epochs=1000, 17 | gamma=0.99, 18 | solver_type='q', 19 | lr=0.20, 20 | c=0.05, 21 | test_interval=100, 22 | test_epochs=3, 23 | is_verbose=True): 24 | # Test AnyMDPSolverQ 25 | if(solver_type.lower()=='q'): 26 | solver = AnyMDPSolverQ(env, gamma=gamma, alpha=lr) 27 | elif(solver_type.lower()=='mbrl'): 28 | solver = AnyMDPSolverMBRL(env, gamma=gamma, c=c) 29 | elif(solver_type.lower()=='opt'): 30 | solver = AnyMDPSolverOpt(env, gamma=gamma) 31 | elif(solver_type.lower()=='random'): 32 | solver = RandomAgent(env) 33 | else: 34 | raise ValueError('Invalid Solver Type') 35 | 36 | epoch_rewards = [] 37 | epoch_steps = [] 38 | epoch_test_rewards = [] 39 | 40 | epochs = 0 41 | 42 | def epoch_run(is_test=False, epochs=1): 43 | rewards = [] 44 | steps = [] 45 | for epoch in range(epochs): 46 | state, info = env.reset() 47 | terminated, truncated = False, False 48 | epoch_reward = 0 49 | epoch_step = 0 50 | while not terminated and not truncated: 51 | action = solver.policy(state, is_test=is_test) 52 | next_state, reward, terminated, truncated, info = env.step(action) 53 | solver.learner(state, action, next_state, reward, terminated, truncated) 54 | epoch_reward += reward 55 | epoch_step += 1 56 | state = next_state 57 | rewards.append(epoch_reward) 58 | steps.append(epoch_step) 59 | 60 | return numpy.mean(rewards), numpy.mean(steps) 61 | 62 | while epochs < max_epochs: 63 | train_rewards, train_steps = epoch_run(epochs=test_interval) 64 | test_rewards, test_steps = epoch_run(is_test=True, epochs=test_epochs) 65 | epoch_rewards.append(train_rewards) 66 | epoch_steps.append(train_steps) 67 | epoch_test_rewards.append(test_rewards) 68 | epochs += test_interval 69 | if(is_verbose): 70 | print("[{}]-Run\tEpoch:{}\tMean Train Epoch Reward: {:.2f}\tMean Test Epoch Reward: {:.2f}\tMean Steps In Epoch: {:.2f}\t".format(solver_type, epochs, epoch_rewards[-1], epoch_test_rewards[-1], epoch_steps[-1])) 71 | print("Solver Summary: {:.3f}".format(numpy.mean(test_rewards))) 72 | 73 | return epoch_rewards, epoch_test_rewards, epoch_steps -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking general decision-making with open & random worlds 2 | 3 | ## Why a Xenoverse Instead of a Single Universe? 4 | 5 | **Enhancing Generalization Over Memorization:** Recent research indicates that the generalization ability of learning agents primarily depends on the diversity of training environments. 
However, the real world imposes significant limitations on this diversity, such as physical laws and insufficient variety in environments, tasks, and embodiments. These limitations present a serious bottleneck to advancing artificial general intelligence (AGI). Xenoverse is a collection of extremely diverse worlds generated procedurally based on completely random parameters. We propose that AGI should not be trained and adapted within a single universe but rather within Xenoverse. 6 | 7 | **Avoid Overfitting Specific Benchmarks**: Xenoverse can be used for both **Meta-Training** and **Open-World Evaluation**. Existing benchmarks are typically closed-set and tend to be overfitted soon after their introduction. In contrast, Xenoverse provides open-world benchmarks that are **theoretically impossible to overfit**, making them effective tools for evaluating generalization capabilities. 8 | 9 | ## Current Collections (Continuously Updating) 10 | 11 | - [AnyMDP](xenoverse/anymdp): Procedurally generated, unlimited, general-purpose (Partially Observable) Markov Decision Processes (MDPs) in discrete spaces. 12 | 13 | - [LinDS](xenoverse/linds): Procedurally generated, ulimited Linear Time-Invariante (LTI) control tasks. 14 | 15 | - [AnyHVAC](xenoverse/anyhvac): Procedurally generated random rooms and equipment for Heating, Ventilation, and Air Conditioning (HVAC) control. 16 | 17 | - [MetaLanguage](xenoverse/metalang): A pseudo-language generated by randomized neural networks, used for benchmarking in-context language learning (ICLL). 18 | 19 | - [MazeWorld](xenoverse/mazeworld): Procedurally generated immersive mazes featuring diverse maze structures and textures. 20 | 21 | - [MetaControl](xenoverse/metacontrol): Randomized environments for classic control tasks and locomotion studies. 22 | 23 | # Installation 24 | 25 | ```bash 26 | pip install xenoverse 27 | ``` 28 | 29 | # Reference 30 | Please refer to the following technical report / papers: 31 | ```bibtex 32 | @article{wang2024benchmarking, 33 | title={Benchmarking General Purpose In-Context Learning}, 34 | author={Wang, Fan and Lin, Chuan and Cao, Yang and Kang, Yu}, 35 | journal={arXiv preprint arXiv:2405.17234}, 36 | year={2024} 37 | } 38 | @inproceedings{ 39 | wang2025towards, 40 | title={Towards Large-Scale In-Context Reinforcement Learning by Meta-Training in Randomized Worlds}, 41 | author={Fan Wang and Pengtao Shao and Yiming Zhang and Bo Yu and Shaoshan Liu and Ning Ding and Yang Cao and Yu Kang and Haifeng Wang}, 42 | booktitle={The Thirty-ninth Annual Conference on Neural Information Processing Systems}, 43 | year={2025}, 44 | url={https://openreview.net/forum?id=b6ASJBXtgP} 45 | } 46 | @article{fan2025putting, 47 | title={Putting the smarts into robot bodies}, 48 | author={Fan, Wang and Liu, Shaoshan}, 49 | journal={Communications of the ACM}, 50 | volume={68}, 51 | number={3}, 52 | pages={6--8}, 53 | year={2025}, 54 | publisher={ACM New York, NY, USA} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /xenoverse/anymdp/anymdp_solver_mbrl.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import random 3 | from xenoverse.anymdp.solver import update_value_matrix 4 | 5 | 6 | class AnyMDPSolverMBRL(object): 7 | """ 8 | Implementing the RL Solver of the paper 9 | Hu, Bingshan, et al. "Optimistic Thompson sampling-based algorithms for 10 | episodic reinforcement learning." 11 | Uncertainty in Artificial Intelligence. 
PMLR, 2023. 12 | """ 13 | def __init__(self, env, gamma=0.99, c=1.0, max_steps=4000): 14 | """ 15 | The constructor for the class AnyMDPSolverQ 16 | The exploration strategy is controlled by UCB-H with c as its hyperparameter. Increasing c will encourage exploration 17 | Simulation of the ideal policy when the ground truth is not known 18 | """ 19 | self.ns = env.ns 20 | self.na = env.na 21 | 22 | self.est_r = numpy.zeros((self.ns, self.na, self.ns)) 23 | self.vis_cnt = 0.01 * numpy.ones((self.ns, self.na, self.ns)) 24 | self.vis_cnt_sa = numpy.ones((self.ns, self.na)) 25 | 26 | self.gamma = gamma 27 | self._c = c / (1.0 - self.gamma) 28 | self.max_steps = max_steps 29 | 30 | self.value_matrix = numpy.zeros((self.ns, self.na)) 31 | self.est_r_global_avg = 0 32 | self.est_r_global_cnt = 0 33 | 34 | self.est_r_std = 0.01 35 | 36 | self.s_0 = [] 37 | self.s_0_cnt = [] 38 | self.s_e = [] 39 | 40 | self.update_estimator() 41 | 42 | def update_estimator(self): 43 | t_mat = numpy.copy(self.vis_cnt) 44 | # use 0.01 to make sure those with all transition = 0 will stay 0 45 | t_mat_valid = numpy.clip(numpy.sum(t_mat, axis=-1, keepdims=True), 0.01, None) 46 | self.t_mat = t_mat / t_mat_valid 47 | self.r_mat = numpy.copy(self.est_r) 48 | 49 | self.est_r_std = max(numpy.std(self.est_r), 0.01) 50 | self.b_mat = self._c * self.est_r_std / numpy.sqrt(self.vis_cnt_sa) 51 | 52 | self.value_matrix = update_value_matrix(self.t_mat, self.r_mat, self.gamma, self.value_matrix, max_iteration=1) 53 | 54 | if(len(self.s_0) > 0): 55 | self.s_0_prob = numpy.array(self.s_0_cnt) / numpy.sum(self.s_0_cnt) 56 | 57 | def learner(self, s, a, ns, r, terminated, truncated): 58 | # Update the environment model estimation 59 | cnt = self.vis_cnt[s,a,ns] 60 | self.est_r[s,a,ns] = (self.est_r[s,a,ns] * cnt + r) / (cnt + 1) 61 | self.vis_cnt[s,a,ns] += 1 62 | self.vis_cnt_sa[s,a] += 1 63 | 64 | if(terminated): 65 | self.vis_cnt[ns] = 0 66 | self.est_r[ns] = 0 67 | if(ns not in self.s_e): 68 | self.s_e.append(ns) 69 | 70 | if(terminated or truncated): 71 | self.update_estimator() 72 | 73 | def policy(self, state, is_test=False): 74 | # UCB Exploration 75 | if(is_test): 76 | return numpy.argmax(self.value_matrix[state]) 77 | rnd_vec = random.uniform(0.0, 1.0, size=(self.na)) 78 | return numpy.argmax(self.value_matrix[state] + self.b_mat[state] * rnd_vec) 79 | 80 | def set_reset_states(self, s): 81 | if(s not in self.s_0): 82 | self.s_0.append(s) 83 | self.s_0_cnt.append(1) 84 | else: 85 | idx = self.s_0.index(s) 86 | self.s_0_cnt[idx] += 1 -------------------------------------------------------------------------------- /xenoverse/linds/test_ppo.py: -------------------------------------------------------------------------------- 1 | if __name__=="__main__": 2 | import gymnasium as gym 3 | import numpy 4 | import argparse 5 | from xenoverse.linds import LinearDSSamplerRandomDim 6 | 7 | from stable_baselines3 import PPO, SAC 8 | from sb3_contrib import RecurrentPPO 9 | from stable_baselines3.common.env_util import make_vec_env 10 | from stable_baselines3.common.evaluation import evaluate_policy 11 | from xenoverse.utils import dump_task, load_task 12 | 13 | 14 | task = LinearDSSamplerRandomDim() 15 | 16 | env = gym.make("linear-dynamics-v0-visualizer") 17 | env.set_task(task, verbose=True, reward_shaping=True) 18 | 19 | args = argparse.ArgumentParser() 20 | args.add_argument("--max_step", type=int, default=200000) 21 | args.add_argument("--lr", type=float, default=3e-4) 22 | args.add_argument("--run", choices=["mlp", "lstm", "both"], 
default="both") 23 | args.add_argument("--task", type=str, default=None) 24 | args = args.parse_args() 25 | if(args.task is not None): 26 | task = load_task(args.task) 27 | else: 28 | task = LinearDSSamplerRandomDim() 29 | dump_task("./task.pkl", task) 30 | 31 | max_step = args.max_step 32 | lr = args.lr 33 | 34 | model_mlp = PPO( 35 | "MlpPolicy", # 使用 MLP 策略网络 36 | env, # 环境对象 37 | verbose=1, # 打印训练日志 38 | learning_rate=lr, # 学习率 39 | batch_size=64, # 批量大小 40 | gamma=0.99, # 折扣因子 41 | tensorboard_log="./ppo_tensorboard/" # TensorBoard 日志目录 42 | ) 43 | 44 | model_lstm = RecurrentPPO( 45 | "MlpLstmPolicy", # 使用 MLP 策略网络 46 | env, # 环境对象 47 | verbose=1, # 打印训练日志 48 | learning_rate=lr, # 学习率 49 | n_steps=2048, # 每个环境每次更新的步数 50 | batch_size=64, # 批量大小 51 | n_epochs=10, # 每次更新的迭代次数 52 | gamma=0.99, # 折扣因子 53 | gae_lambda=0.95, # GAE 参数 54 | policy_kwargs={ 55 | "lstm_hidden_size": 32, # LSTM 隐藏层大小 56 | "n_lstm_layers": 2, # LSTM 层数 57 | "enable_critic_lstm": True # Critic 网络也使用 LSTM 58 | }, 59 | clip_range=0.2, # PPO 的 clip 范围 60 | tensorboard_log="./ppo_tensorboard/" # TensorBoard 日志目录 61 | ) 62 | 63 | 64 | if(args.run == "mlp" or args.run == "both"): 65 | 66 | print(f"Training MLP Policy for {max_step} steps") 67 | 68 | mean_reward, std_reward = evaluate_policy(model_mlp, env, n_eval_episodes=20) 69 | print(f"Before Training: Mean reward: {mean_reward}, Std reward: {std_reward}") 70 | 71 | model_mlp.learn(total_timesteps=max_step) 72 | 73 | mean_reward, std_reward = evaluate_policy(model_mlp, env, n_eval_episodes=20) 74 | print(f"After Training: Mean reward: {mean_reward}, Std reward: {std_reward}") 75 | 76 | if(args.run == "lstm" or args.run == "both"): 77 | 78 | print(f"Training LSTMLSTM Policy for {max_step} steps") 79 | 80 | mean_reward, std_reward = evaluate_policy(model_lstm, env, n_eval_episodes=20) 81 | print(f"Before Training: Mean reward: {mean_reward}, Std reward: {std_reward}") 82 | 83 | model_lstm.learn(total_timesteps=max_step) 84 | 85 | mean_reward, std_reward = evaluate_policy(model_lstm, env, n_eval_episodes=20) 86 | print(f"After Training: Mean reward: {mean_reward}, Std reward: {std_reward}") -------------------------------------------------------------------------------- /xenoverse/anyhvac/anyhvac_sampler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy 3 | from numpy import random as rnd 4 | from .anyhvac_utils import BaseSensor, HeaterUnc, Cooler 5 | 6 | def HVACTaskSampler(control_type='Temperature', 7 | target_temperature=None): 8 | nw = rnd.randint(5, 16) # width of the building, in cell number 9 | nl = rnd.randint(5, 16) # length of the building, in cell number 10 | cell_size = rnd.uniform(1, 3) 11 | floor_height = rnd.uniform(2, 8) 12 | 13 | dw = nw * cell_size 14 | dl = nl * cell_size 15 | 16 | dh = rnd.uniform(3, 8) # height of the building 17 | area = dw * dl # area of the building 18 | cell_volume = floor_height * cell_size * cell_size # volume of each cell 19 | 20 | chtc_array = numpy.random.uniform(1.5, 25.0, size=(nw + 1, nl + 1, 2)) # Convective Heat Transfer Coefficients 21 | hc_array = numpy.random.uniform(1000, 4000, size=(nw, nl)) * cell_volume # heat capacity inside the building 22 | wall_chtc = rnd.uniform(1.5, 3.0) 23 | chtc_array[0, :, 0] = wall_chtc 24 | chtc_array[nw, :, 0] = wall_chtc 25 | chtc_array[:, 0, 1] = wall_chtc 26 | chtc_array[:, nl, 1] = wall_chtc 27 | cell_walls = chtc_array < 5.0 28 | floorceil_chtc = rnd.uniform(2.0, 6.0) 29 | 30 | n_sensors = max(int(area * 
rnd.uniform(0.10, 0.30)), 1) 31 | n_heaters = max(int(area * rnd.uniform(0.10, 0.30)), 1) 32 | n_coolers = max(int(area * rnd.uniform(0.05, 0.15)), 1) 33 | 34 | eps = rnd.uniform(0.0, 1.0) 35 | 36 | 37 | if(eps < 0.2): 38 | t_ambient = rnd.uniform(-10, 20) # ambient temperature 39 | elif(eps < 0.5): 40 | t_ambient = rnd.uniform(20, 30) 41 | else: 42 | t_ambient = rnd.uniform(30, 40) 43 | 44 | print(f"Sample Ambient Temperature: {t_ambient}") 45 | sensors = [] 46 | equipments = [] 47 | coolers = [] 48 | timer = [] 49 | for i in range(n_sensors): 50 | sensors.append(BaseSensor(nw, nl, cell_size, cell_walls, min_dist=1.2, 51 | avoidance=sensors)) 52 | base_heater = HeaterUnc(nw, nl, cell_size, cell_walls, min_dist=1.2, 53 | avoidance=equipments) 54 | for i in range(n_heaters): 55 | heater = HeaterUnc(nw, nl, cell_size, cell_walls, min_dist=1.2, avoidance=equipments, base_heater=base_heater) 56 | timer.append(heater.period) 57 | equipments.append(heater) 58 | hc_array[equipments[-1].nloc[0], equipments[-1].nloc[1]] += rnd.uniform(20000, 80000) 59 | for i in range(n_coolers): 60 | coolers.append(Cooler(nw, nl, cell_size, cell_walls, min_dist=min(cell_size, 2.0), 61 | avoidance=coolers, 62 | control_type=control_type)) 63 | 64 | if(target_temperature is not None): 65 | target_temperature = target_temperature 66 | else: 67 | if(rnd.choice([True, False])): 68 | target_temperature = rnd.uniform(24, 28) 69 | else: 70 | target_temperature = rnd.uniform(24, 28, size=(n_sensors)) 71 | 72 | return { 73 | 'width': dw, 74 | 'length': dl, 75 | 'height': dh, 76 | 'n_width': nw, 77 | 'n_length': nl, 78 | 'cell_size': cell_size, 79 | 'floor_height': floor_height, 80 | 'floorceil_chtc': floorceil_chtc, 81 | 'sensors': sensors, 82 | 'convection_coeffs': chtc_array, 83 | 'heat_capacity': hc_array, 84 | 'ambient_temp': t_ambient, 85 | 'equipments': equipments, 86 | 'coolers': coolers, 87 | 'control_type': control_type, 88 | 'target_temperature': target_temperature, 89 | 'heater_timer': timer, 90 | } 91 | -------------------------------------------------------------------------------- /xenoverse/metalang/task_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 DeepEvolution Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
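# Task samplers for the MetaLang pseudo-language benchmarks:
#   TaskSamplerV1 - repeated random token patterns with injected noise (MetaLangV1)
#   TaskSamplerV2 - weights of a randomized n-gram neural network (MetaLangV2)
#   TaskSamplerV3 - a RandomLM together with a small function vocabulary (MetaLMV3Env)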
14 | 15 | import sys 16 | import _io 17 | import numpy 18 | from numpy import random 19 | from xenoverse.utils import pseudo_random_seed 20 | from xenoverse.utils import RandomLM 21 | 22 | def TaskSamplerV2(seed=None, 23 | n_emb=16, 24 | n_hidden=64, 25 | n_vocab=256, 26 | n_gram=3, 27 | _lambda=5.0): 28 | if(seed is not None): 29 | numpy.random.seed(seed) 30 | else: 31 | numpy.random.seed(pseudo_random_seed()) 32 | if(isinstance(n_gram, list)): 33 | n_gram = random.choice(n_gram) 34 | word_emb = numpy.random.normal(0, 1.0, size=(n_vocab, n_emb)) 35 | weights_inputlayer = numpy.random.normal(0, 1.0, size=(n_gram, n_emb, n_hidden)) 36 | bias_inputlayer = numpy.random.normal(0, 1.0, size=(n_gram, 1, n_hidden)) 37 | weights_outputlayer = numpy.random.normal(0, 1.0, size=(n_hidden, n_vocab)) 38 | bias_outputlayer = numpy.random.normal(0, 1.0, size=(1, n_vocab)) 39 | return { 40 | 'word_emb': word_emb, 41 | 'weights_inputlayer': weights_inputlayer, 42 | 'bias_inputlayer': bias_inputlayer, 43 | 'weights_outputlayer': weights_outputlayer, 44 | 'bias_outputlayer': bias_outputlayer, 45 | '_lambda': _lambda, 46 | 'n_emb': n_emb, 47 | 'n_hidden': n_hidden, 48 | 'n_vocab': n_vocab, 49 | 'n_gram': n_gram 50 | } 51 | 52 | def TaskSamplerV1(seed=None, 53 | n_vocab=64, 54 | n_patterns=10, 55 | n_gram=64, 56 | error_ratio=0.1): 57 | patterns = [] 58 | if(seed is not None): 59 | numpy.random.seed(seed) 60 | else: 61 | numpy.random.seed(pseudo_random_seed()) 62 | if(isinstance(n_gram, list)): 63 | n_gram = random.choice(n_gram) 64 | for _ in range(n_patterns): 65 | l_r = max(3, numpy.random.poisson(n_gram)) 66 | patterns.append(random.randint(0, n_vocab, size=(l_r), dtype="int32")) 67 | return { 68 | 'patterns': patterns, 69 | 'n_vocab': n_vocab, 70 | 'n_patterns': n_patterns, 71 | 'error_ratio': error_ratio, 72 | 'n_gram': n_gram 73 | } 74 | 75 | function_vocabulary = {'s':0, 76 | 'q':1, 77 | 'a':2, 78 | 'r1':3, 79 | 'r2':4, 80 | 'r3':5, 81 | 'r4':6, 82 | 'r5':7, 83 | 'r>':8, 84 | 'r=':9, 85 | 'r<':10} 86 | 87 | def TaskSamplerV3(vocab_size=32, 88 | embedding_size=16, 89 | hidden_size=32, 90 | seed=None): 91 | return {"vocabulary": vocab_size, 92 | "embedding": embedding_size, 93 | "hidden": hidden_size, 94 | "function_vocabulary": function_vocabulary, 95 | "lm": RandomLM(n_vocab=vocab_size, 96 | function_vocab=function_vocabulary, 97 | n_emb=embedding_size, 98 | n_hidden=hidden_size, 99 | seed=seed)} -------------------------------------------------------------------------------- /xenoverse/linds/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import matplotlib.pyplot as plt 3 | from sklearn.manifold import TSNE 4 | from sklearn.datasets import load_digits 5 | from .linds_env import LinearDSEnv 6 | from xenoverse.utils import pseudo_random_seed 7 | from restools.plotting.plot_2D import savitzky_golay 8 | 9 | class LinearDSVisualizer(LinearDSEnv): 10 | 11 | def set_task(self, task, **kwargs): 12 | # Set task will automatically reset all records 13 | super().set_task(task, **kwargs) 14 | self.observation_records = [] 15 | self.inner_state_records = [] 16 | self.action_records = [] 17 | self.reward_records = [] 18 | 19 | def color_spec(self, i): 20 | return [self.color_spec_type[i][idx] for idx in self.colors] 21 | 22 | def reset(self, *args, **kwargs): 23 | obs, info = super().reset() 24 | self.observation_records.append(numpy.copy(obs)) 25 | self.action_records.append(numpy.zeros(self.action_space.shape)) 26 | 
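        # also record the hidden internal state and a zero-reward placeholder for the reset step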
self.inner_state_records.append(numpy.copy(self._state)) 27 | self.reward_records.append(0.0) 28 | 29 | return obs, info 30 | 31 | def step(self, action): 32 | obs, reward, terminated, truncated, info = super().step(action) 33 | 34 | self.observation_records.append(numpy.copy(obs)) 35 | self.inner_state_records.append(numpy.copy(self._state)) 36 | self.action_records.append(numpy.copy(action)) 37 | self.reward_records.append(reward) 38 | return obs, reward, terminated, truncated, info 39 | 40 | def visualize_and_save(self, filename=None, plot_tsne=False): 41 | if filename is None: 42 | file_name = "./linds_visualizer_output.pdf" 43 | if plot_tsne: 44 | tsne = TSNE(n_components=2, random_state=pseudo_random_seed(), 45 | perplexity=10, max_iter=500, learning_rate=100) 46 | obs_arr = numpy.array(self.observation_records, dtype="float32") 47 | act_arr = numpy.array(self.action_records, dtype="float32") 48 | 49 | s_arr = numpy.array(self.inner_state_records) 50 | max_steps = len(self.inner_state_records) 51 | 52 | obs_tsne = tsne.fit_transform(numpy.array(obs_arr)) 53 | act_tsne = tsne.fit_transform(numpy.array(act_arr)) 54 | s_tsne = tsne.fit_transform(numpy.array(s_arr)) 55 | 56 | plt.figure(figsize=(10, 8)) 57 | # Show Observation T-SNE 58 | plt.subplot(2, 2, 1) 59 | scatter = plt.scatter(obs_tsne[:, 0], obs_tsne[:, 1], c='black', s=10, alpha=0.2) 60 | plt.title("Observation", fontsize=12, fontweight='bold', color='blue', pad=10) 61 | 62 | # Show Action T-SNE 63 | plt.subplot(2, 2, 2) 64 | scatter = plt.scatter(act_tsne[:, 0], act_tsne[:, 1], c='black', s=10, alpha=0.2) 65 | plt.title("Action", fontsize=12, fontweight='bold', color='blue', pad=10) 66 | 67 | # Show State T-SNE 68 | plt.subplot(2, 2, 3) 69 | scatter = plt.scatter(s_tsne[:, 0], s_tsne[:, 1], c=self.reward_records, cmap='viridis', s=10, alpha=0.2, marker='o') 70 | plt.colorbar() 71 | plt.title("States", fontsize=12, fontweight='bold', color='blue', pad=10) 72 | 73 | # Show Reward Curve 74 | plt.subplot(2, 2, 4) 75 | rewards_smooth = savitzky_golay(self.reward_records, window_size=99, order=3) 76 | scatter = plt.plot(numpy.arange(numpy.size(self.reward_records)), self.reward_records, c='red', alpha=0.2) 77 | scatter = plt.plot(numpy.arange(numpy.size(rewards_smooth)), rewards_smooth, c='red') 78 | plt.title("Reward", fontsize=12, fontweight='bold', color='blue', pad=10) 79 | plt.savefig(filename) 80 | else: 81 | plt.figure(figsize=(10, 10)) 82 | rewards_smooth = savitzky_golay(self.reward_records, window_size=99, order=3) 83 | scatter = plt.plot(numpy.arange(numpy.size(self.reward_records)), self.reward_records, c='red', alpha=0.2) 84 | scatter = plt.plot(numpy.arange(numpy.size(rewards_smooth)), rewards_smooth, c='red') 85 | plt.title("Reward", fontsize=12, fontweight='bold', color='blue', pad=10) 86 | plt.savefig(filename) 87 | plt.close() 88 | -------------------------------------------------------------------------------- /xenoverse/metalang/metalangv2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 DeepEvolution Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import numpy 17 | from numpy import random 18 | 19 | 20 | class RandomNGram(object): 21 | def __init__(self, task): 22 | for key, val in task.items(): 23 | self.__dict__[key] = val 24 | self.s_tok = 0 25 | self.w_arr = numpy.expand_dims(numpy.arange(self.n_gram), axis=[0, 2, 3]) 26 | 27 | def softmax(self, x): 28 | e_x = numpy.exp(x - numpy.max(x, axis=-1, keepdims=True)) 29 | return e_x / e_x.sum(axis=-1, keepdims=True) 30 | 31 | def forward(self, l, batch=1, seed=None): 32 | #Generate n=batch sequences of length l under current task 33 | ind = 0 34 | if(seed is not None): 35 | numpy.random.seed(seed) 36 | 37 | def mean_var_norm(i): 38 | m_i = numpy.mean(i) 39 | m_ii = numpy.mean(i * i) 40 | std = numpy.sqrt(m_ii - m_i * m_i) 41 | return (1.0 / std) * (i - m_i) 42 | 43 | cur_tok = numpy.full((batch,), self.s_tok) 44 | idxes = numpy.arange(batch) 45 | pad_emb = numpy.expand_dims(self.word_emb[cur_tok], axis=1) 46 | 47 | h = numpy.zeros((batch, self.n_hidden)) 48 | 49 | # mark whether there is end token 50 | seqs = [] 51 | seqs.append(cur_tok) 52 | ppl = 0 53 | tok_cnt = 0 54 | tok_embs = [pad_emb for _ in range(self.n_gram)] 55 | while ind < l: 56 | ind += 1 57 | tok_emb = numpy.expand_dims(self.word_emb[cur_tok], axis=1) 58 | tok_embs.append(tok_emb) 59 | del tok_embs[0] 60 | tok_emb = numpy.expand_dims(numpy.concatenate(tok_embs[-self.n_gram:], axis=1), axis=2) 61 | 62 | h = numpy.tanh(numpy.matmul(tok_emb, self.weights_inputlayer) + self.bias_inputlayer) 63 | h = numpy.mean(self.w_arr * h, axis=1) 64 | o = numpy.matmul(h, self.weights_outputlayer) + self.bias_outputlayer 65 | o = numpy.squeeze(o, axis=1) 66 | o = self._lambda * mean_var_norm(o) 67 | exp_o = numpy.exp(o) 68 | prob = exp_o / numpy.sum(exp_o, axis=-1, keepdims=True) 69 | cur_tok = (prob.cumsum(1) > numpy.random.rand(prob.shape[0])[:,None]).argmax(1) 70 | cur_prob = prob[idxes, cur_tok] 71 | ppl -= numpy.sum(numpy.log(cur_prob)) 72 | tok_cnt += cur_prob.shape[0] 73 | 74 | seqs.append(cur_tok) 75 | print("Ground Truth Sequence Perplexity: %f" % (ppl / tok_cnt)) 76 | 77 | return numpy.transpose(numpy.asarray(seqs, dtype="int32")) 78 | 79 | class MetaLangV2(): 80 | """ 81 | Pseudo Langauge Generated from RNN models 82 | V: vocabulary size 83 | d: embedding size (input size) 84 | n: n-gram 85 | N: hidden size 86 | e: inverse of softmax - temporature 87 | L: maximum length 88 | """ 89 | def __init__(self, L=4096): 90 | self.L = int(L) 91 | assert L>1 92 | self.task_set = False 93 | 94 | def set_task(self, task): 95 | self.nn = RandomNGram(task) 96 | self.task_set = True 97 | 98 | def data_generator(self, seed=None): 99 | if(self.task_set): 100 | tokens = self.nn.forward(self.L, seed=seed)[0] 101 | else: 102 | raise Exception("Please set task before using data generator") 103 | return tokens 104 | 105 | def batch_generator(self, batch_size, seed=None): 106 | if(self.task_set): 107 | tokens = self.nn.forward(self.L, batch=batch_size, seed=seed) 108 | else: 109 | raise Exception("Please set task before using data generator") 110 | return tokens 
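# A minimal usage sketch (commented out).  It assumes TaskSamplerV2 is exported
# from xenoverse.metalang as shown in the package README; the keyword names
# follow the TaskSamplerV2 signature in task_sampler.py.
#
#   from xenoverse.metalang import TaskSamplerV2
#
#   lm = MetaLangV2(L=1024)
#   lm.set_task(TaskSamplerV2(n_vocab=256, n_emb=16, n_hidden=64, n_gram=3, _lambda=5.0))
#   batch = lm.batch_generator(batch_size=8)  # one row of generated tokens per sequence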
-------------------------------------------------------------------------------- /xenoverse/metalang/metalangv1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import _io 17 | import numpy 18 | from numpy import random 19 | 20 | class MetaLangV1(object): 21 | """ 22 | Meta Language Model 23 | MetaLM(V, n, l, e, L) generates n Exponential(1/l)-length sequences, which appears repeatedly until reaching length L 24 | Each time it repeats, we add a noise by replacing the number with e probablity 25 | V: Vocabulary Size 26 | l: mean repeating length 27 | e: noise ratio 28 | L: Sequence Length 29 | 30 | The task require the agent to recover the correct sequence. 31 | """ 32 | 33 | def __init__(self, L=2048): 34 | self.L = int(L) 35 | self.mask_ratio = 0.30 36 | self.task_set = False 37 | assert self.L > 1 38 | 39 | def add_noise(self, seq): 40 | """ 41 | Add noise to a sequence, return the new sequence 42 | """ 43 | noise_value = random.randint(0, self.n_vocab, size=(numpy.shape(seq)), dtype="int32") 44 | noise_ratio = (random.random(size=(numpy.shape(seq))) < self.error_ratio).astype("int32") 45 | mask_ratio = (random.random(size=(numpy.shape(seq))) < self.mask_ratio).astype("int32") 46 | diff = noise_value - seq 47 | diff = diff * (noise_ratio!=0).astype("int32") 48 | new_seq = seq + diff 49 | new_seq = new_seq * (1 - mask_ratio * noise_ratio) 50 | return new_seq 51 | 52 | def set_task(self, task): 53 | for key, val in task.items(): 54 | self.__dict__[key] = val 55 | self.task_set = True 56 | 57 | def data_generator(self, seed=None): 58 | if(not self.task_set): 59 | raise Exception("Please set task before using data generator") 60 | features = [] 61 | labels = [] 62 | cur_l = 0 63 | while cur_l < self.L + 1: 64 | seq = self.patterns[random.randint(0, self.n_patterns)] 65 | fea = self.add_noise(seq) 66 | sep = numpy.array([self.SepID], dtype="int32") 67 | features.append(fea) 68 | labels.append(seq) 69 | features.append(sep) 70 | labels.append(sep) 71 | cur_l += len(seq) + 1 72 | features = numpy.concatenate(features, axis=0).astype("int32") 73 | labels = numpy.concatenate(labels, axis=0).astype("int32") 74 | return features[:self.L], labels[1:(self.L+1)] 75 | 76 | def batch_generator(self, batch_size, seed=None): 77 | features = [] 78 | labels = [] 79 | for _ in range(batch_size): 80 | fea, lab = self.data_generator(seed=seed) 81 | features.append(fea.tolist()) 82 | labels.append(lab.tolist()) 83 | features = numpy.asarray(features) 84 | labels = numpy.asarray(labels) 85 | return features, labels 86 | 87 | def generate_text(self, size, output_stream): 88 | features, labels = self.batch_generator(size) 89 | if(isinstance(output_stream, _io.TextIOWrapper)): 90 | need_close = False 91 | elif(isinstance(output_stream, str)): 92 | output_stream = open(output_stream, "w") 93 | need_close = 
True 94 | for i in range(features.shape[0]): 95 | output_stream.write("\t".join(map(lambda x: "%d,%d"%(x[0],x[1]), zip(features[i].tolist(), labels[i].tolist())))) 96 | output_stream.write("\n") 97 | if(need_close): 98 | output_stream.close() 99 | 100 | def generate_npy(self, size, file_name): 101 | feas, labs = self.batch_generator(size) 102 | numpy.save(file, numpy.stack([feas, labs], axis=0)) 103 | 104 | @property 105 | def VocabSize(self): 106 | return self.n_vocab 107 | 108 | @property 109 | def SepID(self): 110 | return 0 111 | 112 | @property 113 | def MaskID(self): 114 | return 0 115 | 116 | @property 117 | def PaddingID(self): 118 | return 0 119 | -------------------------------------------------------------------------------- /xenoverse/mazeworld/agents/agent_base.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy 3 | from queue import Queue 4 | from copy import deepcopy 5 | from xenoverse.mazeworld.envs.dynamics import PI, DEFAULT_ACTION_SPACE_16, DEFAULT_ACTION_SPACE_32 6 | from xenoverse.mazeworld.envs.maze_env import MazeWorldContinuous3D 7 | 8 | 9 | class AgentBase(object): 10 | """ 11 | Base class for agents 12 | Use this as parent to create new rule based agents 13 | """ 14 | def __init__(self, **kwargs): 15 | self.render = False 16 | for k in kwargs: 17 | self.__dict__[k] = kwargs[k] 18 | if("maze_env" not in kwargs): 19 | raise Exception("Must use maze_env as arguments") 20 | 21 | # Initialize information 22 | self._cell_size = self.maze_env.maze_core._cell_size 23 | self._god_info = 1 - self.maze_env.maze_core._cell_walls + self.maze_env.maze_core._cell_landmarks 24 | self._landmarks_coordinates = self.maze_env.maze_core._landmarks_coordinates 25 | self._step_reward = self.maze_env.maze_core._step_reward 26 | self._nx, self._ny = self._god_info.shape 27 | self.neighbors = [(-1, 0), (1, 0), (0, 1), (0, -1), (-1, -1), (-1, 1), (1, -1), (1, 1)] 28 | self._landmarks_visit = dict() 29 | self._short_term_memory = list() 30 | self._action_space = self.maze_env.list_actions 31 | 32 | if(self._action_space is None): 33 | raise Exception("For smart agents, maze environment must use Discrete16 or Discrete32") 34 | 35 | if("short_term_memory_size" not in kwargs): 36 | self.short_term_memory_size = 3 37 | if("memory_keep_ratio" not in kwargs): 38 | self.memory_keep_ratio = 1.0 39 | self._long_term_memory = numpy.zeros((self._nx, self._ny), dtype=numpy.int8) 40 | 41 | # Render 42 | if(self.render): 43 | self.render_init() 44 | 45 | def render_init(self): 46 | raise NotImplementedError() 47 | 48 | def valid_neighbors(self, center=None, self_included=False, mask_included=True): 49 | if(center is None): 50 | cx, cy = self.maze_env.maze_core._agent_grid 51 | else: 52 | cx, cy = center 53 | valid_neighbors = [] 54 | if(self_included): 55 | valid_neighbors.append((0, 0)) 56 | for dx, dy in self.neighbors: 57 | nx = cx + dx 58 | ny = cy + dy 59 | if(nx < 0 or nx >= self._nx or ny < 0 or ny >= self._ny): 60 | continue 61 | if(not self._mask_info[nx, ny] and not mask_included): 62 | continue 63 | if(self._god_info[nx, ny] < 0 and self._mask_info[nx, ny]): 64 | continue 65 | if(dx * dy == 0): 66 | valid_neighbors.append((dx, dy)) 67 | else: 68 | if(self._god_info[nx, cy] > -1 and self._god_info[cx, ny] > -1 69 | and self._mask_info[nx, cy] and self._mask_info[cx, ny]): 70 | valid_neighbors.append((dx, dy)) 71 | return valid_neighbors 72 | 73 | def update_common_info(self): 74 | self._command = self.maze_env.maze_core._command 75 
| 76 | # Update long and short term memory 77 | # Pop the eldest memory from short term memory and insert it to long term memory, but with losses. 78 | self._short_term_memory.append(numpy.copy(self.maze_env.maze_core._cell_exposed)) 79 | if(len(self._short_term_memory) > self.short_term_memory_size): 80 | to_longterm = self._short_term_memory.pop(0) 81 | long_term_keep = (numpy.random.rand(self._nx, self._ny) < self.memory_keep_ratio).astype(numpy.int8) 82 | self._long_term_memory = numpy.logical_or(self._long_term_memory, to_longterm * long_term_keep) 83 | 84 | # Calculate the current memory: include the long term and short term memory 85 | self._mask_info = numpy.copy(self._long_term_memory) 86 | for i in range(len(self._short_term_memory)): 87 | self._mask_info = numpy.logical_or(self._mask_info, self._short_term_memory[i]) 88 | 89 | self._agent_ori = self.maze_env.maze_core._agent_ori 90 | self._agent_loc = self.maze_env.maze_core._agent_loc 91 | self._cur_grid = deepcopy(self.maze_env.maze_core._agent_grid) 92 | self._cur_grid_float = deepcopy(self.maze_env.maze_core.get_loc_grid_float(self.maze_env.maze_core._agent_loc)) 93 | lid = self._god_info[self._cur_grid[0], self._cur_grid[1]] 94 | if(lid > 0): 95 | self._landmarks_visit[lid - 1] = 0 96 | 97 | def policy(self, observation, r): 98 | raise NotImplementedError() 99 | 100 | def render_update(self, observation): 101 | raise NotImplementedError() 102 | 103 | def step(self, observation, r): 104 | self.update_common_info() 105 | action = self.policy(observation, r) 106 | if(self.render): 107 | self.render_update(observation) 108 | return action -------------------------------------------------------------------------------- /xenoverse/anymdp/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Scalable, procedurally generated Markov Decision Processes (MDPs) and Partially Observable MDPs (POMDPs) are created by randomizing the reward matrix, transition matrix, and observation matrix. The library also supports POMDPs with multi-token observation and action spaces. 4 | 5 | # Download pre-generated datasets and tasks 6 | 7 | - Training DataSet (Large) with 512K sequences and 6B time steps can be downloaded from [here](https://www.kaggle.com/datasets/anonymitynobody/omnirl-training-data-d-large). 8 | - Online evaluating task set and static validation dataset for AnyMDP can be downloaded from [here](https://www.kaggle.com/datasets/anonymitynobody/omnirl-evaluation). 
9 | 10 | # Install 11 | 12 | ```bash 13 | pip install xenoverse[anymdp] 14 | ``` 15 | 16 | #### For local installation, execute the following commands: 17 | 18 | ```bash 19 | git clone https://github.com/FutureAGI/xenoverse 20 | cd xenoverse 21 | pip install .[anymdp] 22 | ``` 23 | 24 | # Quick Start 25 | 26 | ## Import 27 | 28 | Import and create the AnyMDP environment with 29 | ```python 30 | import gymnasium as gym 31 | import xenoverse.anymdp 32 | 33 | env = gym.make("anymdp-v0", max_steps=5000) 34 | ``` 35 | 36 | ## Sampling an AnyMDP task 37 | ```python 38 | from xenoverse.anymdp import AnyMDPTaskSampler 39 | 40 | task = AnyMDPTaskSampler( 41 | state_space=128, # maximum number of states 42 | action_space=5, # number of actions 43 | min_state_space=None, # minimum number of states, defaults to state_space 44 | ) 45 | env.set_task(task) 46 | env.reset() 47 | ``` 48 | 49 | You can resample an MDP task, keeping the transitions unchanged while sampling a new reward matrix: 50 | 51 | ```python 52 | from xenoverse.anymdp import Resampler 53 | new_task = Resampler(task) 54 | ``` 55 | 56 | ## New features supporting POMDPs and multi-token observation and action spaces (2025.12) 57 | ```python 58 | # Test POMDP Task Sampler 59 | task = AnyPOMDPTaskSampler(state_space=16, 60 | action_space=5, 61 | min_state_space=None, 62 | observation_space=16, 63 | density = 0.1, 64 | verbose=True) 65 | 66 | # Test Multi-token POMDP Task Sampler 67 | # Observation space = MultiDiscrete([observation_space] * observation_tokens) 68 | # Action space = MultiDiscrete([action_space] * action_tokens) 69 | task = MultiTokensAnyPOMDPTaskSampler(state_space=128, 70 | action_space=5, 71 | min_state_space=None, 72 | observation_space=128, 73 | observation_tokens=4, 74 | action_tokens=2, 75 | density = 0.2, 76 | verbose=True) 77 | ``` 78 | Note that the entry point `env.set_task` is the same for MDP, POMDP, and multi-token POMDP tasks. However, the solvers in `anymdp_solver_mbrl.py` and `anymdp_solver_q.py` do not support POMDP and multi-token POMDP tasks, while `anymdp_solver_opt.py` supports all three. We provide `test_ppo.py` to demonstrate PPO training on POMDP and multi-token POMDP tasks. 79 | 80 | ## Running the built-in MDP solver 81 | ```python 82 | from xenoverse.anymdp import AnyMDPSolverOpt 83 | 84 | solver = AnyMDPSolverOpt(env) # AnyMDPSolverOpt solves the MDP with ground-truth rewards and the transition matrix 85 | state, info = env.reset() 86 | terminated, truncated = False, False 87 | while not terminated and not truncated: 88 | action = solver.policy(state) 89 | state, reward, terminated, truncated, info = env.step(action) 90 | ``` 91 | 92 | If you do not want the ground-truth rewards and transitions to be leaked to the agent, use `AnyMDPSolverMBRL` or `AnyMDPSolverQ` instead. `AnyMDPSolverMBRL` implements an idealized environment model together with a planning-based policy.
93 | 94 | ```python 95 | from xenoverse.anymdp import AnyMDPSolverQ, AnyMDPSolverMBRL 96 | 97 | # AnyMDPSolverMBRL solves the MDP with a learned dynamics model and dynamic programming 98 | solver = AnyMDPSolverMBRL(env) 99 | 100 | # AnyMDPSolverQ solves the MDP with tabular Q-learning 101 | #solver = AnyMDPSolverQ(env) 102 | 103 | state, info = env.reset() 104 | terminated, truncated = False, False 105 | while not terminated and not truncated: 106 | action = solver.policy(state) 107 | next_state, reward, terminated, truncated, info = env.step(action) 108 | solver.learner(state, action, next_state, reward, terminated, truncated) # update the learner 109 | state = next_state 110 | ``` 111 | 112 | ## Procedurally generating an MDP task 113 | 114 | The transition and reward matrices are procedurally generated by [task_sampler](task_sampler.py), which randomly samples nodes in a high-dimensional space. Both matrices are then checked to make sure the resulting problem is non-trivial to solve. 115 | 116 | You can visualize each task by running ``anymdp_task_visualizer()`` from ``visualizer.py``, which plots the Markov chain transition kernel and the value functions. -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/maze_continuous_3d.py: -------------------------------------------------------------------------------- 1 | """ 2 | Core File of Maze Env 3 | """ 4 | import os 5 | import numpy 6 | import pygame 7 | import random 8 | import time 9 | from pygame import font 10 | from numpy import random as npyrnd 11 | from numpy.linalg import norm 12 | from xenoverse.mazeworld.envs.dynamics import PI, PI_2, PI_4, PI2d, vector_move_with_collision 13 | from xenoverse.mazeworld.envs.ray_caster_utils import maze_view 14 | from xenoverse.mazeworld.envs.task_sampler import MAZE_TASK_MANAGER 15 | from xenoverse.mazeworld.envs.maze_base import MazeBase 16 | from xenoverse.mazeworld.envs.ray_caster_utils import landmarks_rgb, landmarks_rgb_arr, paint_agent_arrow 17 | 18 | class MazeCoreContinuous3D(MazeBase): 19 | #Read Configurations 20 | def __init__( 21 | self, 22 | collision_dist=0.20, #collision distance 23 | visibility_3D=12.0, #agent vision range 24 | resolution_horizon = 320, #resolution in horizontal 25 | resolution_vertical = 320, #resolution in vertical 26 | max_steps = 5000, 27 | command_in_observation = False # whether instruction / command is in observation 28 | ): 29 | super(MazeCoreContinuous3D, self).__init__( 30 | collision_dist = collision_dist, 31 | visibility_3D = visibility_3D, 32 | resolution_horizon = resolution_horizon, 33 | resolution_vertical = resolution_vertical, 34 | max_steps = max_steps, 35 | command_in_observation = command_in_observation 36 | ) 37 | 38 | def reset(self): 39 | 40 | #add the navigation guidance bar 41 | self._navbar_l = 0.50 * self.resolution_vertical 42 | self._navbar_w = 0.05 * self.resolution_horizon 43 | 44 | self._navbar_start_x = 0.25 * self.resolution_vertical 45 | self._navbar_start_y = 0.10 * self.resolution_vertical 46 | 47 | return super(MazeCoreContinuous3D, self).reset() 48 | 49 | def do_action(self, action, delta_t=1.0): 50 | turn_rate, walk_speed = action 51 | turn_rate = numpy.clip(turn_rate, -1, 1) * PI 52 | walk_speed = numpy.clip(walk_speed, -1, 1) 53 | 54 | self._agent_ori, self._agent_loc, collide = vector_move_with_collision( 55 | self._agent_ori, self._agent_loc, turn_rate, walk_speed, delta_t, 56 | self._cell_walls, self._cell_size, self.collision_dist) 57 | self._collision_punish =
self._collision_reward * collide 58 | self._agent_grid = self.get_loc_grid(self._agent_loc) 59 | reward, terminated, truncated = self.evaluation_rule() 60 | self.update_observation() 61 | 62 | return reward, terminated, truncated 63 | 64 | def render_init(self, view_size): 65 | super(MazeCoreContinuous3D, self).render_init(view_size) 66 | self._pos_conversion = self._render_cell_size / self._cell_size 67 | self._ori_size = 0.60 * self._pos_conversion 68 | 69 | def render_observation(self): 70 | # Paint Observation 71 | view_obs_surf = pygame.transform.scale(self._obs_surf, (self._view_size, self._view_size)) 72 | self._screen.blit(view_obs_surf, (0, 0)) 73 | 74 | 75 | def movement_control(self, keys): 76 | #Keyboard control cases 77 | turn_rate = None 78 | walk_speed = None 79 | time.sleep(0.01) 80 | if keys[pygame.K_LEFT] or keys[pygame.K_RIGHT] or keys[pygame.K_UP] or keys[pygame.K_DOWN]: 81 | turn_rate = 0.0 82 | walk_speed = 0.0 83 | if keys[pygame.K_LEFT]: 84 | turn_rate -= 0.1 85 | if keys[pygame.K_RIGHT]: 86 | turn_rate += 0.1 87 | if keys[pygame.K_UP]: 88 | walk_speed += 0.5 89 | if keys[pygame.K_DOWN]: 90 | walk_speed -= 0.5 91 | if keys[pygame.K_SPACE]: 92 | turn_rate = 0.0 93 | walk_speed = 0.0 94 | return turn_rate, walk_speed 95 | 96 | def update_observation(self): 97 | self._observation, self._cell_exposed = maze_view(numpy.array(self._agent_loc, dtype=numpy.float32), self._agent_ori, self._agent_height, 98 | self._cell_walls, self._cell_landmarks, self._cell_texts, self._cell_size, 99 | MAZE_TASK_MANAGER.textlib_walls, MAZE_TASK_MANAGER.textlib_grounds[self._ground_text], MAZE_TASK_MANAGER.textlib_ceilings[self._ceiling_text], 100 | self._wall_height, 1.0, self.visibility_3D, 0.20, 101 | self._fol_angle, self.resolution_horizon, self.resolution_vertical, landmarks_rgb_arr) 102 | if(self.command_in_observation): 103 | start_x = int(self._navbar_start_x) 104 | start_y = int(self._navbar_start_y) 105 | end_x = int(self._navbar_start_x + self._navbar_l) 106 | end_y = int(self._navbar_start_y + self._navbar_w) 107 | self._observation[start_x:end_x, start_y:end_y] = landmarks_rgb[self._command] 108 | 109 | self._command_rgb = landmarks_rgb[self._command] 110 | self._obs_surf = pygame.surfarray.make_surface(self._observation) 111 | 112 | def get_observation(self): 113 | return numpy.copy(self._observation.astype('uint8')) 114 | 115 | def get_info(self, info): 116 | info["command"] = self._command_rgb -------------------------------------------------------------------------------- /xenoverse/metacontrol/random_acrobot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gym Environment For Any MDP 3 | """ 4 | import numpy 5 | import gymnasium as gym 6 | import pygame 7 | from numpy import random 8 | from numba import njit 9 | from gymnasium import spaces 10 | from xenoverse.utils import pseudo_random_seed, versatile_sample 11 | from gymnasium.envs.classic_control.acrobot import AcrobotEnv 12 | from numpy import cos, pi, sin 13 | 14 | 15 | def sample_acrobot(link_length_1=True, 16 | link_length_2=True, 17 | link_mass_1=True, 18 | link_mass_2=True, 19 | link_com_1=True, 20 | link_com_2=True, 21 | gravity=True): 22 | # Sample a random acrobot task 23 | pseudo_random_seed(0) 24 | link_length_1 = versatile_sample(link_length_1, (0.5, 3.0), 1.0) 25 | link_length_2 = versatile_sample(link_length_2, (0.5, 3.0), 1.0) 26 | link_mass_1 = versatile_sample(link_mass_1, (0.5, 3.0), 1.0) 27 | link_mass_2 = versatile_sample(link_mass_2, (0.5, 3.0), 1.0) 
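    # Centers of mass are sampled as a fraction (0.25-0.75) of the corresponding sampled link length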
28 | link_com_1 = versatile_sample(link_com_1, (0.25, 0.75), 0.5) * link_length_1 29 | link_com_2 = versatile_sample(link_com_2, (0.25, 0.75), 0.5) * link_length_2 30 | gravity = versatile_sample(gravity, (1.0, 15.0), 9.8) 31 | 32 | return { 33 | "link_length_1": link_length_1, 34 | "link_length_2": link_length_2, 35 | "link_mass_1": link_mass_1, 36 | "link_mass_2": link_mass_2, 37 | "link_com_1": link_com_1, 38 | "link_com_2": link_com_2, 39 | "gravity": gravity 40 | } 41 | 42 | class RandomAcrobotEnv(AcrobotEnv): 43 | 44 | def __init__(self, *args, **kwargs): 45 | """ 46 | Pay Attention max_steps might be reseted by task settings 47 | """ 48 | self.frameskip = kwargs.get("frameskip", 5) 49 | self.reset_bounds_scale = kwargs.get("reset_bounds_scale", 0.10) 50 | if(isinstance(self.reset_bounds_scale, list)): 51 | assert len(self.reset_bounds_scale) == 4, "reset_bounds_scale should be a list of 4 elements" 52 | self.reset_bounds_scale = numpy.array(self.reset_bounds_scale) 53 | kwargs.pop("reset_bounds_scale", None) 54 | kwargs.pop("frameskip", None) 55 | super().__init__(*args, **kwargs) 56 | 57 | # Rewrite the dynamics for acrobot 58 | def _dsdt(self, s_augmented): 59 | m1 = self.link_mass_1 60 | m2 = self.link_mass_2 61 | l1 = self.link_length_1 62 | lc1 = self.link_com_1 63 | lc2 = self.link_com_2 64 | I1 = self.link_mass_1 * (self.link_com_1**2 + (self.link_length_1 - self.link_com_1)**2) / 6.0 65 | I2 = self.link_mass_2 * (self.link_com_2**2 + (self.link_length_2 - self.link_com_2)**2) / 6.0 66 | g = self.gravity 67 | a = s_augmented[-1] 68 | s = s_augmented[:-1] 69 | theta1 = s[0] 70 | theta2 = s[1] 71 | dtheta1 = s[2] 72 | dtheta2 = s[3] 73 | d1 = m1 * lc1**2 + m2 * (l1**2 + lc2**2 + 2 * l1 * lc2 * cos(theta2)) + I1 + I2 74 | d2 = m2 * (lc2**2 + l1 * lc2 * cos(theta2)) + I2 75 | phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0) 76 | phi1 = ( 77 | -m2 * l1 * lc2 * dtheta2**2 * sin(theta2) 78 | - 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2) 79 | + (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2) 80 | + phi2 81 | ) 82 | if self.book_or_nips == "nips": 83 | # the following line is consistent with the description in the 84 | # paper 85 | ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2**2 + I2 - d2**2 / d1) 86 | else: 87 | # the following line is consistent with the java implementation and the 88 | # book 89 | ddtheta2 = ( 90 | a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1**2 * sin(theta2) - phi2 91 | ) / (m2 * lc2**2 + I2 - d2**2 / d1) 92 | ddtheta1 = -(d2 * ddtheta2 + phi1) / d1 93 | return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0 94 | 95 | def _terminal(self): 96 | s = self.state 97 | assert s is not None, "Call reset before using AcrobotEnv object." 98 | return bool(-cos(s[0]) - cos(s[1] + s[0]) > self.link_length_1) 99 | 100 | def set_task(self, task_config): 101 | print("Setting task with config:", task_config) 102 | for key, value in task_config.items(): 103 | setattr(self, key, value) 104 | 105 | def step(self, action): 106 | total_reward = 0 107 | terminated = False 108 | truncated = False 109 | for _ in range(self.frameskip): 110 | obs, reward, terminated, truncated, info = super().step(action) 111 | total_reward += reward 112 | if terminated or truncated: 113 | break 114 | return obs, total_reward, terminated, truncated, info 115 | 116 | def reset(self, *, seed: int | None = None, options: dict | None = None): 117 | super().reset(seed=seed) 118 | # Note that if you use custom reset bounds, it may lead to out-of-bound 119 | # state/observations. 
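        # The internal state is [theta1, theta2, dtheta1, dtheta2]; reset_bounds_scale
        # (a scalar or a length-4 array) rescales the default +/-0.1 reset range per component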
120 | self.state = random.uniform(low=-1, high=1, size=(4,)).astype( 121 | numpy.float32 122 | ) * self.reset_bounds_scale 123 | 124 | if self.render_mode == "human": 125 | super().render() 126 | return super()._get_ob(), {} -------------------------------------------------------------------------------- /xenoverse/anyhvac/anyhvac_env_vis.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import numpy 3 | import time 4 | from .anyhvac_env import HVACEnv 5 | from pygame import font 6 | 7 | class HVACEnvVisible(HVACEnv): 8 | def __init__(self, *args, **kwargs): 9 | super().__init__(*args, **kwargs) 10 | self.empty_region = 20 11 | 12 | def reset(self, *args, **kwargs): 13 | res =super().reset(*args, **kwargs) 14 | self.render_init(render_size=640) 15 | self.keyboard_press = pygame.key.get_pressed() 16 | return res 17 | 18 | def step(self, actions): 19 | observation, reward, terminated, truncated, info = super().step(actions) 20 | 21 | keydone, _ = self.render_update(info["heat_power"], info['cool_power'], info["chtc_array"]) 22 | truncated = truncated or keydone 23 | return observation, reward, terminated, truncated, info 24 | 25 | 26 | def render_init(self, render_size=640): 27 | """ 28 | Initialize a God View With Landmarks 29 | """ 30 | font.init() 31 | self._font = font.SysFont("Arial", 18) 32 | self.render_size = render_size 33 | 34 | #Initialize the agent drawing 35 | self._render_cell_size = (self.render_size - self.empty_region) // max(self.n_width, self.n_length) 36 | self._render_w = self.n_width * self._render_cell_size 37 | self._render_h = self.n_length * self._render_cell_size 38 | self.render_origin_w = (self.render_size - self._render_w) // 2 39 | self.render_origin_h = self.render_size - (self.render_size - self._render_h) // 2 40 | 41 | self._screen = pygame.display.set_mode((self.render_size, self.render_size)) 42 | self._screen.fill(pygame.Color("white")) 43 | 44 | pygame.display.set_caption("HVAC Render") 45 | 46 | def render_update(self, heaters, actuators, chtc): 47 | """ 48 | Update the God View with new data 49 | """ 50 | if not hasattr(self, "_screen"): 51 | raise RuntimeError("Render is not initialized yet.") 52 | 53 | def colorbar(v, vmin=-10, vmax=100): 54 | return int(max(0, min(1.0, (v - vmin) / (vmax - vmin))) * 255) 55 | 56 | def radius_normalizer(v, vmin=0, vmax=10000, min_pixels=1, max_pixels=10): 57 | return int(max(0, (v - vmin) / (vmax - vmin)) * (max_pixels - min_pixels) + min_pixels) 58 | 59 | # Paint ambient temerature 60 | r = colorbar(self.ambient_temp) 61 | self._screen.fill(pygame.Color(r, 0, 255 - r, 128)) 62 | 63 | # paint room temperature 64 | for i in range(self.n_width): 65 | for j in range(self.n_length): 66 | x = self.render_origin_w + i * self._render_cell_size 67 | y = self.render_origin_h - (j + 1) * self._render_cell_size 68 | rect = pygame.Rect(x, y, self._render_cell_size, self._render_cell_size) 69 | r = colorbar(self.state[i][j]) 70 | color = pygame.Color(r, 0, 255 - r, 128) 71 | pygame.draw.rect(self._screen, color, rect) 72 | 73 | # paint heaters 74 | for i, equip in enumerate(self.equipments): 75 | pixels = ((equip.loc / self.cell_size) * self._render_cell_size).astype(int) 76 | r = radius_normalizer(heaters[i], vmax=10000) 77 | xs = pixels[0] + self.render_origin_w 78 | ys = self.render_origin_h - pixels[1] 79 | pygame.draw.circle(self._screen, pygame.Color(255,0,0,255), (xs,ys), r, width=0) 80 | 81 | # paint coolers 82 | for i, cooler in enumerate(self.coolers): 83 | 
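            # Each cooler is drawn as a green filled circle whose radius scales with its cooling power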
pixels = ((cooler.loc / self.cell_size) * self._render_cell_size).astype(int) 84 | r = radius_normalizer(actuators[i], vmin=0, vmax=10000) 85 | xs = pixels[0] + self.render_origin_w 86 | ys = self.render_origin_h - pixels[1] 87 | pygame.draw.circle(self._screen, pygame.Color(0,255,0,255), (xs,ys),r, width=0) 88 | 89 | # paint chtc 90 | for i in range(self.n_width + 1): 91 | for j in range(self.n_length + 1): 92 | xs = self.render_origin_w + i * self._render_cell_size 93 | ys = self.render_origin_h - j * self._render_cell_size 94 | xe0 = self.render_origin_w + i * self._render_cell_size 95 | ye0 = self.render_origin_h - (j + 1) * self._render_cell_size 96 | xe1 = self.render_origin_w + (i + 1) * self._render_cell_size 97 | ye1 = self.render_origin_h - j * self._render_cell_size 98 | alpha0 = colorbar(chtc[i][j][0], vmin=0, vmax=50) 99 | alpha1 = colorbar(chtc[i][j][1], vmin=0, vmax=50) 100 | width0 = 1 101 | width1 = 1 102 | if(chtc[i][j][0] < 5): 103 | alpha0 = 0 104 | width0 = 5 105 | if(chtc[i][j][1] < 5): 106 | alpha1 = 0 107 | width1 = 5 108 | if(j < self.n_length): 109 | pygame.draw.line(self._screen, pygame.Color(alpha0,alpha0,alpha0), (xs,ys), (xe0,ye0), width=width0) 110 | if(i < self.n_width): 111 | pygame.draw.line(self._screen, pygame.Color(alpha1,alpha1,alpha1), (xs,ys), (xe1,ye1), width=width1) 112 | 113 | pygame.display.update() 114 | done = False 115 | for event in pygame.event.get(): 116 | if event.type == pygame.QUIT: 117 | done=True 118 | keys = pygame.key.get_pressed() 119 | 120 | return done, keys -------------------------------------------------------------------------------- /xenoverse/anymdp/test_ppo.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | from gymnasium import spaces 3 | import numpy as np 4 | from stable_baselines3 import PPO 5 | from sb3_contrib import RecurrentPPO 6 | from stable_baselines3.common.env_checker import check_env 7 | from stable_baselines3.common.evaluation import evaluate_policy 8 | from stable_baselines3.common.monitor import Monitor 9 | from xenoverse.utils import dump_task, load_task 10 | import gymnasium as gym 11 | import xenoverse.anymdp 12 | from xenoverse.anymdp import AnyMDPTaskSampler, GarnetTaskSampler, AnyPOMDPTaskSampler, MultiTokensAnyPOMDPTaskSampler 13 | from xenoverse.anymdp.test_utils import train 14 | import argparse 15 | 16 | 17 | 18 | def create_env(task): 19 | """创建并包装环境""" 20 | env = gym.make("anymdp-v0") 21 | env.set_task(task) 22 | 23 | # 检查环境是否符合 Gym 规范(重要!) 
24 | check_env(env, warn=True) 25 | 26 | # 使用 Monitor 包装以记录统计信息 27 | env = Monitor(env) 28 | return env 29 | 30 | def train_multi_discrete_ppo(task): 31 | """训练 PPO 模型""" 32 | 33 | # 创建向量化环境(SB3 推荐) 34 | from stable_baselines3.common.vec_env import DummyVecEnv 35 | 36 | # 可以并行多个环境加速训练 37 | n_envs = 4 38 | env = DummyVecEnv([lambda:create_env(task) for _ in range(n_envs)]) 39 | 40 | # 配置 PPO 超参数 41 | model = RecurrentPPO( 42 | policy="MlpLstmPolicy", # 多层感知机策略 43 | env=env, 44 | learning_rate=3e-4, # 学习率 45 | n_steps=2048, # 每个环境采集的步数 46 | batch_size=256, # 小批量大小 47 | n_epochs=10, # 每次更新的训练轮数 48 | gamma=0.99, # 折扣因子 49 | gae_lambda=0.95, # GAE参数 50 | clip_range=0.2, # PPO裁剪范围 51 | verbose=1, # 打印训练信息 52 | tensorboard_log="./ppo_multi_discrete_tensorboard/", # TensorBoard日志 53 | device="auto" # 自动选择设备(CPU/GPU) 54 | ) 55 | 56 | # 开始训练 57 | total_timesteps = 500_000 # 总训练步数 58 | print(f"\n开始训练,总步数: {total_timesteps}") 59 | model.learn(total_timesteps=total_timesteps, progress_bar=True) 60 | 61 | # 保存模型 62 | model.save("ppo_multi_discrete_model") 63 | print("模型已保存: ppo_multi_discrete_model.zip") 64 | 65 | env.close() 66 | return model 67 | 68 | 69 | def evaluate_model(model_path=None, n_eval_episodes=10): 70 | """评估训练好的模型""" 71 | 72 | # 加载模型或直接使用传入的模型 73 | if model_path: 74 | model = PPO.load(model_path) 75 | else: 76 | model = train_multi_discrete_ppo() 77 | 78 | # 创建评估环境 79 | eval_env = create_env() 80 | 81 | # 使用 SB3 内置评估函数 82 | mean_reward, std_reward = evaluate_policy( 83 | model, 84 | eval_env, 85 | n_eval_episodes=n_eval_episodes, 86 | deterministic=True, # 使用确定性策略评估 87 | render=False 88 | ) 89 | 90 | print("\n" + "="*50) 91 | print(f"评估结果({n_eval_episodes} 回合):") 92 | print(f"平均奖励: {mean_reward:.2f} ± {std_reward:.2f}") 93 | print("="*50) 94 | 95 | # 可视化一个回合 96 | print("\n可视化一个评估回合...") 97 | obs, _ = eval_env.reset() 98 | total_reward = 0 99 | for step in range(100): 100 | action, _states = model.predict(obs, deterministic=True) 101 | obs, reward, terminated, truncated, info = eval_env.step(action) 102 | total_reward += reward 103 | print(f"Step {step}: Action {action}, Reward {reward:.3f}, Obs {obs[:2]}") 104 | if terminated or truncated: 105 | break 106 | 107 | print(f"回合总奖励: {total_reward:.2f}") 108 | eval_env.close() 109 | 110 | return model 111 | 112 | 113 | if __name__ == "__main__": 114 | 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument("--task", type=str, default=None, help="task file") 117 | args = parser.parse_args() 118 | if(args.task != None): 119 | task = load_task(args.task) 120 | else: 121 | """ 122 | # Test MDP Task Sampler 123 | task = AnyMDPTaskSampler(state_space=16, 124 | action_space=5, 125 | min_state_space=None, 126 | verbose=True) 127 | # Test Garnet Task Sampler 128 | task = GarnetTaskSampler(state_space=16, 129 | action_space=5, 130 | min_state_space=None, 131 | verbose=True) 132 | # Test POMDP Task Sampler 133 | task = AnyPOMDPTaskSampler(state_space=16, 134 | action_space=5, 135 | min_state_space=None, 136 | observation_space=16, 137 | density = 0.1, 138 | verbose=True) 139 | 140 | task = MultiTokensAnyPOMDPTaskSampler(state_space=128, 141 | action_space=5, 142 | min_state_space=None, 143 | observation_space=32, 144 | observation_tokens=4, 145 | action_tokens=1, 146 | density = 0.1, 147 | verbose=True) 148 | """ 149 | 150 | task = MultiTokensAnyPOMDPTaskSampler(state_space=128, 151 | action_space=5, 152 | min_state_space=None, 153 | observation_space=32, 154 | observation_tokens=4, 155 | action_tokens=1, 156 | density = 0.1, 157 | 
verbose=True) 158 | 159 | 160 | # 选项1: 仅训练 161 | model = train_multi_discrete_ppo(task) 162 | 163 | # 选项2: 训练并评估 164 | model = evaluate_model(n_eval_episodes=5) 165 | 166 | # 选项3: 加载已有模型并评估 167 | # model = evaluate_model(model_path="ppo_multi_discrete_model.zip", n_eval_episodes=5) 168 | 169 | print("\n训练与评估完成!") -------------------------------------------------------------------------------- /xenoverse/anymdp/task_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Any MDP Task Sampler 3 | """ 4 | import numpy 5 | import scipy.sparse as sp 6 | from numpy import random 7 | from copy import deepcopy 8 | from xenoverse.utils import pseudo_random_seed 9 | from xenoverse.anymdp.solver import check_valuefunction 10 | from xenoverse.utils import RandomFourier 11 | from xenoverse.anymdp.task_sampler_utils import sample_bandit, sample_mdp, sample_garnet 12 | 13 | eps = 1e-10 14 | 15 | def AnyMDPTaskSampler(state_space:int=64, 16 | action_space:int=5, 17 | min_state_space:int=None, 18 | seed=None, 19 | verbose=False): 20 | # Sampling Transition Matrix and Reward Matrix based on Irwin-Hall Distribution and Gaussian Distribution 21 | if(seed is not None): 22 | random.seed(seed) 23 | else: 24 | random.seed(pseudo_random_seed()) 25 | 26 | assert(state_space >= 8 or state_space == 1),"State Space must be at least 8 or 1 (Multi-armed Bandit)!" 27 | 28 | if(state_space < 2): 29 | max_steps = 1 30 | else: 31 | lower_bound = max(4.0 * state_space, 100) 32 | upper_bound = max(min(8.0 * state_space, 500), lower_bound + 1) 33 | max_steps = random.uniform(lower_bound, upper_bound) 34 | 35 | # Sample a subset of states 36 | if(min_state_space is None): 37 | min_state_space = state_space 38 | real_state_space = state_space 39 | else: 40 | min_state_space = min(min_state_space, state_space) 41 | assert(min_state_space >= 8), "Minimum State Space must be at least 8!" 
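        # Draw the effective number of states, then embed them at random positions of the nominal state space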
42 | real_state_space = random.randint(min_state_space, state_space + 1) 43 | state_mapping = numpy.random.permutation(state_space)[:real_state_space] 44 | 45 | # Generate Transition Matrix While Check its Quality 46 | task = {"ns": state_space, 47 | "na": action_space, 48 | "max_steps": max_steps, 49 | "state_mapping": state_mapping, 50 | "task_type": "MDP"} 51 | 52 | while(True): 53 | if(real_state_space == 1): 54 | task.update(sample_bandit(action_space)) 55 | break 56 | else: 57 | res = sample_mdp(real_state_space, action_space, max_steps, verbose=verbose) 58 | if(res is not None): 59 | task.update(res) 60 | if(check_valuefunction(task, verbose=verbose)): 61 | break 62 | elif(verbose): 63 | print("Failed to generate valid MDP, trying again...") 64 | 65 | return task 66 | 67 | def AnyPOMDPTaskSampler( 68 | state_space:int=64, 69 | action_space:int=5, 70 | min_state_space:int=None, 71 | observation_space:int=64, 72 | density=0.20, 73 | maximum_distribution=4, 74 | seed=None, 75 | verbose=False): 76 | 77 | task = AnyMDPTaskSampler(state_space, action_space, min_state_space, seed, verbose) 78 | task["no"] = observation_space 79 | task["task_type"] = "POMDP" 80 | real_states = task["state_mapping"].shape[0] 81 | density = min(density, maximum_distribution / observation_space) 82 | obs_mat=sp.random(real_states, observation_space, density=density, format='csr').toarray() 83 | for i in range(real_states): 84 | if(numpy.sum(obs_mat[i]) == 0): 85 | obs_mat[i][random.randint(observation_space)] = 1 86 | obs_mat[i] /= numpy.sum(obs_mat[i]) 87 | task["observation_transition"] = obs_mat 88 | return task 89 | 90 | def MultiTokensAnyPOMDPTaskSampler( 91 | state_space:int=256, 92 | action_space:int=5, 93 | min_state_space:int=None, 94 | observation_space:int=64, 95 | observation_tokens:int=4, 96 | action_tokens:int=2, 97 | density=0.20, 98 | maximum_distribution=4, 99 | seed=None, 100 | verbose=False): 101 | 102 | task = AnyMDPTaskSampler(state_space, action_space, min_state_space, seed, verbose) 103 | task["no"] = observation_space 104 | task["do"] = observation_tokens 105 | task["da"] = action_tokens 106 | task["task_type"] = "MTPOMDP" 107 | real_states = task["state_mapping"].shape[0] 108 | task["observation_transition"] = [] 109 | 110 | for obs_tok in range(observation_tokens): 111 | density = min(density, maximum_distribution / observation_space) 112 | obs_mat=sp.random(real_states, observation_space, density=density, format='csr').toarray() 113 | for i in range(real_states): 114 | if(numpy.sum(obs_mat[i]) == 0): 115 | obs_mat[i][random.randint(observation_space)] = 1 116 | obs_mat[i] /= numpy.sum(obs_mat[i]) 117 | task["observation_transition"].append(obs_mat) 118 | return task 119 | 120 | def GarnetTaskSampler(state_space:int=128, 121 | action_space:int=5, 122 | min_state_space:int=None, 123 | b:int=2, 124 | sigma:float=0.1, 125 | seed=None, 126 | verbose=False): 127 | # Sampling Transition Matrix and Reward Matrix based on Irwin-Hall Distribution and Gaussian Distribution 128 | if(seed is not None): 129 | random.seed(seed) 130 | else: 131 | random.seed(pseudo_random_seed()) 132 | 133 | assert(state_space >= 8 or state_space == 1),"State Space must be at least 8 or 1 (Multi-armed Bandit)!" 
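    # Episode length roughly scales with the number of states; a single-state task degenerates to a one-step bandit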
134 | 135 | if(state_space < 2): 136 | max_steps = 1 137 | else: 138 | lower_bound = max(4.0 * state_space, 100) 139 | upper_bound = max(min(8.0 * state_space, 500), lower_bound + 1) 140 | max_steps = random.uniform(lower_bound, upper_bound) 141 | 142 | # Sample a subset of states 143 | if(min_state_space is None): 144 | min_state_space = state_space 145 | real_state_space = state_space 146 | else: 147 | min_state_space = min(min_state_space, state_space) 148 | assert(min_state_space >= 8), "Minimum State Space must be at least 8!" 149 | real_state_space = random.randint(min_state_space, state_space + 1) 150 | state_mapping = numpy.random.permutation(state_space)[:real_state_space] 151 | 152 | # Generate Transition Matrix While Check its Quality 153 | task = {"ns": state_space, 154 | "na": action_space, 155 | "max_steps": max_steps, 156 | "state_mapping": state_mapping} 157 | 158 | task.update(sample_garnet(real_state_space, action_space, max_steps, b, sigma, verbose=verbose)) 159 | 160 | return task -------------------------------------------------------------------------------- /xenoverse/utils/grid_ops.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import random 3 | from attr import attrs, attrib 4 | from .tools import conv2d_numpy 5 | 6 | """ 7 | Generating Topology For A Maze 8 | """ 9 | 10 | @attrs 11 | class Rectangle(object): 12 | lb = attrib(type=list, default=None) 13 | rt = attrib(type=list, default=None) 14 | 15 | def resample(self, cells, max_size=10, min_size=2): 16 | m_x, m_y = cells.shape 17 | 18 | # Sample the rectangle width and length first 19 | w_x = random.randint(min_size, max_size + 1) 20 | w_y = random.randint(min_size, max_size + 1) 21 | 22 | kernel = numpy.ones((w_x, w_y), dtype="float32") 23 | 24 | # Use convolution to make sure no overlapping in rectangles 25 | overlap = conv2d_numpy(cells, kernel, stride=(1, 1)) 26 | freerows, freecols = numpy.where(overlap < 0.5) 27 | 28 | if(freerows.shape[0] > 0): 29 | sel_idx = random.randint(0, freerows.shape[0]) 30 | else: 31 | return False 32 | 33 | self.lb = [freerows[sel_idx], freecols[sel_idx]] 34 | self.rt = [self.lb[0] + w_x - 1, self.lb[1] + w_y - 1] 35 | return True 36 | 37 | def refresh_rectangle(self, cells): 38 | m_x, m_y = cells.shape 39 | b_x = max(0, self.lb[0] - 1) 40 | b_y = max(0, self.lb[1] - 1) 41 | e_x = min(m_x, self.rt[0] + 2) 42 | e_y = min(m_y, self.rt[1] + 2) 43 | cells[b_x:e_x, b_y:e_y] = 1 44 | 45 | def refresh_occupancy(self, cells): 46 | self.refresh_rectangle(cells) 47 | cells[self.lb[0]:(self.rt[0] + 1), self.lb[1]:(self.rt[1] + 1)] = 0 48 | 49 | def genmaze_largeroom(n, room_number, room_size=(2,4)): 50 | cells_occ = numpy.zeros((n-2, n-2), dtype=numpy.int8) 51 | cells_wall = numpy.ones((n-2, n-2), dtype=numpy.int8) 52 | rects = [] 53 | 54 | # TRY PUT 6 RECTANGLES 55 | max_try = 5 56 | for _ in range(room_number): 57 | rect = Rectangle() 58 | for _ in range(max_try): 59 | is_succ = rect.resample(cells_occ, min_size=room_size[0], max_size=room_size[1]) 60 | if(is_succ): 61 | rect.refresh_rectangle(cells_occ) 62 | rects.append(rect) 63 | break 64 | for rect in rects: 65 | rect.refresh_occupancy(cells_wall) 66 | 67 | cell_occs = numpy.ones((n, n), dtype=numpy.int8) 68 | cell_walls = numpy.ones((n, n), dtype=numpy.int8) 69 | cell_occs[1:n-1, 1:n-1] = cells_occ 70 | cell_walls[1:n-1, 1:n-1] = cells_wall 71 | 72 | return cell_occs, cell_walls, rects 73 | 74 | def genmaze_by_primwall(n, allow_loops=True, wall_density=0.30): 75 | 
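    """
    Generate an (n, n) wall grid (1 = wall, 0 = free) for a maze.

    Large rectangular rooms are carved first (genmaze_largeroom), then walls are
    removed Prim-style until every free cell is connected; when allow_loops is
    True, extra walls keep being removed until the interior wall fraction drops
    below wall_density, which introduces loops.
    """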
# Dig big rooms in the region 76 | cell_occs, cell_walls, rects = genmaze_largeroom(n, random.randint(0, (n - 2) ** 2 // 16)) 77 | 78 | # Dig the initial holes 79 | for i in range(1, n, 2): 80 | for j in range(1, n, 2): 81 | if(not cell_occs[i,j]): 82 | cell_walls[i,j] = 0 83 | 84 | #Initialize the logics for prim based maze generation 85 | wall_dict = dict() 86 | path_dict = dict() 87 | rev_path_dict = dict() 88 | path_idx = 0 89 | for i in range(1, n - 1): 90 | for j in range(1, n - 1): 91 | if(cell_walls[i,j]): # we will keep the axial point 92 | wall_dict[i, j] = 0 93 | elif(not cell_occs[i,j]): 94 | path_dict[i,j] = path_idx 95 | rev_path_dict[path_idx] = [(i,j)] 96 | path_idx += 1 97 | for rect in rects: 98 | xb = rect.lb[0] + 1 99 | yb = rect.lb[1] + 1 100 | xe = rect.rt[0] + 2 101 | ye = rect.rt[1] + 2 102 | rev_path_dict[path_idx] = [] 103 | for i in range(xb, xe): 104 | for j in range(yb, ye): 105 | path_dict[i,j] = path_idx 106 | rev_path_dict[path_idx].append((i, j)) 107 | path_idx += 1 108 | 109 | #Prim the wall until all points are connected 110 | max_cell_walls = numpy.prod(cell_walls[1:-1, 1:-1].shape) 111 | while len(rev_path_dict) > 1 or (allow_loops and numpy.sum(cell_walls[1:-1, 1:-1]) > max_cell_walls * wall_density): 112 | wall_list = list(wall_dict.keys()) 113 | random.shuffle(wall_list) 114 | for i, j in wall_list: 115 | new_path_id = -1 116 | connected_path_id = dict() 117 | abandon_path_id = dict() 118 | max_duplicate = 1 119 | 120 | for d_i, d_j in [(i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1)]: 121 | if((d_i > 0 and d_i < n and d_j > 0 and d_j < n) 122 | and cell_walls[d_i, d_j] < 1): 123 | # calculate duplicate path id that might creat a loop 124 | if path_dict[d_i, d_j] not in connected_path_id: 125 | connected_path_id[path_dict[d_i, d_j]] = 1 126 | else: 127 | connected_path_id[path_dict[d_i, d_j]] += 1 128 | if(connected_path_id[path_dict[d_i, d_j]] > max_duplicate): 129 | max_duplicate = connected_path_id[path_dict[d_i, d_j]] 130 | 131 | # decide the new path_id and find those to be deleted 132 | if(path_dict[d_i, d_j] < new_path_id or new_path_id < 0): 133 | if(new_path_id >= 0): 134 | abandon_path_id[new_path_id] = (new_i, new_j) 135 | new_path_id = path_dict[d_i, d_j] 136 | new_i = d_i 137 | new_j = d_j 138 | elif(path_dict[d_i, d_j] != new_path_id): # need to be abandoned 139 | abandon_path_id[path_dict[d_i, d_j]] = (d_i, d_j) 140 | if(len(abandon_path_id) >= 1 and max_duplicate < 2): 141 | break 142 | if(len(abandon_path_id) >= 1 and max_duplicate > 1 and allow_loops): 143 | break 144 | if(allow_loops and len(rev_path_dict) < 2 and random.random() < 0.2): 145 | break 146 | 147 | if(new_path_id < 0): 148 | continue 149 | 150 | # add the released wall 151 | rev_path_dict[new_path_id].append((i,j)) 152 | path_dict[i,j] = new_path_id 153 | cell_walls[i,j] = 0 154 | del wall_dict[i,j] 155 | 156 | # merge the path 157 | for path_id in abandon_path_id: 158 | rev_path_dict[new_path_id].extend(rev_path_dict[path_id]) 159 | for t_i_o, t_j_o in rev_path_dict[path_id]: 160 | path_dict[t_i_o,t_j_o] = new_path_id 161 | del rev_path_dict[path_id] 162 | return cell_walls 163 | 164 | if __name__=="__main__": 165 | print(genmaze_by_primwall(15).astype("int8")) 166 | -------------------------------------------------------------------------------- /xenoverse/anymdp/visualizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | AnyMDP Task Visualization 3 | """ 4 | 5 | import numpy as np 6 | import numpy 7 | import 
matplotlib.pyplot as plt 8 | import matplotlib.patches as mpatches 9 | from copy import deepcopy 10 | from xenoverse.anymdp.solver import update_value_matrix 11 | from xenoverse.anymdp.anymdp_env import map_transition_reward 12 | 13 | 14 | def anymdp_task_visualizer(task, 15 | need_lengends=True, 16 | need_ticks=True, 17 | show_gui=True, 18 | file_path=None): 19 | # 创建一个图形和坐标轴 20 | fig, ax = plt.subplots(figsize=(8, 8)) 21 | 22 | ns = task["ns"] 23 | na = task["na"] 24 | 25 | transition = task["transition"] 26 | reward = task["reward"] 27 | 28 | s_0 = task["s_0"] 29 | s_e = task["s_e"] 30 | 31 | state_mapping = task["state_mapping"] 32 | state_mapping = [str(state_mapping[i]) for i in range(ns)] 33 | 34 | vm = np.zeros((ns, na)) 35 | vm = update_value_matrix(task["transition"], task["reward"], 0.99, vm) 36 | vsm = np.max(vm, axis=-1) 37 | 38 | if(need_ticks): 39 | ax.set_xticks(np.arange(- 0.5, ns + 0.5)) 40 | ax.set_yticks(np.arange(- 0.5, ns + 0.5)) 41 | ax.set_xticklabels([''] + state_mapping) 42 | ax.set_yticklabels([''] + state_mapping) 43 | else: 44 | ax.set_xticks([]) 45 | ax.set_yticks([]) 46 | 47 | ax.set_xlim(0, ns) 48 | ax.set_ylim(0, ns) 49 | 50 | ax.tick_params(axis='both', which='both', length=0) 51 | 52 | trans_ss = np.mean(transition, axis=1) 53 | r_position = np.mean(reward, axis=(0, 1)) 54 | 55 | for i in range(ns): # State From 56 | for j in range(ns): # State To 57 | alpha = min(trans_ss[i, j] * 5.0, 1.0) 58 | rect = plt.Rectangle((j, i), 1, 1, facecolor='grey', alpha=alpha, edgecolor='none') 59 | ax.add_patch(rect) 60 | 61 | # Start states 62 | for s in s_0: 63 | rect = plt.Rectangle((0, s), ns, 1, facecolor='green', alpha=0.25, edgecolor='none') 64 | ax.add_patch(rect) 65 | 66 | # End states 67 | for s in s_e: 68 | if(s >= ns-1): 69 | color = 'blue' 70 | alpha = 0.40 71 | else: 72 | color = 'red' 73 | alpha = 0.20 74 | 75 | rect = plt.Rectangle((0, s), ns, 1, facecolor=color, alpha=alpha, edgecolor='none') 76 | ax.add_patch(rect) 77 | rect = plt.Rectangle((s, 0), 1, ns, facecolor=color, alpha=alpha, edgecolor='none') 78 | ax.add_patch(rect) 79 | 80 | ax.set_xlabel('State ($t+1$)', fontsize=12, fontweight='bold') 81 | ax.set_ylabel('State ($t$)', fontsize=12, fontweight='bold') 82 | 83 | lw = 24 / (ns + 16) 84 | for i in range(ns + 1): 85 | ax.axhline(y=i, color='black', linewidth=lw) 86 | ax.axvline(x=i, color='black', linewidth=lw) 87 | 88 | # Plot the value function 89 | nonpitfalls = np.array([i for i in range(ns) if i not in s_e]) 90 | 91 | v_max = np.max(vsm[nonpitfalls]) 92 | v_min = np.min(vsm[nonpitfalls]) 93 | 94 | scale = (v_max - v_min) * 0.05 95 | ax_v = ax.twinx() 96 | ax_v.set_ylim(v_min - scale, v_max + scale) 97 | ax_v.plot(nonpitfalls + 0.5, vsm[nonpitfalls], color='black', marker='o', linestyle='-', linewidth=2.5) 98 | 99 | ax_v.set_ylabel('State Value Function', fontsize=12, fontweight='bold', color='black') 100 | ax_v.tick_params(axis='y', labelcolor='black') 101 | 102 | if(need_lengends): 103 | transition_patch = mpatches.Patch(color='grey', alpha=0.5, label='$\mathbb{E}_{a}[P(s_t,a,s_{t+1})]$') 104 | born_patch = mpatches.Patch(color='green', alpha=0.2, label='$\mathcal{S}_0$') 105 | pitfall_patch = mpatches.Patch(color='red', alpha=0.2, label='$\mathcal{S}_E$ (pitfalls)') 106 | goal_patch = mpatches.Patch(color='blue', alpha=0.4, label='$\mathcal{S}_E$ (goals)') 107 | 108 | ax.legend(handles=[transition_patch, born_patch, pitfall_patch, goal_patch], loc='center left', fontsize=10) 109 | 110 | # Show and save 111 | if(file_path is not None): 112 
| plt.savefig(file_path + '.pdf', format='pdf') 113 | 114 | if(show_gui): 115 | plt.show() 116 | 117 | def rearrange_states(task, K=5): 118 | trans_ss = numpy.sum(task["transition"], axis=1) 119 | ra_task = deepcopy(task) 120 | 121 | s_map = [] 122 | for s in task["s_0"]: 123 | s_map.append(s) 124 | 125 | vm = numpy.zeros((task["ns"], task["na"]), dtype='float32') 126 | vm = update_value_matrix(task["transition"], task["reward"], 0.99, vm) 127 | vsm = numpy.max(vm, axis=-1) 128 | print(task["s_e"]) 129 | 130 | while len(s_map) < task["ns"]: 131 | s_trans_sum = [] 132 | for s in range(len(trans_ss)): 133 | if(s in s_map): 134 | continue 135 | p2s = numpy.mean(trans_ss[s_map, [s for _ in range(len(s_map))]], axis=0) 136 | if(p2s > 1.0e-6): 137 | s_trans_sum.append((s, vsm[s], p2s)) 138 | s_sorted_trans = sorted(s_trans_sum, key=lambda x:x[2], reverse=True) 139 | s_sorted_trans = sorted(s_sorted_trans[:K], key=lambda x:x[1], reverse=False) 140 | s_map.append(s_sorted_trans[0][0]) 141 | 142 | # make the goal last 143 | for s in task["s_e"]: 144 | if(numpy.sum(task["reward"][:, :, s] > 0) and s_map.index(s) > task['ns'] // 2 and s_map.index(s) != task["ns"] - 1): # mv the goal to the end 145 | s_map[-1], s_map[s_map.index(s)] = s_map[s_map.index(s)], s_map[-1] 146 | 147 | s_map_inv = list(range(task["ns"])) 148 | for i, s in enumerate(s_map): 149 | s_map_inv[s] = i 150 | 151 | ra_task["transition"] *= 0.0 152 | ra_task["reward"] *= 0.0 153 | ra_task["transition"], ra_task["reward"] = map_transition_reward( 154 | task["transition"], 155 | task["reward"], 156 | ra_task["transition"], 157 | ra_task["reward"], 158 | s_map_inv) 159 | ra_task["state_mapping"] = s_map 160 | 161 | ra_task["s_0"] = [] 162 | ra_task["s_e"] = [] 163 | for s in task["s_0"]: 164 | ra_task["s_0"].append(s_map_inv[s]) 165 | for s in task["s_e"]: 166 | ra_task["s_e"].append(s_map_inv[s]) 167 | 168 | return ra_task 169 | 170 | if __name__ == '__main__': 171 | from xenoverse.anymdp import AnyMDPTaskSampler, GarnetTaskSampler 172 | ns = 64 173 | na = 5 174 | #task = AnyMDPTaskSampler(ns, na, verbose=True) 175 | task = GarnetTaskSampler(ns, na, b=2, verbose=True) 176 | task = rearrange_states(task, K=5) 177 | anymdp_task_visualizer(task, need_ticks=False, 178 | need_lengends=False, 179 | file_path=f'./vis_anymdp_ns{ns}na{na}') -------------------------------------------------------------------------------- /xenoverse/anymdp/solver.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import numpy as np 3 | from numpy import random 4 | from numba import njit 5 | import networkx as nx 6 | import scipy.stats as stats 7 | from scipy.stats import spearmanr 8 | from copy import deepcopy 9 | 10 | def normalized_mrr(scores1, scores2, k=None): 11 | assert numpy.shape(scores1) == numpy.shape(scores2) 12 | n = numpy.shape(scores1)[0] 13 | 14 | if k is None: 15 | k = n 16 | else: 17 | k = min(k, n) 18 | 19 | indices1 = np.argsort(-scores1) 20 | indices2 = np.argsort(-scores2) 21 | indices1_rev = indices1[::-1] 22 | 23 | ranks = np.zeros(n) 24 | for i, idx in enumerate(indices2): 25 | ranks[idx] = i + 1 26 | 27 | invranks = np.zeros(n) 28 | for i, idx in enumerate(indices1_rev): 29 | invranks[idx] = i + 1 30 | 31 | mrrmax = 0.0 32 | mrrmin = 0.0 33 | mrr = 0.0 34 | 35 | for i in range(k): 36 | idx = indices1[i] 37 | mrrmax += 1.0 / (i + 1) ** 2 38 | mrrmin += 1.0 / ((i + 1) * invranks[idx]) 39 | mrr += 1.0 / ((i + 1) * ranks[idx]) 40 | 41 | return (mrr - mrrmin) / (mrrmax - mrrmin) 42 | 43 | 
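# Sanity check for normalized_mrr: identical rankings score 1.0 and fully
# reversed rankings score 0.0, e.g.
#   normalized_mrr(np.array([3., 2., 1.]), np.array([3., 2., 1.]))  # -> 1.0
#   normalized_mrr(np.array([3., 2., 1.]), np.array([1., 2., 3.]))  # -> 0.0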
def mean_mrr(X, Y, k=None): 44 | if X.shape != Y.shape: 45 | raise ValueError("X and Y must have the same shape") 46 | if(X.ndim == 1): 47 | return normalized_mrr(X, Y, k) 48 | nmrrs = [] 49 | 50 | for i in range(X.shape[0]): 51 | x_col = X[i] 52 | y_col = Y[i] 53 | nmrr = normalized_mrr(x_col, y_col) 54 | nmrrs.append(nmrr) 55 | return numpy.mean(nmrrs) 56 | 57 | @njit(cache=True) 58 | def update_value_matrix(t_mat, r_mat, gamma, vm, max_iteration=-1, is_greedy=True): 59 | diff = 1.0 60 | cur_vm = numpy.copy(vm) 61 | ns, na, _ = r_mat.shape 62 | alpha = 1.0 63 | iteration = 0 64 | while diff > 1.0e-4 and ( 65 | (max_iteration < 0) or 66 | (max_iteration > iteration and max_iteration > 1) or 67 | (iteration < 1 and random.random() < max_iteration)): 68 | iteration += 1 69 | old_vm = numpy.copy(cur_vm) 70 | for s in range(ns): 71 | for a in range(na): 72 | exp_q = 0.0 73 | for sn in range(ns): 74 | if(is_greedy): 75 | exp_q += t_mat[s,a,sn] * (gamma * numpy.max(cur_vm[sn]) + r_mat[s, a, sn]) 76 | else: 77 | exp_q += t_mat[s,a,sn] * (gamma * numpy.mean(cur_vm[sn]) + r_mat[s, a, sn]) 78 | cur_vm[s,a] += alpha * (exp_q - cur_vm[s,a]) 79 | 80 | diff = numpy.sqrt(numpy.mean((old_vm - cur_vm)**2)) 81 | alpha = max(0.80 * alpha, 0.50) 82 | return cur_vm 83 | 84 | def get_opt_trajectory_dist(s0, s0_prob, se, ns, na, transition, vm, K=8): 85 | a_max = numpy.argmax(vm, axis=1) 86 | i_indices = np.arange(ns)[:, None] 87 | j_indices = np.arange(ns) 88 | max_trans = numpy.copy(transition[i_indices, a_max[:, None], j_indices]) 89 | for s in se: 90 | max_trans[s, s0] = s0_prob # s_e directly lead to s0 91 | 92 | for _ in range(K): 93 | max_trans = numpy.matmul(max_trans, max_trans) 94 | gini_impurity = [] 95 | normal_entropy = [] 96 | 97 | for s in s0: 98 | stable_prob = max_trans[s] + 1.0e-12 # calculation safety 99 | gini_impurity.append(1.0 - numpy.sum(stable_prob * stable_prob)) 100 | normal_entropy.append(-numpy.sum(stable_prob * numpy.log(stable_prob)) / numpy.log(ns)) 101 | 102 | # Check gini impurity 103 | return numpy.min(gini_impurity), numpy.min(normal_entropy) 104 | 105 | def check_valuefunction(task, verbose=False): 106 | t_mat = numpy.copy(task["transition"]) 107 | 108 | r_mat = numpy.copy(task["reward"]) 109 | ns, na, _ = t_mat.shape 110 | gamma = numpy.power(2, -1.0 / ns) 111 | vm_opt = update_value_matrix(t_mat, r_mat, gamma, numpy.zeros((ns, na), dtype=float), is_greedy=True) 112 | vm_rnd = update_value_matrix(t_mat, r_mat, gamma, numpy.zeros((ns, na), dtype=float), is_greedy=False) 113 | 114 | # Get Average Reward 115 | avg_vm_opt = vm_opt * (1.0 - gamma) * task["max_steps"] 116 | avg_vm_rnd = vm_rnd * (1.0 - gamma) * task["max_steps"] 117 | vm_diffs = [] 118 | 119 | for s in task["s_0"]: 120 | vm_diff = numpy.max(avg_vm_opt[s]) - numpy.max(avg_vm_rnd[s]) 121 | if(vm_diff < 2.0): 122 | return False 123 | vm_diffs.append(vm_diff) 124 | 125 | # check the stationary distribution of the optimal value function 126 | K = int(numpy.log2(task["max_steps"])) + 1 127 | gini, ent = get_opt_trajectory_dist( 128 | deepcopy(task["s_0"]), 129 | numpy.copy(task["s_0_prob"]), 130 | deepcopy(task["s_e"]), 131 | ns, na, 132 | numpy.copy(t_mat), 133 | vm_opt, 134 | K=K) 135 | 136 | t_mat_sum = numpy.sum(t_mat, axis=-1) 137 | error = (t_mat_sum - 1.0)**2 138 | if(len(task["s_e"]) > 0): 139 | error[task["s_e"]] = 0.0 140 | if((error >= 1.0e-6).any()): 141 | if(verbose): 142 | print("Transition Matrix Error: ", numpy.where(error>=1.0e-6)) 143 | return False 144 | 145 | vm_diffs = numpy.mean(vm_diffs) 146 | 
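    # Accept the task only if the greedy-optimal stationary distribution is sufficiently spread out
    # (high Gini impurity and normalized entropy); the optimal-vs-random value gap was already checked above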
if(verbose): 147 | print("Value Diff: {:.4f}, Gini Impurity: {:.4f}, Normalized Entropy: {:.4f}, final_goal_terminate: {}".format(vm_diffs, gini,ent, task["final_goal_terminate"])) 148 | return gini > 0.70 and ent > 0.35 149 | 150 | def get_stable_dist(task): 151 | t_mat = numpy.copy(task["transition"]) 152 | r_mat = numpy.copy(task["reward"]) 153 | ns, na, _ = t_mat.shape 154 | gamma = numpy.power(2, -1.0 / ns) 155 | vm_opt = update_value_matrix(t_mat, r_mat, gamma, 156 | numpy.zeros((ns, na), dtype=float), 157 | is_greedy=True) 158 | a_max = numpy.argmax(vm_opt, axis=1) 159 | i_indices = np.arange(ns)[:, None] 160 | j_indices = np.arange(ns) 161 | opt_trans = numpy.copy(t_mat[i_indices, a_max[:, None], j_indices]) 162 | rnd_trans = numpy.mean(t_mat, axis=1) 163 | s0 = task["s_0"] 164 | s0_prob = task["s_0_prob"] 165 | for s in task["s_e"]: 166 | opt_trans[s, s0] = s0_prob # s_e directly lead to s0 167 | rnd_trans[s, s0] = s0_prob # s_e directly lead to s0 168 | 169 | for _ in range(20): 170 | opt_trans = numpy.matmul(opt_trans, opt_trans) 171 | rnd_trans = numpy.matmul(rnd_trans, rnd_trans) 172 | 173 | s_0_dist = numpy.zeros((ns,)) 174 | s_0_dist[s0] = s0_prob 175 | opt_prob = numpy.sort(numpy.matmul(numpy.transpose(opt_trans), s_0_dist))[::-1] 176 | rnd_prob = numpy.sort(numpy.matmul(numpy.transpose(rnd_trans), s_0_dist))[::-1] 177 | 178 | return opt_prob, rnd_prob -------------------------------------------------------------------------------- /xenoverse/linds/task_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Any MDP Task Sampler 3 | """ 4 | import numpy 5 | from numpy import random 6 | from numpy import linalg 7 | from copy import deepcopy 8 | from xenoverse.utils import pseudo_random_seed, weights_and_biases, RandomFourier, dump_task, load_task 9 | import pickle 10 | 11 | 12 | def banded_trim(A, B, C): 13 | no, ns = C.shape 14 | An, Bn, Cn = A.copy(), B.copy(), C.copy() 15 | width = random.randint(2, max(ns // 2, 3) + 1) 16 | if (width >= ns): 17 | return An, Bn, Cn 18 | for i in range(ns): 19 | for j in range(ns): 20 | if(abs(i - j) > width): 21 | An[i, j] = 0.0 22 | return An, Bn, Cn 23 | 24 | def triangle_trim(A, B, C): 25 | no, ns = C.shape 26 | An, Bn, Cn = A.copy(), B.copy(), C.copy() 27 | width = random.randint(-1, max(ns // 4, 2) + 1) 28 | if (width >= ns): 29 | return An, Bn, Cn 30 | for i in range(ns): 31 | for j in range(ns): 32 | if(j < i + width): 33 | An[i, j] = 0.0 34 | return An, Bn, Cn 35 | 36 | def no_trim(A, B, C): 37 | return A, B, C 38 | 39 | def sample_variants_(ns, na, no): 40 | AB, X = weights_and_biases(ns+na, ns, need_bias=True) 41 | C, Y = weights_and_biases(ns, no, need_bias=False) 42 | 43 | A = AB[:, :ns] * random.choice([0.01, 0.02, 0.05, 0.1, 0.2]) 44 | B = AB[:, ns:] 45 | X = X * random.choice([0.0, 0.05, 0.1]) 46 | 47 | trim_funcs = [banded_trim, triangle_trim, no_trim] 48 | trim_func = random.choice(trim_funcs) 49 | A, B, C = trim_func(A, B, C) 50 | 51 | return A, B, C, X, Y 52 | 53 | def sample_target_spaces_(no): 54 | eps = min(random.uniform(0.2, 1.2), 1.0) 55 | tgt_valid = numpy.zeros((no,)) 56 | while(numpy.sum(tgt_valid) < 0.5): 57 | tgt_valid = random.binomial(1, eps, size=(no,)) 58 | return tgt_valid 59 | 60 | def LinearDSSampler(state_dim:int=16, 61 | action_dim:int=8, 62 | observation_dim:int=8, 63 | seed=None, 64 | verbose=False): 65 | # Sampling Transition Matrix and Reward Matrix based on Irwin-Hall Distribution and Gaussian Distribution 66 | # Task: 67 | # mode: static goal or 
moving goal 68 | # ndim: number of inner dimensions 69 | # born_loc: born location and noise 70 | # sgoal_loc: static goal location, range of sink, and reward 71 | # pf_loc: pitfall location, range of sink, and reward 72 | # line_potential_energy: line potential energy specified by direction and delta_V 73 | # point_potential_energy: point potential energy specified by location and delta_V 74 | 75 | if(seed is not None): 76 | pseudo_random_seed(seed) 77 | else: 78 | pseudo_random_seed() 79 | 80 | task = dict() 81 | 82 | # static: static goal, one-step reward with reset 83 | # dynamic: moving goal, continuous reward 84 | # universal: a random reward field generated by a neural network 85 | 86 | task["state_dim"] = state_dim 87 | task["observation_dim"] = observation_dim 88 | task["action_dim"] = action_dim 89 | 90 | task["max_steps"] = random.randint(100, 1000) # episode length sampled from [100, 1000) 91 | task_valid = False 92 | while task_valid is False: 93 | task["ld_A"], task["ld_B"], task["ld_C"], task["ld_X"], task["ld_Y"] = sample_variants_(state_dim, action_dim, observation_dim) 94 | task_valid = (linalg.matrix_rank(task["ld_B"]) > min(action_dim, state_dim)-1) and \ 95 | (linalg.matrix_rank(task["ld_C"]) > min(observation_dim, state_dim) - 1) 96 | 97 | # Sample rewards 98 | task["action_cost"] = max(random.uniform(-1.0, 1.0) * random.exponential(0.05), 0.0) 99 | task["reward_base"] = random.exponential(0.10) 100 | task["terminate_punish"] = random.exponential(scale=5.0) * random.choice([0, 1, 1]) 101 | 102 | # probability without any procedural reward 103 | task["reward_factor"] = random.exponential(scale=0.50) 104 | task["target_valid"]= sample_target_spaces_(observation_dim) 105 | task["target_type"] = random.choice(["dynamic_target", "dynamic_target", "static_target"]) 106 | 107 | is_valid_task = False 108 | while not is_valid_task: 109 | born_loc = int(max(random.exponential(scale=1.0), 1)) 110 | task["initial_states"] = [random.randn(state_dim) for _ in range(born_loc)] 111 | task["noise_drift"] = numpy.clip(random.uniform(-0.02, 0.02), 0.0, 0.02) 112 | 113 | # fixed command versus random command 114 | if(task["target_type"] == "static_target"): 115 | task["command"] = random.randn(observation_dim) * random.choice([0, 1]) 116 | task["target_delay"] = 0 117 | cmd = task["command"] 118 | elif(task["target_type"] == "dynamic_target"): 119 | task["command"] = RandomFourier(observation_dim) 120 | task["target_delay"] = max(random.randint(-10, 30), 0) 121 | cmd = task["command"](-task["target_delay"]) 122 | else: 123 | raise Exception("Unknown target type: {}".format(task["target_type"])) 124 | # Make sure that the initial states are not too far away from the target 125 | for bloc in task["initial_states"]: 126 | error = numpy.linalg.norm((cmd - task["ld_C"] @ bloc - task["ld_Y"]) * task["target_valid"]) 127 | error2 = numpy.linalg.norm(bloc) 128 | if(error > 3.0 or error2 > 10.0): 129 | is_valid_task = False 130 | break 131 | else: 132 | is_valid_task = True 133 | 134 | return task 135 | 136 | def LinearDSSamplerRandomDim(max_state_dim:int=16, 137 | max_observation_dim:int=16, 138 | max_action_dim:int=8, 139 | seed=None, 140 | verbose=False): 141 | assert max_state_dim >= 2, "max_state_dim should be at least 2" 142 | assert max_action_dim >= 1, "max_action_dim should be at least 1" 143 | state_dim = random.randint(1, max_state_dim+1) 144 | min_action_dim = max(1, (state_dim + 1) // 2) 145 | max_action_dim = min(max_action_dim, state_dim * 3 // 2) 146 | min_observation_dim = max(1, state_dim // 4) 
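# (annotation) numpy.random.randint samples with an exclusive upper bound, hence the +1 terms in the lines that follow; the resulting dims keep action_dim roughly within [(state_dim + 1) // 2, state_dim * 3 // 2] and observation_dim within [state_dim // 4, state_dim * 3 // 2].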
147 | max_observation_dim = min(max_observation_dim, state_dim * 3 // 2) 148 | action_dim = random.randint(min_action_dim, max_action_dim+1) 149 | observation_dim = random.randint(min_observation_dim, max_observation_dim + 1) 150 | 151 | return LinearDSSampler(state_dim=state_dim, 152 | action_dim=action_dim, 153 | observation_dim=observation_dim, 154 | seed=seed, verbose=verbose) 155 | 156 | if __name__ == "__main__": 157 | import argparse 158 | parser = argparse.ArgumentParser() 159 | parser.add_argument('--output', type=str, default='lind_task.pkl', help='Output file for the sampled LIND task') 160 | args = parser.parse_args() 161 | 162 | task = LinearDSSamplerRandomDim() 163 | dump_task(args.output, task) -------------------------------------------------------------------------------- /xenoverse/anymdp/anymdp_env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gym Environment For Any MDP 3 | """ 4 | import numpy 5 | import gymnasium as gym 6 | import pygame 7 | from numpy import random 8 | from numba import njit 9 | from gymnasium import spaces 10 | from xenoverse.utils import pseudo_random_seed 11 | 12 | @njit(cache=True) 13 | def map_transition_reward(t, r, t_obs, r_obs, s_mapping): 14 | ns, na, _ = t.shape 15 | for i,si in enumerate(s_mapping): 16 | for a in range(na): 17 | for j,sj in enumerate(s_mapping): 18 | t_obs[si][a][sj] = t[i][a][j] 19 | r_obs[si][a][sj] = r[i][a][j] 20 | return t_obs, r_obs 21 | 22 | class AnyMDPEnv(gym.Env): 23 | def __init__(self, max_steps): 24 | """ 25 | Pay attention: max_steps might be reset by the task settings 26 | """ 27 | self.observation_space = spaces.Discrete(1) 28 | self.action_space = spaces.Discrete(1) 29 | self.max_steps = max_steps 30 | self.task_set = False 31 | 32 | def set_task(self, task_config): 33 | for k,v in task_config.items(): 34 | setattr(self, k, v) 35 | if(self.task_type=="MDP"): 36 | self.no = self.ns 37 | self.observation_space = spaces.Discrete(self.ns) 38 | self.action_space = spaces.Discrete(self.na) 39 | elif(self.task_type=="POMDP"): 40 | self.observation_space = spaces.Discrete(self.no) 41 | self.action_space = spaces.Discrete(self.na) 42 | elif(self.task_type=="MTPOMDP"): 43 | self.observation_space = spaces.MultiDiscrete([self.no] * self.do) 44 | self.action_space = spaces.MultiDiscrete([self.na] * self.da) 45 | else: 46 | raise NotImplementedError(f"Unknown task type: {self.task_type}") 47 | 48 | assert self.transition.shape == self.reward.shape 49 | assert self.transition.shape[0] == len(self.state_mapping) and self.transition.shape[1] == self.na 50 | 51 | assert self.ns > 0, "State space must be at least 1" 52 | assert self.na > 1, "Action space must be at least 2" 53 | assert self.no > 0, "Observation space must be at least 1" 54 | 55 | # inverse mapping from observation to inner state 56 | self.obs2inner = list(range(self.ns)) 57 | for i,s in enumerate(self.state_mapping): 58 | self.obs2inner[s] = i # do permutation to avoid empty mapping 59 | self.obs2inner[i] = s 60 | # Get observation transition and reward 61 | self.transition_obs = numpy.zeros((self.ns, self.na, self.ns)) 62 | self.reward_obs = numpy.zeros((self.ns, self.na, self.ns)) 63 | self.transition_obs, self.reward_obs = map_transition_reward(self.transition, self.reward, self.transition_obs, self.reward_obs, self.state_mapping) 64 | 65 | # check transition matrix is valid 66 | t_mat_sum = numpy.sum(self.transition, axis=-1) 67 | error = (t_mat_sum - 1.0)**2 68 | if(len(self.s_e) > 0): 69 | 
error[self.s_e] = 0.0 70 | if((error >= 1.0e-6).any()): 71 | raise Exception(f'Transition Matrix Sum != 1 at {numpy.where(error>=1.0e-6)}') 72 | 73 | # check if there is any state that is both start and end 74 | intersection = numpy.intersect1d(self.s_0, self.s_e) 75 | if(len(intersection) > 0): 76 | raise Exception(f'State {intersection} appears in both s_0 ({self.s_0}) and s_e ({self.s_e})') 77 | 78 | self.task_set = True 79 | self.need_reset = True 80 | 81 | def reset(self, *args, **kwargs): 82 | if(not self.task_set): 83 | raise Exception("Must call \"set_task\" first") 84 | 85 | self.steps = 0 86 | self.need_reset = False 87 | random.seed(pseudo_random_seed()) 88 | 89 | self._state = numpy.random.choice(self.s_0, p=self.s_0_prob) 90 | return self.get_observation, {"steps": self.steps} 91 | 92 | def single_step(self, action): 93 | if(self.need_reset or not self.task_set): 94 | raise Exception("Must \"set_task\" and \"reset\" before doing any actions") 95 | if(self._state in self.s_e and self.ns > 1): 96 | raise Exception(f"Unexpected Error: stepping from a terminated state") 97 | assert action < self.na, f"Action {action} is out of range" 98 | 99 | transition_gt = numpy.copy(self.transition[self._state, action]) 100 | next_state = random.choice(len(self.state_mapping), p=transition_gt) 101 | 102 | # sample the reward 103 | reward_gt = self.reward[self._state, action, next_state] 104 | reward_gt_noise = self.reward_noise[self._state, action, next_state] 105 | reward = random.normal(reward_gt, reward_gt_noise) 106 | 107 | self._state = next_state 108 | terminated = (self._state in self.s_e) or (self.ns < 2) 109 | 110 | return reward_gt, reward, terminated 111 | 112 | def step(self, action): 113 | self.steps += 1 114 | truncated = self.steps >= self.max_steps 115 | terminated = False 116 | if(numpy.ndim(action) >= 1): 117 | assert numpy.shape(action) == (self.da,), f"Action {action} does not have the expected shape ({self.da},)" 118 | rewards = 0 119 | rewards_gt = 0 120 | for act in action: 121 | reward_gt, reward, term = self.single_step(act) 122 | rewards += reward 123 | rewards_gt += reward_gt 124 | if(term): 125 | terminated=True 126 | break 127 | else: 128 | rewards_gt, rewards, terminated = self.single_step(action) 129 | info = {"steps": self.steps, "reward_gt": rewards_gt} 130 | info["transition_gt"] = self.transition_obs[self.state, action] 131 | 132 | return self.get_observation, rewards, terminated, truncated, info 133 | 134 | @property 135 | def state(self): 136 | return int(self.state_mapping[self._state]) 137 | 138 | @property 139 | def inner_state(self, query_state=None): 140 | if(query_state is None): 141 | return int(self._state) 142 | else: 143 | return int(self.obs2inner[query_state]) 144 | 145 | @property 146 | def get_observation(self): 147 | if(self.task_type == "MDP"): 148 | return int(self.state_mapping[self._state]) 149 | elif(self.task_type == "MTPOMDP"): 150 | toks = [] 151 | for ot in self.observation_transition: 152 | probs = ot[self._state] 153 | toks.append(random.choice(self.no, p=probs)) 154 | return numpy.array(toks, dtype=int) 155 | elif(self.task_type == "POMDP"): 156 | probs = self.observation_transition[self._state] 157 | return random.choice(self.no, p=probs) 158 | else: 159 | raise NotImplementedError 160 | 161 | def get_gt_transition(self): 162 | return self.transition_obs 163 | 164 | def get_gt_reward(self): 165 | return self.reward_obs -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/maze_env.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Gym Environment For Maze3D 3 | """ 4 | import numpy 5 | import gymnasium as gym 6 | import pygame 7 | 8 | from gymnasium import error, spaces, utils 9 | from gymnasium.utils import seeding 10 | from xenoverse.mazeworld.envs.maze_continuous_3d import MazeCoreContinuous3D 11 | from xenoverse.mazeworld.envs.dynamics import DEFAULT_ACTION_SPACE_16, DEFAULT_ACTION_SPACE_32 12 | 13 | class MazeWorldEnvBase(gym.Env): 14 | """ 15 | All Maze World Environments Use This Base Class 16 | """ 17 | def __init__(self, 18 | maze_type, 19 | enable_render=True, 20 | render_scale=480, 21 | max_steps=5000, 22 | ): 23 | self.maze_type = maze_type 24 | self.enable_render = enable_render 25 | self.render_viewsize = render_scale 26 | 27 | self.need_reset = True 28 | self.need_set_task = True 29 | 30 | def set_task(self, task_config): 31 | self.maze_core.set_task(task_config) 32 | self.need_set_task = False 33 | 34 | def reset(self, *args, **kwargs): 35 | if(self.need_set_task): 36 | raise Exception("Must call \"set_task\" before reset") 37 | state = self.maze_core.reset() 38 | if(self.enable_render): 39 | self.maze_core.render_init(self.render_viewsize) 40 | self.keyboard_press = pygame.key.get_pressed() 41 | info = {"steps": self.maze_core.steps} 42 | self.maze_core.get_info(info) 43 | self.need_reset = False 44 | self.key_done = False 45 | return state, info 46 | 47 | def action_control(self, action): 48 | raise NotImplementedError("Must implement the action control logic") 49 | 50 | def step(self, action=None): 51 | if(self.need_reset): 52 | raise Exception("Must \"reset\" before doing any actions") 53 | 54 | internal_action = self.action_control(action) 55 | 56 | # In keyboard control, process only continues when key is pressed 57 | info = {"steps": self.maze_core.steps} 58 | if(internal_action is None): 59 | return self.maze_core.get_observation(), 0, False, False, info 60 | reward, terminated, truncated = self.maze_core.do_action(internal_action) 61 | self.maze_core.get_info(info) 62 | 63 | if(terminated or truncated): 64 | self.need_reset=True 65 | 66 | return self.maze_core.get_observation(), reward, terminated, truncated, info 67 | 68 | def render(self, mode="human"): 69 | if(mode != "human"): 70 | raise NotImplementedError("Only human mode is supported") 71 | if(self.enable_render): 72 | self.key_done, self.keyboard_press = self.maze_core.render_update() 73 | 74 | def get_local_map(self, map_range=8, resolution=(128, 128)): 75 | """ 76 | Returns the localized god-view map relative to the agent's standpoint 77 | """ 78 | return self.maze_core.get_local_map(map_range=map_range, resolution=resolution) 79 | 80 | def get_global_map(self, resolution=(128, 128)): 81 | """ 82 | Returns the global god-view map 83 | """ 84 | return self.maze_core.get_global_map(resolution=resolution) 85 | 86 | def get_target_location(self): 87 | """ 88 | Acquire relative position of the target to the agent 89 | Return (Distance, Angle) 90 | """ 91 | target_id = self.maze_core._commands_sequence[self.maze_core._commands_sequence_idx] 92 | target_grid = self.maze_core._landmarks_coordinates[target_id] 93 | deta_grid = numpy.zeros(shape=(2,), dtype=numpy.float32) 94 | deta_grid[0] = target_grid[0] - self.maze_core._agent_grid[0] 95 | deta_grid[1] = target_grid[1] - self.maze_core._agent_grid[1] 96 | angle = numpy.arctan2(deta_grid[1], deta_grid[0]) - self.maze_core._agent_ori 97 | if(angle < -numpy.pi): 98 | angle += 2 * numpy.pi 99 | elif(angle > 
numpy.pi): 100 | angle -= 2 * numpy.pi 101 | dist = numpy.sqrt(numpy.sum(deta_grid * deta_grid)) 102 | return dist, angle 103 | 104 | def save_trajectory(self, file_name, view_size=480): 105 | if(not self.enable_render): 106 | self.maze_core.render_init(view_size) 107 | self.maze_core.render_trajectory(file_name) 108 | 109 | class MazeWorldContinuous3D(MazeWorldEnvBase): 110 | def __init__(self, 111 | enable_render=True, 112 | render_scale=480, 113 | max_steps = 5000, 114 | resolution=(320, 320), 115 | visibility_3D=12.0, 116 | command_in_observation=False, 117 | action_space_type="Discrete16", # Must be one of Discrete16, Discrete32, Continuous 118 | ): 119 | super(MazeWorldContinuous3D, self).__init__( 120 | "Continuous3D", 121 | enable_render=enable_render, 122 | render_scale=render_scale, 123 | max_steps=max_steps 124 | ) 125 | self.maze_core = MazeCoreContinuous3D( 126 | resolution_horizon = resolution[0], 127 | resolution_vertical = resolution[1], 128 | max_steps = max_steps, 129 | visibility_3D=visibility_3D, 130 | command_in_observation=command_in_observation 131 | ) 132 | 133 | self.inner_action_list = None 134 | if(action_space_type == "Discrete16"): 135 | self.action_space = spaces.Discrete(16) 136 | # Using Default Discrete Actions 137 | self.inner_action_list = DEFAULT_ACTION_SPACE_16 138 | elif(action_space_type == "Discrete32"): 139 | self.action_space = spaces.Discrete(32) 140 | # Using Default Discrete Actions 141 | self.inner_action_list = DEFAULT_ACTION_SPACE_32 142 | elif(action_space_type == "Continuous"): 143 | # Turning Left/Right and go backward / forward 144 | self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=numpy.float32) 145 | else: 146 | raise ValueError("Invalid Action Space Type {}. Can only accept Discrete16, Discrete32, Continuous".format(action_space_type)) 147 | 148 | # observation is a first-person RGB image of shape (resolution[0], resolution[1], 3) 149 | self.observation_space = spaces.Box(low=0, high=255, shape=(resolution[0], resolution[1], 3), dtype=numpy.uint8) 150 | 151 | def action_control(self, action): 152 | if(action is None): # Only when there is no action input can we use keyboard control 153 | pygame.time.delay(20) # 50 FPS 154 | tr, ws = self.maze_core.movement_control(self.keyboard_press) 155 | else: 156 | if(self.inner_action_list is not None): 157 | tr, ws = self.inner_action_list[action] 158 | else: 159 | tr, ws = action 160 | if(tr is None or ws is None): 161 | return None 162 | return [tr, ws] 163 | 164 | @property 165 | def list_actions(self): 166 | return self.inner_action_list -------------------------------------------------------------------------------- /xenoverse/metalang/generator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 DeepEvolution Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
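# (annotation) Hypothetical command-line usage, inferred from the argparse flags defined at the bottom of this file; the module invocation path is an assumption:
#   python -m xenoverse.metalang.generator --sample_type tasks --samples 100 --output metalang_tasks.pkl
#   python -m xenoverse.metalang.generator --sample_type sequences --task_file metalang_tasks.pkl --sequence_length 4096 --output_type npy --output metalang_data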
14 | 15 | # This file is used to generate data for meta language models 16 | 17 | import sys 18 | import argparse 19 | import random 20 | import time 21 | import _io 22 | import numpy 23 | import pickle 24 | from xenoverse.metalang import MetaLangV1 25 | from xenoverse.metalang import MetaLangV2 26 | from xenoverse.metalang import TaskSamplerV1, TaskSamplerV2 27 | 28 | def _text_io(tokens, output_stream): 29 | if(isinstance(output_stream, _io.TextIOWrapper)): 30 | need_close = False 31 | elif(isinstance(output_stream, str)): 32 | output_stream = open(output_stream, "w") 33 | need_close = True 34 | else: 35 | output_stream = sys.stdout 36 | need_close = False 37 | for i in range(tokens.shape[0]): 38 | output_stream.write("\t".join(map(str, tokens[i].tolist()))) 39 | output_stream.write("\n") 40 | if(need_close): 41 | output_stream.close() 42 | 43 | def metalang_generator(sample_type='sequences', 44 | version='v1', 45 | vocab_size=64, 46 | patterns_number=10, 47 | n_gram=3, 48 | error_rate=0.15, 49 | embedding_size=16, 50 | hidden_size=64, 51 | samples=1000, 52 | lambda_weight=5.0, 53 | batch_size=1, 54 | task_file=None, 55 | sequence_length=4096, 56 | output_type='txt', 57 | output=None 58 | ): 59 | seed_base = int(time.time()*1000 % 1000000) 60 | 61 | if(sample_type == 'tasks'): 62 | if(output is None): 63 | raise Exception("Must specify --output when sample_type is tasks") 64 | if(version=='v1'): 65 | tasks = [TaskSamplerV1(seed=i+seed_base, 66 | n_vocab=vocab_size, 67 | n_patterns=patterns_number, 68 | n_gram=n_gram, 69 | error_ratio=error_rate) for i in range(samples)] 70 | elif(version=='v2'): 71 | tasks = [TaskSamplerV2(seed=i+seed_base, 72 | n_vocab=vocab_size, 73 | n_emb=embedding_size, 74 | n_hidden=hidden_size, 75 | n_gram=n_gram, 76 | _lambda=lambda_weight 77 | ) for i in range(samples)] 78 | output_file_name = output 79 | if(not output_file_name.endswith('.pkl')): 80 | output_file_name += '.pkl' 81 | pickle.dump(tasks, open(output_file_name, 'wb')) 82 | else: 83 | if(version == 'v1'): 84 | env = MetaLangV1(L=sequence_length) 85 | elif(version == 'v2'): 86 | env = MetaLangV2(L=sequence_length) 87 | 88 | if(task_file is None): 89 | tasks = None 90 | else: 91 | tasks = pickle.load(open(task_file, 'rb')) 92 | 93 | batch_size = batch_size 94 | if(tasks is None): 95 | # Generate a unique task for each sample 96 | batch_size = 1 97 | if(version=='v1'): 98 | tasks = [TaskSamplerV1(seed=i+seed_base, 99 | n_vocab=vocab_size, 100 | n_patterns=patterns_number, 101 | n_gram=n_gram, 102 | error_ratio=error_rate) for i in range(samples)] 103 | elif(version=='v2'): 104 | tasks = [TaskSamplerV2(seed=i+seed_base, 105 | n_vocab=vocab_size, 106 | n_emb=embedding_size, 107 | n_hidden=hidden_size, 108 | n_gram=n_gram, 109 | _lambda=lambda_weight 110 | ) for i in range(samples)] 111 | batch_number = (samples - 1) // batch_size + 1 112 | tokens = [] 113 | if(len(tasks) < batch_number): 114 | tasks = tasks * ((batch_number - 1)//len(tasks) + 1) 115 | random.shuffle(tasks) 116 | for i in range(batch_number): 117 | env.set_task(tasks[i]) 118 | seed_base = int(time.time()*1000 % 1000000) 119 | token = env.batch_generator(batch_size, seed=i+seed_base) 120 | tokens.append(token) 121 | tokens=numpy.concatenate(tokens, axis=0) 122 | 123 | if(output_type == 'npy'): 124 | numpy.save(output, tokens) 125 | elif(output_type == 'txt'): 126 | _text_io(tokens, output) 127 | 128 | 129 | if __name__=='__main__': 130 | parser = argparse.ArgumentParser(description='Generating Meta Language Tasks or Sequences') 131 | 
parser.add_argument('--version', type=str, choices=['v1', 'v2'], default='v2') 132 | parser.add_argument('--sample_type', type=str, choices=['tasks', 'sequences'], default='sequences', help='Generate tasks or sequences') 133 | parser.add_argument('--task_file', type=str, default=None, help='Specify task file to generate from if the sample_type is sequences. Default will generate task on the fly.') 134 | parser.add_argument('--vocab_size', type=int, default=64) 135 | parser.add_argument('--embedding_size', type=int, default=16) 136 | parser.add_argument('--hidden_size', type=int, default=16) 137 | parser.add_argument('--patterns_number', type=int, default=10) 138 | parser.add_argument('--error_rate', type=float, default=0.20) 139 | parser.add_argument('--n_gram', nargs='+', type=int, default=[3,4,5,6], help="A list of n-gram lengths used for generating tasks") 140 | parser.add_argument('--lambda_weight', type=float, default=5.0, help="Lambda weight multiplier for softmax sampling in MetaLangV2") 141 | parser.add_argument('--batch_size', type=int, default=1) 142 | parser.add_argument('--sequence_length', type=int, default=4096) 143 | parser.add_argument('--samples', type=int, default=100, help='number of sequences / tasks to generate') 144 | parser.add_argument('--output_type', type=str, choices=['txt', 'npy'], default='txt') 145 | parser.add_argument('--output', type=str, default=None) 146 | 147 | args = parser.parse_args() 148 | metalang_generator(**vars(args)) 149 | -------------------------------------------------------------------------------- /xenoverse/linds/solver.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import numpy as np 3 | from numpy import random 4 | from numba import njit 5 | import scipy.stats as stats 6 | from scipy.stats import spearmanr 7 | from copy import deepcopy 8 | 9 | import numpy as np 10 | import scipy.linalg as la 11 | from scipy import sparse 12 | import osqp 13 | import matplotlib.pyplot as plt 14 | from xenoverse.utils import dump_task, load_task 15 | 16 | class LTISystemMPC(object): 17 | """ 18 | Solving Linear Time-Invariant System Model Predictive Control Problem 19 | x_{k+1} = A x_k + B u_k + X 20 | y_k = C x_k + Y 21 | """ 22 | def __init__(self, env, 23 | K=20, # forward looking steps 24 | gamma=0.99): # discount factor applied to future tracking errors 25 | self.Na = env.action_space.shape[0] 26 | self.Nx = env.state_dim # state dimension 27 | self.Nu = env.action_dim # action dimension 28 | self.Ny = env.observation_dim # optimize in reward dimension 29 | self.A = env.ld_phi 30 | self.B = env.ld_gamma 31 | self.C = env.ld_C # directly map observation to reward space 32 | self.X = env.ld_Xt.reshape(-1, 1) 33 | self.Y = env.ld_Y.reshape(-1, 1) 34 | # MPC parameters 35 | self.K = K # forward looking steps 36 | Q = np.diag(gamma ** np.arange(K)) # discount factor weights 37 | P = np.eye(K) * env.action_cost / max(env.reward_factor, 1.0e-6) # action cost weights 38 | 39 | self.W_Q = np.kron(Q, np.diag(env.target_valid)) 40 | self.W_P = np.kron(P, np.eye(self.Nu)) 41 | 42 | # constraints 43 | self.u_min = env.action_space.low[:self.Nu] 44 | self.u_max = env.action_space.high[:self.Nu] 45 | 46 | self.u_lb = np.kron(np.ones(self.K), self.u_min) 47 | self.u_ub = np.kron(np.ones(self.K), self.u_max) 48 | 49 | # construct constraints 50 | self._pre_build_matrices() 51 | 52 | def _pre_build_matrices(self): 53 | 54 | """ 55 | forward looking prediction model 56 | OUTPUT = [y_1, y_2, ..., y_K] 57 | OUTPUT = H @ U + 
F1 @ x0 + F2 58 | Output: shape (K * Ny, 1) 59 | H: shape (K * Ny, K * Nu) 60 | U: shape (K * Nu, 1) 61 | x0: shape (Nx, 1) 62 | F1: shape (K * Ny, Nx) 63 | F2: shape (K * Ny, 1) 64 | """ 65 | 66 | self.H = np.zeros((self.K * self.Ny, self.K * self.Nu)) 67 | self.F1 = np.zeros((self.K * self.Ny, self.Nx)) 68 | self.F2 = np.zeros((self.K * self.Ny, 1)) 69 | 70 | A_power = list() 71 | tmp = np.eye(self.Nx) 72 | A_power.append(tmp) 73 | for i in range(self.K + 1): 74 | tmp = tmp @ self.A 75 | A_power.append(tmp) 76 | A_power = A_power[::-1] # store the powers of A in reverse order 77 | for i in range(self.K): 78 | for j in range(self.K-i): 79 | k = self.K - i - j - 1 80 | self.H[i*self.Ny:(i+1)*self.Ny, k*self.Nu:(k+1)*self.Nu] = self.C @ A_power[i] @ self.B 81 | self.F1[i*self.Ny:(i+1)*self.Ny, :] = self.C @ A_power[i+1] 82 | self.F2[i*self.Ny:(i+1)*self.Ny, :] = self.C @ A_power[i] @ self.X + self.Y 83 | 84 | # Pre-compute Sigma matrices 85 | self.W_H = self.H.T @ self.W_Q @ self.H + self.W_P 86 | 87 | # Pre-compute Bias effect 88 | self.G_11 = self.F1.T @ self.W_Q @ self.F1 89 | self.G_12 = self.F1.T @ self.W_Q 90 | self.G_21 = self.G_12.T 91 | 92 | self.A_cons = np.kron(np.eye(self.K), np.eye(self.Nu)) 93 | 94 | def solve(self, x_current, ref_trajectory): 95 | """ 96 | Reference trajectory can be shorter than K steps 97 | """ 98 | ref_trajectory = numpy.array(ref_trajectory) 99 | if(ref_trajectory.ndim == 1): 100 | Y_ref = np.kron(np.ones((self.K)), ref_trajectory[:self.Ny]).reshape(-1, 1) 101 | else: 102 | Y_ref = np.zeros((self.K * self.Ny, 1)) 103 | for i in range(self.K): 104 | if(ref_trajectory.shape[0] > i): 105 | Y_ref[self.Ny * (self.K - i - 1):self.Ny * (self.K - i), 0] = ref_trajectory[i, :self.Ny].flatten() 106 | else: 107 | Y_ref[self.Ny * (self.K - i - 1):self.Ny * (self.K - i), 0] = ref_trajectory[-1, :self.Ny].flatten() 108 | x = x_current.reshape(-1, 1) 109 | 110 | f = self.F1 @ x + self.F2 - Y_ref 111 | f = f.T @ self.W_Q @ self.H 112 | f = f.flatten() 113 | prob = osqp.OSQP() 114 | prob.setup(sparse.csc_matrix(self.W_H), f, sparse.csc_matrix(self.A_cons), 115 | self.u_lb, self.u_ub, verbose=False) 116 | res = prob.solve() 117 | 118 | if res.info.status != 'solved': 119 | print(f"Failed to solve QP: {res.info.status}") 120 | return None 121 | 122 | # return the optimal control sequence 123 | u_opt = numpy.zeros((self.Na, )) 124 | u_opt[:self.Nu] = res.x[:self.Nu] 125 | return u_opt 126 | 127 | def test_mpc(env, use_mpc=True, plot=False): 128 | mpc = LTISystemMPC(env, K=50, gamma=0.99) 129 | 130 | T_sim = 400 131 | obs, info = env.reset() 132 | x_current = env._state 133 | 134 | error_history = [] 135 | reward_history = [] 136 | 137 | for t in range(T_sim): 138 | #action = env.action_space.sample() 139 | cmd = env.get_future_inner_cmds(K=mpc.K) 140 | if(use_mpc is False): 141 | action = env.action_space.sample() 142 | else: 143 | action = mpc.solve(x_current, cmd) 144 | 145 | obs, reward, terminated, truncated, info = env.step(action) 146 | 147 | #print("Step {}, Obs {}, Act {}, State {}, Cmd {}, Target {}".format(t, obs, action, env._state, cmd, env.target_valid)) 148 | 149 | error_history.append(info["error"]) 150 | reward_history.append(reward) 151 | x_current = env._state 152 | 153 | if terminated or truncated: 154 | obs, info = env.reset() 155 | x_current = env._state 156 | 157 | if(plot): 158 | plt.figure(figsize=(12, 8)) 159 | 160 | plt.subplot(2, 1, 1) 161 | plt.plot(error_history, 'b-', label='errors') 162 | plt.legend() 163 | plt.grid(True) 164 | 165 | plt.subplot(2, 1, 2) 166 | 
plt.plot(reward_history, 'g-', label='rewards') 167 | plt.legend() 168 | plt.grid(True) 169 | 170 | plt.tight_layout() 171 | plt.show() 172 | 173 | tracking_error = np.mean(error_history) 174 | rewards = np.mean(reward_history) 175 | 176 | name = "MPC" if use_mpc else "Random" 177 | 178 | print(f"--{name}-- Tracking Errors: {tracking_error:.4f} Rewards: {rewards:.4f}") 179 | 180 | 181 | if __name__ == "__main__": 182 | import gymnasium as gym 183 | import numpy 184 | from xenoverse.linds import LinearDSSamplerRandomDim 185 | import argparse 186 | 187 | parser = argparse.ArgumentParser() 188 | parser.add_argument("--task", type=str, default=None) 189 | args = parser.parse_args() 190 | if(args.task is not None): 191 | task = load_task(args.task) 192 | else: 193 | task = LinearDSSamplerRandomDim() 194 | dump_task("./task.pkl", task) 195 | task["action_cost"] = 0.0 196 | env = gym.make("linear-dynamics-v0-visualizer") 197 | env.set_task(task) 198 | 199 | print("Task type:", task["target_type"]) 200 | print("Start MPC solver demonstration...") 201 | test_mpc(env, use_mpc=False, plot=True) 202 | test_mpc(env, plot=True) 203 | print("...Test Passed") -------------------------------------------------------------------------------- /xenoverse/mazeworld/envs/dynamics.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import math 3 | from numba import njit 4 | 5 | PI_4 = 0.7853981 6 | PI_2 = 1.5707963 7 | PI = 3.1415926 8 | t_PI = 6.2831852 9 | PI2d = 57.29578 10 | 11 | OFFSET_10 = numpy.asarray([0.5, 0.5], dtype="float64") 12 | OFFSET_01 = numpy.asarray([-0.5, 0.5], dtype="float64") 13 | OFFSET_m0 = numpy.asarray([-0.5, -0.5], dtype="float64") 14 | OFFSET_0m = numpy.asarray([0.5, -0.5], dtype="float64") 15 | 16 | DEFAULT_ACTION_SPACE_16 = [(0.0, 0.5), 17 | (0.05, 0.0), (-0.05, 0.0), 18 | (0.1, 0.0), (-0.1, 0.0), 19 | (0.2, 0.0), (-0.2, 0.0), 20 | (0.3, 0.0), (-0.3, 0.0), 21 | (0.5, 0.0), (-0.5, 0.0), 22 | (0.0, 1.0), 23 | (0.05, 1.0), 24 | (-0.05, 1.0), 25 | (0.10, 1.0), 26 | (-0.10, 1.0), 27 | ] 28 | 29 | DEFAULT_ACTION_SPACE_32 = [(0.0, 0.2), 30 | (0.02, 0.0), (-0.02, 0.0), 31 | (0.05, 0.0), (-0.05, 0.0), 32 | (0.1, 0.0), (-0.1, 0.0), 33 | (0.2, 0.0), (-0.2, 0.0), 34 | (0.3, 0.0), (-0.3, 0.0), 35 | (0.4, 0.0), (-0.4, 0.0), 36 | (0.5, 0.0), (-0.5, 0.0), 37 | (0.0, 0.5), (0.0, 1.0), 38 | (0.02, 0.5), (0.02, 1.0), 39 | (-0.02, 0.5), (-0.02, 1.0), 40 | (0.05, 0.5), (0.05, 1.0), 41 | (-0.05, 0.5), (-0.05, 1.0), 42 | (0.10, 0.5), (0.10, 1.0), 43 | (-0.10, 0.5), (-0.10, 1.0), 44 | (0.0, -0.2), 45 | (0.1, -0.2), (-0.1, -0.2) 46 | ] 47 | 48 | @njit(cache=True) 49 | def angle_normalization(t): 50 | while(t > PI): 51 | t -= t_PI 52 | while(t < -PI): 53 | t += t_PI 54 | return t 55 | 56 | @njit(cache=True) 57 | def nearest_point(pos, line_1, line_2): 58 | unit_ori = line_2 - line_1 59 | edge_norm = numpy.sqrt(numpy.sum(unit_ori * unit_ori)) 60 | unit_ori /= max(1.0e-6, edge_norm) 61 | 62 | dist_1 = numpy.sum((pos - line_1) * unit_ori) 63 | if(dist_1 > edge_norm): 64 | return numpy.sqrt(numpy.sum((pos - line_2) ** 2)), numpy.copy(line_2) 65 | elif(dist_1 < 0): 66 | return numpy.sqrt(numpy.sum((pos - line_1) ** 2)), numpy.copy(line_1) 67 | else: 68 | line_p = line_1 + dist_1 * unit_ori 69 | return numpy.sqrt(numpy.sum((pos - line_p) ** 2)), numpy.copy(line_p) 70 | 71 | @njit(cache=True) 72 | def collision_force(dist_vec, cell_size, col_dist): 73 | dist = float(numpy.sqrt(numpy.sum(dist_vec * dist_vec))) 74 | eff_col_dist = col_dist / cell_size 75 | if(dist 
> 0.708 + eff_col_dist): 76 | return numpy.array([0.0, 0.0], dtype="float64") 77 | if(abs(dist_vec[0]) < 0.5 and abs(dist_vec[1]) < 0.5): 78 | return numpy.float64(0.50 / max(dist, 1.0e-6) * (0.708 + eff_col_dist - dist) * cell_size) * dist_vec 79 | x_pos = (dist_vec[0] + dist_vec[1] > 0) 80 | y_pos = (dist_vec[1] - dist_vec[0] > 0) 81 | if(x_pos and y_pos): 82 | dist, np = nearest_point(dist_vec, OFFSET_10, OFFSET_01) 83 | elif((not x_pos) and y_pos): 84 | dist, np = nearest_point(dist_vec, OFFSET_01, OFFSET_m0) 85 | elif((not x_pos) and (not y_pos)): 86 | dist, np = nearest_point(dist_vec, OFFSET_m0, OFFSET_0m) 87 | elif(x_pos and (not y_pos)): 88 | dist, np = nearest_point(dist_vec, OFFSET_0m, OFFSET_10) 89 | 90 | if(eff_col_dist < dist): 91 | return numpy.array([0.0, 0.0], dtype="float64") 92 | else: 93 | ori = dist_vec - np 94 | ori_norm = numpy.sqrt(numpy.sum(ori * ori)) 95 | ori *= 1.0 / max(1.0e-6, ori_norm) 96 | return (0.50 * (eff_col_dist - dist) * cell_size) * ori 97 | 98 | @njit(cache=True) 99 | def vector_move_no_collision(ori, turn_rate, walk_speed, dt): 100 | d_theta = turn_rate * dt 101 | arc = walk_speed * dt 102 | c_theta = numpy.cos(ori) 103 | s_theta = numpy.sin(ori) 104 | c_dt = numpy.cos(0.5 * d_theta) 105 | s_dt = numpy.sin(0.5 * d_theta) 106 | 107 | n_ori = ori + d_theta 108 | # Shape it to [-PI, PI] 109 | n_ori = angle_normalization(n_ori) 110 | 111 | if(abs(d_theta) < 1.0e-8): 112 | d_x = c_theta * arc 113 | d_y = s_theta * arc 114 | else: 115 | # Turning Radius 116 | rad = walk_speed / turn_rate 117 | offset = 2.0 * s_dt * rad 118 | c_n = c_theta * c_dt - s_theta * s_dt 119 | s_n = c_theta * s_dt + s_theta * c_dt 120 | d_x = c_n * offset 121 | d_y = s_n * offset 122 | 123 | return n_ori, numpy.array([d_x, d_y], dtype="float64") 124 | 125 | @njit(cache=True) 126 | def search_optimal_action(ori, targ1, targ2, candidate_action, delta_t): 127 | d_targ1 = numpy.array(targ1, dtype=numpy.float64) 128 | if(targ2 is not None): 129 | d_targ2 = numpy.array(targ2, dtype=numpy.float64) 130 | costs = [] 131 | for action in candidate_action: 132 | tr = action[0] * PI 133 | ws = action[1] 134 | n_ori, n_loc = vector_move_no_collision(ori, tr, ws, delta_t) 135 | 136 | # The position error costs 137 | dist_loss = numpy.sum((n_loc - d_targ1) ** 2) 138 | dist = numpy.sqrt(dist_loss) 139 | cost = dist_loss 140 | 141 | # The action costs 142 | cost += 1.0e-4 * (action[0] ** 2 + action[1] ** 2) 143 | 144 | # The orientation costs 145 | targ1_ang = math.atan2(d_targ1[1], d_targ1[0]) 146 | delta1_ang = angle_normalization(targ1_ang - n_ori) 147 | delta2_ang = delta1_ang 148 | if(targ2 is not None): # Else try prepare for the next target 149 | targ2_ang = math.atan2(d_targ2[1], d_targ2[0]) 150 | delta2_ang = angle_normalization(targ2_ang - n_ori) 151 | 152 | # Try to face the next target by looking ahead 153 | f= min(dist/0.2, 1.0) 154 | cost += delta1_ang * delta1_ang * f + delta2_ang * delta2_ang * (1 - f) 155 | costs.append(cost) 156 | return numpy.argmin(numpy.array(costs)) 157 | 158 | def vector_move_with_collision(ori, pos, turn_rate, walk_speed, delta_t, cell_walls, cell_size, col_dist): 159 | slide_factor = 0.20 160 | tmp_pos = numpy.copy(numpy.array(pos, dtype="float64")) 161 | 162 | t_prec = 0.01 163 | iteration = int(delta_t / t_prec) 164 | collision = 0.0 165 | 166 | for i in range(iteration + 1): 167 | t_res = min(delta_t - i * t_prec, t_prec) 168 | if(t_res < 1.0e-8): 169 | continue 170 | ori, offset = vector_move_no_collision(ori, turn_rate, walk_speed, t_res) 171 | 
exp_pos = tmp_pos + offset 172 | exp_cell = exp_pos / cell_size 173 | 174 | #consider the collision in new cell 175 | col_f = numpy.array([0.0, 0.0], dtype="float64") 176 | for i in range(-1, 2): 177 | for j in range(-1, 2): 178 | w_i = i + int(exp_cell[0]) 179 | w_j = j + int(exp_cell[1]) 180 | if(w_i > -1 and w_i < cell_walls.shape[0] and w_j > -1 and w_j < cell_walls.shape[1]): 181 | if(cell_walls[w_i,w_j] > 0): 182 | cell_deta = exp_cell - numpy.floor(exp_cell) - numpy.array([i + 0.5, j + 0.5], dtype="float32") 183 | col_f += collision_force(cell_deta, cell_size, col_dist) 184 | tmp_pos = col_f + exp_pos 185 | collision += numpy.sqrt(numpy.sum(col_f ** 2)) 186 | 187 | return ori, tmp_pos, collision 188 | -------------------------------------------------------------------------------- /xenoverse/anyhvac/anyhvac_solver.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from xenoverse.anyhvac.anyhvac_env import HVACEnv 4 | from xenoverse.anyhvac.anyhvac_env_vis import HVACEnvVisible 5 | 6 | class HVACSolverGTPID: 7 | def __init__(self, env): 8 | 9 | self.env = env 10 | 11 | 12 | 13 | required_attrs = [ 14 | 'sensors', 'coolers', 'target_temperature', 15 | 'sec_per_step', 'lower_bound', 'upper_bound', 16 | 'include_time_in_observation', 'include_heat_in_observation' 17 | ] 18 | 19 | for attr in required_attrs: 20 | if not hasattr(env, attr): 21 | raise AttributeError(f"Missing required attribute: {attr}") 22 | setattr(self, attr, getattr(env, attr)) 23 | 24 | 25 | 26 | 27 | 28 | self.corr_sensor_cooler = [] 29 | for sensor in self.sensors: 30 | nx, ny = sensor.nloc 31 | # px, py = sensor.loc 32 | cooler_whts = numpy.asarray([cooler.cooler_diffuse[nx, ny] for cooler in self.coolers]) 33 | while(numpy.sum(cooler_whts) < 1.0e-6): 34 | cooler_whts *=10.0 35 | cooler_whts += 1.0e-12 36 | self.corr_sensor_cooler.append(cooler_whts) 37 | self.corr_sensor_cooler /= numpy.clip(numpy.sum(self.corr_sensor_cooler, axis=1, keepdims=True), a_min=1e-6, a_max=None) 38 | self.cooler_int = numpy.zeros(len(self.coolers)) 39 | self.minimum_action = numpy.ones(len(self.coolers)) * 0.01 40 | self.last_action = numpy.copy(self.minimum_action) 41 | self.acc_diff = numpy.zeros(len(self.sensors)) 42 | self.last_observation = numpy.array(self.env._get_obs()) 43 | self.ki = 2.0e-2 44 | self.kp = 5.0e-3 45 | self.kd = 5.0e-3 46 | self.delta_t = self.sec_per_step / 60 47 | 48 | def _extract_sensor_readings(self, observation_with_time): 49 | """ 50 | Extracts only the sensor readings from the observation vector, 51 | which might include a time component. 
52 | """ 53 | obs_array = numpy.array(observation_with_time) 54 | if self.env.include_time_in_observation or self.env.include_heat_in_observation: 55 | 56 | if obs_array.shape[0] > len(self.sensors): 57 | return obs_array[:len(self.sensors)] 58 | 59 | elif obs_array.shape[0] == len(self.sensors): 60 | return obs_array 61 | else: 62 | raise ValueError(f"Observation shape {obs_array.shape} incompatible with num_sensors {len(self.sensors)} and include_time_in_observation=True") 63 | else: 64 | return obs_array # No time feature expected 65 | def policy(self, observation): 66 | # 兼容observation含有t的情况 67 | current_sensor_readings = self._extract_sensor_readings(observation) 68 | # print(current_sensor_readings.shape, current_sensor_readings) 69 | effective_target_temp = self.target_temperature 70 | 71 | # current_observation_arr = numpy.array(observation) 72 | current_observation_arr = numpy.array(current_sensor_readings) 73 | 74 | # diff calculation 75 | 76 | diff = effective_target_temp - current_observation_arr 77 | # print("diff",diff) 78 | if self.last_observation.shape != current_observation_arr.shape: 79 | self.last_observation = numpy.zeros_like(current_observation_arr) # Re-initialize if shape mismatch 80 | 81 | last_diff = effective_target_temp - self.last_observation 82 | 83 | # Ensure self.acc_diff has the same shape as diff 84 | if self.acc_diff.shape != diff.shape: 85 | self.acc_diff = numpy.zeros_like(diff) # Re-initialize if shape mismatch 86 | self.acc_diff += diff 87 | # d_e calculation: This seems to result in a per-sensor error signal vector 88 | d_e = - (self.kp * diff - self.kd * (diff - last_diff) / self.delta_t + self.ki * self.acc_diff) 89 | action_values_continuous = numpy.matmul(d_e, self.corr_sensor_cooler) 90 | switch_continuous = (action_values_continuous > -0.05).astype(numpy.float32) 91 | # Value part: Clipped continuous values 92 | value_clipped = numpy.clip(action_values_continuous, 0.0, 1.0) 93 | self.last_action = numpy.concatenate((switch_continuous, value_clipped)) # Store the flat action 94 | self.last_observation = numpy.copy(current_observation_arr) 95 | n_coolers = len(self.coolers) 96 | flat_action = numpy.zeros(2 * n_coolers, dtype=numpy.float32) 97 | flat_action[:n_coolers] = switch_continuous 98 | flat_action[n_coolers:] = value_clipped 99 | 100 | return flat_action 101 | def policy_mask(self, observation, mask=None): 102 | # 兼容observation含有t的情况 103 | current_sensor_readings = self._extract_sensor_readings(observation) 104 | effective_target_temp = self.target_temperature 105 | current_observation_arr = numpy.array(current_sensor_readings) 106 | 107 | # 处理mask参数 108 | n_coolers = len(self.coolers) 109 | if mask is None: 110 | mask = numpy.ones(n_coolers, dtype=bool) # 默认所有节点都受控 111 | elif len(mask) != n_coolers: 112 | raise ValueError(f"Mask size {len(mask)} doesn't match number of coolers {n_coolers}") 113 | 114 | # 检测mask变化并重置整个PID状态 115 | if not hasattr(self, 'last_mask') or self.last_mask is None: 116 | self.last_mask = numpy.copy(mask) 117 | print("init mask = ", mask) 118 | 119 | mask_changed = not numpy.array_equal(mask, self.last_mask) 120 | if mask_changed: 121 | # 当mask变化时,重置整个PID状态 122 | self.acc_diff = numpy.zeros_like(self.acc_diff) # 重置积分项 123 | # self.last_observation = numpy.zeros_like(current_observation_arr) # 重置上一次观测值 124 | self.last_mask = numpy.copy(mask) 125 | # print("mask changes: ", mask) 126 | 127 | # 计算温度差异 128 | diff = effective_target_temp - current_observation_arr 129 | 130 | # 初始化历史数据(如果形状不匹配) 131 | if 
self.last_observation.shape != current_observation_arr.shape: 132 | self.last_observation = numpy.zeros_like(current_observation_arr) 133 | 134 | last_diff = effective_target_temp - self.last_observation 135 | 136 | # initialize the accumulated error (if shapes mismatch) 137 | if self.acc_diff.shape != diff.shape: 138 | self.acc_diff = numpy.zeros_like(diff) 139 | 140 | # update the PID error terms 141 | self.acc_diff += diff 142 | 143 | # compute the PID control signal 144 | d_e = - (self.kp * diff - self.kd * (diff - last_diff) / self.delta_t + self.ki * self.acc_diff) 145 | 146 | # compute action values only for the controlled coolers 147 | active_corr_matrix = self.corr_sensor_cooler[:, mask] 148 | active_action_values = numpy.matmul(d_e, active_corr_matrix) 149 | 150 | # create a full-size action array 151 | action_values_continuous = numpy.zeros(n_coolers, dtype=numpy.float32) 152 | action_values_continuous[mask] = active_action_values 153 | 154 | # compute switch signals (controlled coolers only) 155 | switch_continuous = numpy.zeros(n_coolers, dtype=numpy.float32) 156 | active_switch = (active_action_values > -0.05).astype(numpy.float32) 157 | switch_continuous[mask] = active_switch 158 | 159 | # clip the continuous action values (controlled coolers only) 160 | value_clipped = numpy.zeros(n_coolers, dtype=numpy.float32) 161 | active_value_clipped = numpy.clip(active_action_values, 0.0, 1.0) 162 | value_clipped[mask] = active_value_clipped 163 | 164 | # make sure uncontrolled coolers get zero action 165 | non_controlled = ~mask 166 | switch_continuous[non_controlled] = 0.0 167 | value_clipped[non_controlled] = 0.0 168 | 169 | # update the history state 170 | self.last_action = numpy.concatenate((switch_continuous, value_clipped)) 171 | self.last_observation = numpy.copy(current_observation_arr) 172 | 173 | # build the final flat action vector 174 | flat_action = numpy.zeros(2 * n_coolers, dtype=numpy.float32) 175 | flat_action[:n_coolers] = switch_continuous 176 | flat_action[n_coolers:] = value_clipped 177 | 178 | return flat_action 179 | --------------------------------------------------------------------------------
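Annotation (editorial, not a repository file): a minimal usage sketch for the PID solver above, assuming the gymnasium-style reset/step API used elsewhere in this repository; the HVACEnv constructor arguments and any task-sampling step are not shown in this dump, so their use here is an assumption.

from xenoverse.anyhvac.anyhvac_env import HVACEnv
from xenoverse.anyhvac.anyhvac_solver import HVACSolverGTPID

env = HVACEnv()                          # placeholder construction; real setup may require a sampled task
obs, info = env.reset()
solver = HVACSolverGTPID(env)            # reads env.sensors, env.coolers, etc.; PID gains are fixed in __init__
terminated = truncated = False
while not (terminated or truncated):
    action = solver.policy(obs)          # flat vector: [cooler switches, cooler power values]
    obs, reward, terminated, truncated, info = env.step(action)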