├── coop_marl ├── models │ ├── __init__.py │ └── modules.py ├── envs │ ├── overcooked │ │ ├── gym_cooking │ │ │ ├── misc │ │ │ │ ├── __init__.py │ │ │ │ └── game │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── graphics │ │ │ │ │ ├── Plate.png │ │ │ │ │ ├── plate.png │ │ │ │ │ ├── blender.png │ │ │ │ │ ├── FreshOnion.png │ │ │ │ │ ├── agent-blue.png │ │ │ │ │ ├── arrow_down.png │ │ │ │ │ ├── arrow_left.png │ │ │ │ │ ├── arrow_up.png │ │ │ │ │ ├── blender2.png │ │ │ │ │ ├── blender3.png │ │ │ │ │ ├── cutboard.png │ │ │ │ │ ├── delivery.png │ │ │ │ │ ├── ChoppedOnion.png │ │ │ │ │ ├── FreshCarrot.png │ │ │ │ │ ├── FreshLettuce.png │ │ │ │ │ ├── FreshTomato.png │ │ │ │ │ ├── MashedCarrot.png │ │ │ │ │ ├── agent-green.png │ │ │ │ │ ├── agent-yellow.png │ │ │ │ │ ├── arrow_right.png │ │ │ │ │ ├── ChoppedCarrot.png │ │ │ │ │ ├── ChoppedLettuce.png │ │ │ │ │ ├── ChoppedTomato.png │ │ │ │ │ ├── agent-magenta.png │ │ │ │ │ ├── InProgressCarrot.png │ │ │ │ │ ├── ChoppedLettuce-ChoppedOnion.png │ │ │ │ │ ├── ChoppedOnion-ChoppedTomato.png │ │ │ │ │ ├── ChoppedLettuce-ChoppedTomato.png │ │ │ │ │ └── ChoppedLettuce-ChoppedOnion-ChoppedTomato.png │ │ │ │ │ ├── screenshots │ │ │ │ │ ├── open_room_blender_agents1_03-01-22_01-42-09.png │ │ │ │ │ └── open_room_blender_agents1_03-01-22_01-42-33.png │ │ │ │ │ └── utils.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ └── new_style_level │ │ │ │ │ ├── open_room_tomato_salad.json │ │ │ │ │ ├── open_room_salad_easy.json │ │ │ │ │ ├── full_divider_salad_easy.json │ │ │ │ │ ├── open_room_tomato_salad_r.json │ │ │ │ │ ├── open_room_blender.json │ │ │ │ │ ├── full_divider_salad_more_ingred.json │ │ │ │ │ ├── full_divider_salad_static.json │ │ │ │ │ ├── full_divider_salad_2.json │ │ │ │ │ ├── full_divider_salad_3.json │ │ │ │ │ ├── full_divider_salad_4.json │ │ │ │ │ ├── open_room_salad.json │ │ │ │ │ └── full_divider_salad.json │ │ │ ├── cooking_book │ │ │ │ ├── __init__.py │ │ │ │ ├── recipe.py │ │ │ │ └── recipe_drawer.py │ │ │ ├── cooking_world 
│ │ │ │ ├── __init__.py │ │ │ │ ├── constants.py │ │ │ │ ├── abstract_classes.py │ │ │ │ └── world_objects.py │ │ │ ├── environment │ │ │ │ ├── game │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── game.py │ │ │ │ ├── graphics │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── graphic_store.py │ │ │ │ ├── __init__.py │ │ │ │ └── environment.py │ │ │ ├── __init__.py │ │ │ ├── test.py │ │ │ └── demo_multiplayer_gameplay.py │ │ ├── setup.py │ │ └── overcooked_maker.py │ ├── __init__.py │ ├── gym_maker.py │ ├── mpe │ │ └── _mpe_utils │ │ │ └── simple_env.py │ └── one_step_matrix.py ├── worker │ └── __init__.py ├── evaluation │ └── __init__.py ├── controllers │ ├── __init__.py │ └── controllers.py ├── runners │ ├── __init__.py │ └── runners.py ├── utils │ ├── __init__.py │ ├── rebar │ │ ├── README.md │ │ ├── setup.py │ │ └── rebar │ │ │ ├── __init__.py │ │ │ ├── interrupting.py │ │ │ ├── storing.py │ │ │ ├── contextlib.py │ │ │ ├── widgets.py │ │ │ ├── stats │ │ │ ├── gpu.py │ │ │ ├── __init__.py │ │ │ ├── categories.py │ │ │ ├── writing.py │ │ │ └── reading.py │ │ │ ├── recurrence.py │ │ │ ├── paths.py │ │ │ ├── parallel.py │ │ │ ├── numpy.py │ │ │ ├── queuing.py │ │ │ └── logging.py │ ├── nn.py │ ├── metrics.py │ ├── logger.py │ ├── parser.py │ └── utils.py ├── agents │ ├── __init__.py │ └── agent.py └── trainers │ ├── __init__.py │ └── trainer.py ├── .gitignore ├── config ├── envs │ ├── rendezvous.yaml │ ├── one_step_matrix.yaml │ └── overcooked.yaml └── algs │ ├── maven │ ├── overcooked.yaml │ ├── rendezvous.yaml │ ├── one_step_matrix.yaml │ └── default.yaml │ ├── multi_sp │ ├── rendezvous.yaml │ ├── overcooked.yaml │ ├── one_step_matrix.yaml │ └── default.yaml │ ├── sp_mi │ ├── rendezvous.yaml │ ├── overcooked.yaml │ ├── one_step_matrix.yaml │ └── default.yaml │ ├── trajedi │ ├── rendezvous.yaml │ ├── overcooked.yaml │ ├── one_step_matrix.yaml │ └── default.yaml │ ├── incompat │ ├── rendezvous.yaml │ ├── overcooked.yaml │ ├── one_step_matrix.yaml │ └── default.yaml │ ├── meta │ ├── 
overcooked.yaml │ └── default.yaml │ └── default.yaml ├── setup.py ├── scripts ├── overcooked │ ├── multi_sp.sh │ ├── lipo.sh │ ├── trajedi.sh │ ├── multi_maven.sh │ └── multi_sp_mi.sh ├── pmr-c │ ├── multi_sp.sh │ ├── multi_maven.sh │ ├── sp_mi.sh │ ├── maven.sh │ ├── multi_sp_mi.sh │ ├── lipo.sh │ └── trajedi.sh ├── pmr-l │ ├── multi_sp.sh │ ├── sp_mi.sh │ ├── maven.sh │ ├── lipo.sh │ ├── multi_sp_mi.sh │ ├── multi_maven.sh │ └── trajedi.sh ├── cmg-h │ ├── multi_sp.sh │ ├── lipo.sh │ ├── sp_mi.sh │ ├── trajedi.sh │ ├── multi_sp_mi.sh │ ├── multi_maven.sh │ └── maven.sh └── cmg-s │ ├── multi_sp.sh │ ├── sp_mi.sh │ ├── multi_sp_mi.sh │ ├── trajedi.sh │ ├── lipo.sh │ ├── multi_maven.sh │ └── maven.sh ├── requirements.txt ├── install.sh ├── main.py ├── README.md └── gif_view.py /coop_marl/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/cooking_book/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/cooking_world/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/environment/game/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/worker/__init__.py: -------------------------------------------------------------------------------- 1 | from .worker import RolloutWorker 2 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/environment/graphics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coop_marl/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from coop_marl.evaluation.eval import * 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.sublime-project 3 | 4 | *.sublime-workspace 5 | 6 | *__pycache__/ 7 | *.pyc 8 | *.egg-info/ 9 | 10 | *results* 11 | tmp/ 12 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/Plate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/Plate.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/plate.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/plate.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/blender.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/blender.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshOnion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshOnion.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-blue.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_down.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_left.png 
-------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_up.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/blender2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/blender2.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/blender3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/blender3.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/cutboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/cutboard.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/delivery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/delivery.png -------------------------------------------------------------------------------- /config/envs/rendezvous.yaml: -------------------------------------------------------------------------------- 1 | # Format 
goes like this 2 | # trainer_name: 3 | # k: v 4 | 5 | name: rendezvous 6 | horizon: 50 7 | n_landmarks: 4 8 | partner_obs: True 9 | mode: easy 10 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedOnion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedOnion.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshCarrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshCarrot.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshLettuce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshLettuce.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshTomato.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/FreshTomato.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/MashedCarrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/MashedCarrot.png 
-------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-green.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-yellow.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/arrow_right.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedCarrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedCarrot.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedTomato.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedTomato.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-magenta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/agent-magenta.png -------------------------------------------------------------------------------- /coop_marl/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from coop_marl.controllers.controllers import RandomController, PSController, MappingController 2 | 3 | __all__ = ['RandomController', 'PSController', 'MappingController'] -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/InProgressCarrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/InProgressCarrot.png -------------------------------------------------------------------------------- /coop_marl/runners/__init__.py: -------------------------------------------------------------------------------- 1 | from coop_marl.runners.runners import EpisodesRunner, StepsRunner 2 | 3 | __all__ = ['EpisodesRunner', 'StepsRunner'] 4 | 5 | registered_runners = {a:eval(a) for a in __all__} 6 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce-ChoppedOnion.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce-ChoppedOnion.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedOnion-ChoppedTomato.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedOnion-ChoppedTomato.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/environment/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_cooking.environment.environment import GymCookingEnvironment 2 | from gym_cooking.environment.cooking_zoo import CookingEnvironment as CookingZooEnvironment 3 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce-ChoppedTomato.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce-ChoppedTomato.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce-ChoppedOnion-ChoppedTomato.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/graphics/ChoppedLettuce-ChoppedOnion-ChoppedTomato.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup, 
find_packages 4 | 5 | setup(name='coop_marl', 6 | version='0.0.1', 7 | description='', 8 | packages=find_packages(), 9 | install_requires=[] 10 | ) -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/screenshots/open_room_blender_agents1_03-01-22_01-42-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/screenshots/open_room_blender_agents1_03-01-22_01-42-09.png -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/screenshots/open_room_blender_agents1_03-01-22_01-42-33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/51616/marl-lipo/HEAD/coop_marl/envs/overcooked/gym_cooking/misc/game/screenshots/open_room_blender_agents1_03-01-22_01-42-33.png -------------------------------------------------------------------------------- /config/algs/maven/overcooked.yaml: -------------------------------------------------------------------------------- 1 | def_config: !include config/algs/maven/default.yaml 2 | 3 | lr: 0.0005 4 | z_dim: 4 5 | n_iter: 30000 6 | n_sp_episodes: 8 7 | eval_interval: 10000 8 | discrim_coef: 0.1 9 | gamma: 0.99 10 | start_e: 1 11 | end_e: 0.05 12 | explore_decay_ts: 1000000 13 | -------------------------------------------------------------------------------- /config/algs/maven/rendezvous.yaml: -------------------------------------------------------------------------------- 1 | def_config: !include config/algs/maven/default.yaml 2 | 3 | lr: 0.0003 4 | z_dim: 4 5 | n_iter: 1000 6 | n_sp_episodes: 15 7 | eval_interval: 1000 8 | discrim_coef: 0.1 9 | gamma: 0.99 10 | start_e: 1 11 | end_e: 0.05 12 | explore_decay_ts: 200000 13 | 
-------------------------------------------------------------------------------- /config/algs/multi_sp/rendezvous.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/multi_sp/default.yaml 7 | 8 | pop_size: 4 9 | n_iter: 300 10 | eval_interval: 100 11 | epcohs: 10 12 | num_mb: 2 13 | lr: 0.0003 14 | -------------------------------------------------------------------------------- /coop_marl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from rebar import dotdict, arrdict 2 | 3 | Dotdict = dotdict.dotdict 4 | Arrdict = arrdict.arrdict 5 | 6 | from .utils import * 7 | from .nn import * 8 | from .rl import * 9 | from .logger import * 10 | from .parser import * 11 | from .metrics import * 12 | 13 | -------------------------------------------------------------------------------- /coop_marl/utils/rebar/README.md: -------------------------------------------------------------------------------- 1 | # rebar 2 | Reinforcement learning utils from https://github.com/andyljones/megastep. 3 | 4 | rebar is Andy Jones’s personal reinforcement learning toolbox. 5 | I'm just merely using his toolkit in my project. 
All credit goes to [Andy Jones](https://andyljones.com/) 6 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register(id="cookingEnv-v1", 4 | entry_point="gym_cooking.environment:GymCookingEnvironment") 5 | register(id="cookingZooEnv-v0", 6 | entry_point="gym_cooking.environment:CookingZooEnvironment") 7 | -------------------------------------------------------------------------------- /config/algs/sp_mi/rendezvous.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/sp_mi/default.yaml 8 | 9 | n_iter: 300 10 | eval_interval: 200 11 | z_dim: 4 12 | discrim_coef: 5.0 13 | epcohs: 10 14 | num_mb: 2 15 | lr: 0.0003 16 | -------------------------------------------------------------------------------- /config/envs/one_step_matrix.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # trainer_name: 3 | # k: v 4 | 5 | name: one_step_matrix 6 | n_conventions: 32 7 | k: [32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] # 8 8 | payoffs: np.ones(n_conventions) # np.linspace(1.0,0.5,n_conventions) 9 | -------------------------------------------------------------------------------- /scripts/overcooked/multi_sp.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 444 555 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/multi_sp/overcooked.yaml \ 4 | --env_config_file config/envs/overcooked.yaml \ 5 | --config '{"pop_size": 8, "render": 0, "save_folder":"training_partners_8"}' --seed $seed 6 | done 7 | 
-------------------------------------------------------------------------------- /config/algs/trajedi/rendezvous.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/trajedi/default.yaml 7 | 8 | n_iter: 300 9 | eval_interval: 200 10 | pop_size: 4 11 | diverse_coef: 10.0 12 | kernel_gamma: 0.0 13 | lr: 0.0003 14 | num_mb: 2 15 | epochs: 10 16 | -------------------------------------------------------------------------------- /config/algs/maven/one_step_matrix.yaml: -------------------------------------------------------------------------------- 1 | def_config: !include config/algs/maven/default.yaml 2 | 3 | render: False 4 | get_q_values: True 5 | lr: 0.0003 6 | z_dim: 32 7 | n_iter: 300 8 | n_sp_episodes: 100 9 | eval_interval: 500 10 | buffer_size: 10000 11 | discrim_coef: 10 12 | start_e: 1 13 | end_e: 0.05 14 | explore_decay_ts: 20000 15 | -------------------------------------------------------------------------------- /config/algs/multi_sp/overcooked.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/multi_sp/default.yaml 7 | 8 | runner: StepsRunner 9 | pop_size: 4 10 | n_iter: 2000 11 | n_sp_ts: 10000 12 | eval_interval: 500 13 | epochs: 15 14 | num_mb: 5 15 | lr: 0.0005 16 | ent_coef: 0.03 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mlagents_envs==0.27.0 2 | PettingZoo==1.9.0 3 | pygame==2.0.1 4 | pymunk==6.0.0 5 | ray==1.9.0 6 | pygifsicle==1.0.5 7 | pyyaml-include==1.2.post2 8 | pyglet==1.5.15 9 | opencv-python==3.4.14.53 10 | gym==0.18.3 11 | tensorboard==2.8.0 12 | xvfbwrapper==0.2.9 13 | jupyterlab 14 | wandb 
15 | matplotlib 16 | plotly 17 | tqdm 18 | -------------------------------------------------------------------------------- /coop_marl/utils/rebar/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | setup(name='rebar', 6 | version='0.0.1', 7 | description='rebar is Andy Jones’s personal reinforcement learning toolbox.', 8 | packages=find_packages(), 9 | python_requires='>=3.6', 10 | install_requires=[] 11 | ) 12 | -------------------------------------------------------------------------------- /scripts/overcooked/lipo.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 444 555 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/incompat/overcooked.yaml \ 4 | --env_config_file config/envs/overcooked.yaml \ 5 | --config '{"discrim_coef": 0.5, "pop_size": 8, "render": 0, "save_folder": "training_partners_8", "xp_coef": 0.3, "z_dim": 8}' \ 6 | --seed $seed 7 | done -------------------------------------------------------------------------------- /scripts/overcooked/trajedi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 444 555 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/trajedi/overcooked.yaml \ 4 | --env_config_file config/envs/overcooked.yaml \ 5 | --config '{"diverse_coef": 5, "kernel_gamma": 0.5, "pop_size": 8, "render": 0, "save_folder": "training_partners_8"}' --env_config '{}' --seed $seed 6 | done 7 | -------------------------------------------------------------------------------- /config/algs/sp_mi/overcooked.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/sp_mi/default.yaml 8 | 9 | runner: StepsRunner 10 | n_iter: 2000 11 
| n_sp_ts: 10000 12 | eval_interval: 500 13 | z_dim: 4 14 | discrim_coef: 10 15 | epochs: 15 16 | num_mb: 5 17 | lr: 0.0005 18 | ent_coef: 0.03 19 | -------------------------------------------------------------------------------- /config/algs/incompat/rendezvous.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/incompat/default.yaml 8 | 9 | trainer: incompat 10 | n_iter: 300 11 | eval_interval: 300 12 | pop_size: 8 13 | z_dim: 4 14 | xp_coef: 1.0 15 | 16 | epcohs: 10 17 | num_mb: 2 18 | lr: 0.0003 19 | discrim_coef: 0.5 20 | -------------------------------------------------------------------------------- /config/algs/trajedi/overcooked.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/trajedi/default.yaml 7 | 8 | runner: StepsRunner 9 | n_iter: 2000 10 | n_sp_ts: 10000 11 | eval_interval: 500 12 | pop_size: 4 13 | diverse_coef: 10.0 14 | kernel_gamma: 0.0 15 | lr: 0.0005 16 | num_mb: 5 17 | epochs: 15 18 | ent_coef: 0.03 19 | -------------------------------------------------------------------------------- /scripts/pmr-c/multi_sp.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 2 4 8 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/multi_sp/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"pop_size": '"${pop_size}"', "save_folder": "results_sweep_rendezvous"}' \ 8 | --env_config '{"mode": "easy"}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /scripts/pmr-l/multi_sp.sh: 
-------------------------------------------------------------------------------- 1 | for pop_size in 1 2 4 8 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/multi_sp/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"pop_size": '"${pop_size}"', "save_folder": "results_sweep_rendezvous"}' \ 8 | --env_config '{"mode": "hard"}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='cooking-gym', 4 | version='0.0.1', 5 | description='Cooking gym with graphics and ideas based on: "Too Many Cooks: Overcooked environment"', 6 | author='David Rother, Rose E. Wang', 7 | email='david@edv-drucksysteme.de', 8 | packages=find_packages() + [""], 9 | install_requires=[] 10 | ) 11 | -------------------------------------------------------------------------------- /scripts/overcooked/multi_maven.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 444 555 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/maven/overcooked.yaml \ 4 | --env_config_file config/envs/overcooked.yaml \ 5 | --config '{"algo_name": "multi_maven", "discrim_coef": 5, "n_iter": 30000, "n_sp_episodes": 4, "pop_size": 8, "render": 0, "save_folder": "training_partners_8", "trainer": "incompat", "z_dim": 8}' --seed $seed 6 | done 7 | -------------------------------------------------------------------------------- /scripts/overcooked/multi_sp_mi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 444 555 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/sp_mi/overcooked.yaml \ 4 | --env_config_file config/envs/overcooked.yaml \ 5 | --config 
'{"algo_name": "multi_sp_mi", "discrim_coef": 5, "n_sp_ts": 20000, "pop_size": 8, "render": 0, "save_folder": "training_partners_8", "trainer": "incompat", "z_dim": 8}' \ 6 | --env_config '{}' --seed $seed 7 | done 8 | -------------------------------------------------------------------------------- /scripts/cmg-h/multi_sp.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 8 16 32 64 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/multi_sp/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_uneven_m32"}' \ 8 | --env_config '{}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /scripts/cmg-s/multi_sp.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 8 16 32 64 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/multi_sp/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_k_8"}' \ 8 | --env_config '{"k": 8}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /config/algs/sp_mi/one_step_matrix.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/sp_mi/default.yaml 8 | 9 | render: False 10 | get_act_dist: True 11 | n_iter: 300 12 | eval_interval: 300 13 | n_sp_episodes: 100 14 | n_xp_episodes: 100 15 | pop_size: 1 16 | z_dim: 32 17 | lr: 0.0003 18 | ent_coef: 0.0 19 | discrim_coef: 1.0 20 | xp_coef: 0.0 21 | num_mb: 2 22 | epochs: 10 23 | 
-------------------------------------------------------------------------------- /config/algs/incompat/overcooked.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/incompat/default.yaml 8 | 9 | runner: StepsRunner 10 | trainer: incompat 11 | n_iter: 2000 12 | n_sp_ts: 10000 13 | n_xp_ts: 10000 14 | eval_interval: 500 15 | ent_coef: 0.03 16 | pop_size: 4 17 | z_dim: 8 18 | xp_coef: 0.2 19 | 20 | epochs: 15 21 | num_mb: 5 22 | lr: 0.0005 23 | discrim_coef: 0.1 24 | -------------------------------------------------------------------------------- /config/algs/multi_sp/one_step_matrix.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/multi_sp/default.yaml 8 | 9 | render: False 10 | get_act_dist: True 11 | n_iter: 300 12 | eval_interval: 300 13 | n_sp_episodes: 100 14 | n_xp_episodes: 100 15 | pop_size: 32 16 | z_dim: 8 17 | lr: 0.0003 18 | ent_coef: 0.0 19 | discrim_coef: 0.0 20 | xp_coef: 0.0 21 | 22 | num_mb: 2 23 | epochs: 10 24 | -------------------------------------------------------------------------------- /scripts/cmg-h/lipo.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 8 16 32 64 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/incompat/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"num_xp_pair_sample": 64, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_uneven_m32", "xp_coef": 1}' --env_config '{}' --seed $seed 8 | done 9 | done 10 | -------------------------------------------------------------------------------- /config/algs/trajedi/one_step_matrix.yaml: 
-------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/trajedi/default.yaml 7 | 8 | render: False 9 | get_act_dist: True 10 | n_iter: 300 11 | eval_interval: 300 12 | n_sp_episodes: 100 13 | n_xp_episodes: 100 14 | pop_size: 32 15 | z_dim: 8 16 | ent_coef: 0.0 17 | discrim_coef: 0.0 18 | diverse_coef: 10.0 19 | kernel_gamma: 0.0 20 | lr: 0.0003 21 | num_mb: 2 22 | epochs: 10 23 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/cooking_world/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ChopFoodStates(Enum): 5 | FRESH = "Fresh" 6 | CHOPPED = "Chopped" 7 | 8 | 9 | class BlenderFoodStates(Enum): 10 | FRESH = "Fresh" 11 | IN_PROGRESS = "InProgress" 12 | MASHED = "Mashed" 13 | 14 | 15 | ONION_INIT_STATE = ChopFoodStates.FRESH 16 | TOMATO_INIT_STATE = ChopFoodStates.FRESH 17 | LETTUCE_INIT_STATE = ChopFoodStates.FRESH 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /config/algs/incompat/one_step_matrix.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | 7 | def_config: !include config/algs/incompat/default.yaml 8 | 9 | render: False 10 | eval_all_pairs: False 11 | get_act_dist: True 12 | n_iter: 300 13 | eval_interval: 300 14 | n_sp_episodes: 100 15 | n_xp_episodes: 100 16 | pop_size: 32 17 | z_dim: 8 18 | lr: 0.0003 19 | ent_coef: 0.0 20 | discrim_coef: 0.0 21 | xp_coef: 1.0 22 | num_xp_pair_sample: 32 23 | num_mb: 2 24 | epochs: 10 25 | -------------------------------------------------------------------------------- /scripts/cmg-h/sp_mi.sh: 
-------------------------------------------------------------------------------- 1 | for pop_size in 8 16 32 64 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/sp_mi/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"discrim_coef": 50, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "simple", "z_dim": '"${pop_size}"'}' \ 8 | --env_config '{}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /scripts/cmg-s/sp_mi.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 8 16 32 64 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/sp_mi/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"discrim_coef": 50, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "simple", "z_dim": '"${pop_size}"'}' \ 8 | --env_config '{"k": 8}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /config/envs/overcooked.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # trainer_name: 3 | # k: v 4 | 5 | name: overcooked 6 | mode: full_divider_salad_4 7 | horizon: 200 8 | recipes: ["LettuceSalad", 9 | "TomatoSalad", 10 | "TomatoLettuceSalad", 11 | "TomatoCarrotSalad", 12 | "ChoppedCarrot", 13 | "ChoppedOnion" 14 | ] 15 | obs_spaces: dense 16 | num_agents: 2 17 | interact_reward: 0.5 18 | progress_reward: 1.0 19 | complete_reward: 10.0 20 | step_cost: 0.05 21 | -------------------------------------------------------------------------------- /coop_marl/utils/rebar/rebar/__init__.py: 
-------------------------------------------------------------------------------- 1 | """**rebar** helps with reinforcement. That's why it's called rebar! It's a toolkit that has evolved 2 | as I've worked on RL projects. 3 | 4 | Unlike the :mod:`megastep` module which is stable, documented and feature-complete, rebar is an unstable, 5 | undocumented work-in-progress. It's in the megastep repo because megastep itself uses two of rebar's most useful components: 6 | :class:`~rebar.dotdict.dotdict` and :class:`~rebar.arrdict.arrdict`, while the demo uses a whole lot more. 7 | """ -------------------------------------------------------------------------------- /coop_marl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from coop_marl.envs.gym_maker import GymMaker 2 | from coop_marl.envs.mpe.rendezvous import Rendezvous 3 | from coop_marl.envs.overcooked.overcooked_maker import OvercookedMaker 4 | from coop_marl.envs.one_step_matrix import OneStepMatrixGame 5 | 6 | registered_envs = {} 7 | registered_envs['gym_maker'] = GymMaker.make_env 8 | registered_envs['rendezvous'] = Rendezvous.make_env 9 | registered_envs['overcooked'] = OvercookedMaker.make_env 10 | registered_envs['one_step_matrix'] = OneStepMatrixGame.make_env 11 | -------------------------------------------------------------------------------- /scripts/cmg-s/multi_sp_mi.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 2 4 8 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/sp_mi/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 50, "n_sp_episodes": 800, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 8 | --env_config '{"k": 8}' --seed $seed 9 | done 10 | done 
11 | -------------------------------------------------------------------------------- /scripts/pmr-c/multi_maven.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 2 4 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"algo_name": "multi_maven", "discrim_coef": 10, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 8 | --env_config '{"mode": "easy"}' --seed $seed 9 | done 10 | done 11 | -------------------------------------------------------------------------------- /coop_marl/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from coop_marl.agents.agent import Agent 2 | 3 | from coop_marl.agents.qmix import QMIXAgent 4 | from coop_marl.agents.incompat_mappo_z import IncompatMAPPOZ 5 | from coop_marl.agents.mappo_trajedi import MAPPOTrajeDiAgent 6 | from coop_marl.agents.mappo_rl2 import MAPPORL2Agent 7 | 8 | __all__ = ['Agent', 9 | 'QMIXAgent', 10 | 'IncompatMAPPOZ', 11 | 'MAPPOTrajeDiAgent', 12 | 'MAPPORL2Agent'] 13 | 14 | registered_agents = {a:eval(a) for a in __all__} # dict([(a,eval(a)) for a in __all__]) 15 | -------------------------------------------------------------------------------- /coop_marl/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from coop_marl.trainers.trainer import Trainer, trainer_setup, population_based_setup, population_evaluation, collect_data 2 | from coop_marl.trainers.simple import SimplePSTrainer 3 | from coop_marl.trainers.incompat import IncompatTrainer 4 | from coop_marl.trainers.trajedi import TrajeDiTrainer 5 | from coop_marl.trainers.meta import MetaTrainer 6 | 7 | registered_trainers = {} 8 | 
9 | registered_trainers['simple'] = SimplePSTrainer 10 | registered_trainers['incompat'] = IncompatTrainer 11 | registered_trainers['trajedi'] = TrajeDiTrainer 12 | registered_trainers['meta'] = MetaTrainer 13 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo apt-get install gifsicle xvfb -y 3 | conda install -c conda-forge cudatoolkit=11.1 cudnn=8.4.1 4 | pip install --upgrade pip 5 | pip install setuptools==66 6 | pip install wheel==0.38.4 7 | pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html 8 | pip install -r requirements.txt 9 | pip install protobuf==3.20 10 | 11 | # install rebar 12 | cd coop_marl/utils/ 13 | git clone https://github.com/51616/rebar.git 14 | cd rebar 15 | python setup.py develop 16 | cd ../../.. 17 | 18 | cd coop_marl/envs/overcooked/ 19 | pip install -e . 20 | cd ../../.. 
21 | 22 | # install coop_marl 23 | python setup.py develop 24 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/test.py: -------------------------------------------------------------------------------- 1 | from gym_cooking.environment import cooking_zoo 2 | 3 | n_agents = 1 4 | num_humans = 1 5 | max_steps = 100 6 | render = False 7 | 8 | level = 'open_room_salad_easy' 9 | seed = 1 10 | record = False 11 | max_num_timesteps = 1000 12 | recipes = ["LettuceSalad", 'LettuceSalad'] 13 | 14 | env = parallel_env = cooking_zoo.parallel_env(level=level, num_agents=n_agents, record=record, 15 | max_steps=max_num_timesteps, recipes=recipes, obs_spaces=["dense"], 16 | interact_reward=0.5, progress_reward=1.0, complete_reward=10.0, 17 | step_cost=0.05) 18 | obs = env.reset() 19 | print(obs) 20 | -------------------------------------------------------------------------------- /config/algs/meta/overcooked.yaml: -------------------------------------------------------------------------------- 1 | 2 | def_config: !include config/algs/meta/default.yaml 3 | 4 | runner: StepsRunner 5 | n_iter: 500 6 | n_ts: 320000 7 | eval_interval: 100 8 | n_eval_ep: 2 9 | render: False 10 | training_device: 'cuda' 11 | 12 | n_workers: 16 # 8 13 | critic_use_local_obs: True 14 | anneal_lr: True 15 | num_anneal_iter: 500 16 | min_anneal_lr: 0.0003 17 | lr: 0.0005 18 | gamma: 0.99 19 | gae_lambda: 0.95 20 | ent_coef: 0.03 21 | clip_param: 0.3 22 | vf_clip_param: 10 23 | vf_coef: 1.0 24 | max_len: 50 25 | num_seq_mb: 1600 # num_seq_mb * max_len timesteps per minibatch -> n_ts/(max_len*num_seq_mb) minibatches 26 | num_mb: 0 27 | mb_size: 0 28 | epochs: 15 29 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] 30 | z_dim: 8 31 | 32 | partner_dir: [] 33 | partner_iterations: null 34 | -------------------------------------------------------------------------------- /scripts/cmg-h/trajedi.sh: 
-------------------------------------------------------------------------------- 1 | for pop_size in 8 16 64 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/trajedi/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"diverse_coef": 0.1, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_uneven_m32"}' \ 8 | --env_config '{}' --seed $seed 9 | done 10 | done 11 | 12 | for seed in 111 222 333 13 | do 14 | xvfb-run -a python main.py --config_file config/algs/trajedi/one_step_matrix.yaml \ 15 | --env_config_file config/envs/one_step_matrix.yaml \ 16 | --config '{"diverse_coef": 0.2, "pop_size": 32, "save_folder": "results_sweep_one_step_matrix_uneven_m32"}' \ 17 | --env_config '{}' --seed $seed 18 | done 19 | -------------------------------------------------------------------------------- /scripts/cmg-s/trajedi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/trajedi/one_step_matrix.yaml \ 4 | --env_config_file config/envs/one_step_matrix.yaml \ 5 | --config '{"diverse_coef": 0.05, "pop_size": 8, "save_folder": "results_sweep_one_step_matrix_k_8"}' \ 6 | --env_config '{"k": 8}' --seed $seed 7 | done 8 | 9 | for pop_size in 16 32 64 10 | do 11 | for seed in 111 222 333 12 | do 13 | xvfb-run -a python main.py --config_file config/algs/trajedi/one_step_matrix.yaml \ 14 | --env_config_file config/envs/one_step_matrix.yaml \ 15 | --config '{"diverse_coef": 0.01, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_k_8"}' \ 16 | --env_config '{"k": 8}' --seed $seed 17 | done 18 | done 19 | -------------------------------------------------------------------------------- /coop_marl/agents/agent.py: -------------------------------------------------------------------------------- 1 | from coop_marl.utils import 
Arrdict 2 | 3 | class Agent: 4 | # Every agent should have these functions 5 | def __init__(self, config): 6 | self.validate_config(config) 7 | 8 | def act(self, inp): 9 | raise NotImplementedError 10 | 11 | def preprocess(self, traj): 12 | raise NotImplementedError 13 | 14 | def train(self, batch): 15 | raise NotImplementedError 16 | 17 | # def get_dummy_decision(self): 18 | # raise NotImplementedError 19 | 20 | def get_prev_decision_view(self): 21 | # raise NotImplementedError 22 | return Arrdict() 23 | 24 | def reset(self): 25 | raise NotImplementedError 26 | 27 | def validate_config(self, config): 28 | raise NotImplementedError -------------------------------------------------------------------------------- /scripts/cmg-s/lipo.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/incompat/one_step_matrix.yaml \ 4 | --env_config_file config/envs/one_step_matrix.yaml \ 5 | --config '{"num_xp_pair_sample": 64, "pop_size": 8, "save_folder": "results_sweep_one_step_matrix_k_8", "xp_coef": 0.5}' \ 6 | --env_config '{"k": 8}' --seed $seed 7 | done 8 | 9 | for pop_size in 16 32 64 10 | do 11 | for seed in 111 222 333 12 | do 13 | xvfb-run -a python main.py --config_file config/algs/incompat/one_step_matrix.yaml \ 14 | --env_config_file config/envs/one_step_matrix.yaml \ 15 | --config '{"num_xp_pair_sample": 64, "pop_size":'"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_k_8", "xp_coef": 1}' \ 16 | --env_config '{"k": 8}' --seed $seed 17 | done 18 | done 19 | -------------------------------------------------------------------------------- /config/algs/default.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # trainer_name: 3 | # k: v 4 | trainer: simple 5 | runner: EpisodesRunner 6 | render: True 7 | render_only_sp: False 8 | render_mode: 'rgb_array' 9 | vary_z_eval: 
False 10 | n_workers: 16 11 | flatten_traj: True 12 | num_cpus: 1 13 | use_gpu: False 14 | debug: False 15 | device: 'cpu' 16 | n_grad_cum: 0 17 | training_device: 'cpu' 18 | save_folder: 'results' 19 | checkpoint: Null 20 | run_name: '' 21 | save_interval: 0 22 | 23 | anneal_lr: False 24 | l2_reg_coef: 0.0 25 | min_action: -1 26 | max_action: 1 27 | norm_obs: True 28 | norm_ret: True 29 | norm_state: True 30 | anneal_lr: False 31 | use_value_norm: False 32 | pol_init_var: 1.0 33 | hidden_size: 64 34 | num_hidden: 2 35 | clip_v_loss: True 36 | save_dir: Null 37 | shared_z: True 38 | 39 | use_bandit: False 40 | uniform_selector_keep_last: False 41 | -------------------------------------------------------------------------------- /config/algs/trajedi/default.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/default.yaml 7 | 8 | render: True 9 | render_mode: 'rgb_array' 10 | num_cpus: 1 11 | use_gpu: False 12 | debug: False 13 | 14 | algo_name: trajedi 15 | trainer: trajedi 16 | runner: EpisodesRunner 17 | agent_name: MAPPOTrajeDiAgent 18 | 19 | use_br: False 20 | n_iter: 500 21 | pop_size: 2 22 | diverse_coef: 1.0 23 | kernel_gamma: 0.0 24 | flatten_traj: True 25 | 26 | eval_interval: 50 27 | n_sp_episodes: 50 28 | n_xp_episodes: 50 29 | n_eval_ep: 10 30 | 31 | z_dim: 4 32 | z_discrete: True 33 | gamma: 0.99 34 | lr: 0.0001 35 | vf_coef: 0.5 36 | ent_coef: 0.03 37 | epochs: 5 38 | num_mb: 3 39 | gae_lambda: 0.95 40 | clip_param: 0.3 41 | vf_clip_param: 10.0 42 | 43 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/misc/game/utils.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | 3 | 4 | class Color: 5 | BLACK = (0, 0, 0) 6 | FLOOR = 
(245, 230, 210) # light gray 7 | COUNTER = (220, 170, 110) # tan/gray 8 | COUNTER_BORDER = (114, 93, 51) # darker tan 9 | DELIVERY = (96, 96, 96) # grey 10 | 11 | 12 | KeyToTuple = { 13 | pygame.K_UP: (0, -1), # 273 14 | pygame.K_DOWN: (0, 1), # 274 15 | pygame.K_RIGHT: (1, 0), # 275 16 | pygame.K_LEFT: (-1, 0), # 276 17 | } 18 | 19 | KeyToTuple_human1 = { 20 | pygame.K_UP: 4, # 273 21 | pygame.K_DOWN: 3, # 274 22 | pygame.K_RIGHT: 2, # 275 23 | pygame.K_LEFT: 1, # 276 24 | pygame.K_SPACE: 0, 25 | pygame.K_f: 5 26 | } 27 | 28 | KeyToTuple_human2 = { 29 | pygame.K_w: (0, -1), 30 | pygame.K_s: (0, 1), 31 | pygame.K_d: (1, 0), 32 | pygame.K_a: (-1, 0), 33 | } 34 | -------------------------------------------------------------------------------- /coop_marl/utils/rebar/rebar/interrupting.py: -------------------------------------------------------------------------------- 1 | import signal 2 | from .contextlib import maybeasynccontextmanager 3 | import logging 4 | 5 | log = logging.getLogger(__name__) 6 | 7 | class Interrupter: 8 | 9 | def __init__(self): 10 | self._is_set = False 11 | 12 | def check(self): 13 | if self._is_set: 14 | self.reset() 15 | raise KeyboardInterrupt() 16 | 17 | def handle(self, signum, frame): 18 | log.info('Setting interrupt flag') 19 | self._is_set = True 20 | 21 | def reset(self): 22 | self._is_set = False 23 | 24 | _INTERRUPTER = Interrupter() 25 | 26 | @maybeasynccontextmanager 27 | def interrupter(): 28 | old = signal.signal(signal.SIGINT, _INTERRUPTER.handle) 29 | try: 30 | yield _INTERRUPTER 31 | finally: 32 | _INTERRUPTER.reset() 33 | signal.signal(signal.SIGINT, old) -------------------------------------------------------------------------------- /scripts/pmr-c/sp_mi.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 4 8 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 6 | --env_config_file 
config/envs/rendezvous.yaml \ 7 | --config '{"discrim_coef": 1, "n_sp_episodes": 400, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": '"${pop_size}"'}' \ 8 | --env_config '{"mode": "easy"}' --seed $seed 9 | done 10 | done 11 | 12 | 13 | for seed in 111 222 333 14 | do 15 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 16 | --env_config_file config/envs/rendezvous.yaml \ 17 | --config '{"discrim_coef": 10, "n_sp_episodes": 400, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": 2}' \ 18 | --env_config '{"mode": "easy"}' --seed $seed 19 | done 20 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/open_room_tomato_salad.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- -\n- -\n- -\n- -\n- -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [1], "Y_POSITION": [6]}}, 4 | {"CutBoard": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [5]}}, 5 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [3], "Y_POSITION": [0]}}], 6 | "DYNAMIC_OBJECTS": [{"Plate": {"COUNT": 1, "X_POSITION": [4], "Y_POSITION": [0]}}, 7 | {"Plate": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [3]}}, 8 | {"Tomato": {"COUNT": 1, "X_POSITION": [2], "Y_POSITION": [6]}}, 9 | {"Tomato": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [4]}} 10 | ], 11 | "AGENTS": [{"MAX_COUNT": 4, "X_POSITION": [2, 3, 4], "Y_POSITION": [2, 3, 4]}] 12 | } -------------------------------------------------------------------------------- /config/algs/maven/default.yaml: -------------------------------------------------------------------------------- 1 | def_config: !include config/algs/default.yaml 2 | 3 | algo_name: maven 4 | trainer: simple 5 | agent_name: QMIXAgent 6 | runner: EpisodesRunner 7 | flatten_traj: False 8 | vary_z_eval: True 
9 | n_iter: 500 10 | n_sp_episodes: 10 11 | n_eval_ep: 10 # for each z value 12 | eval_interval: 50 13 | hidden_dim: 64 14 | mixing_embed_dim: 32 15 | hypernet_embed: 128 16 | buffer_size: 1000 17 | batch_size: 128 18 | 19 | maven: True 20 | discrim_coef: 0.1 21 | z_dim: 4 22 | z_discrete: True 23 | discrim_hidden_dim: 64 24 | z_policy: False 25 | 26 | lr: 0.001 27 | gamma: 0.99 28 | start_e: 1 29 | end_e: 0.05 30 | explore_decay_ts: 100000 31 | target_update_freq: 25 32 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] 33 | 34 | # for incompat trainer 35 | pop_size: 1 36 | num_xp_pair_sample: 0 37 | use_bandit: False 38 | pg_xp_max_only: False 39 | value_xp_max_only: False 40 | eval_all_pairs: False 41 | -------------------------------------------------------------------------------- /scripts/pmr-c/maven.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 2 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"discrim_coef": 10, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": '"${pop_size}"'}' --env_config '{"mode": "easy"}' --seed $seed 8 | done 9 | done 10 | 11 | for pop_size in 4 8 12 | do 13 | for seed in 111 222 333 14 | do 15 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 16 | --env_config_file config/envs/rendezvous.yaml \ 17 | --config '{"discrim_coef": 1, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": '"${pop_size}"'}' --env_config '{"mode": "easy"}' --seed $seed 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/open_room_salad_easy.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- -\n- -\n- -\n- -\n- -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [4]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [1]}}, 8 | {"Lettuce": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [5]}}, 9 | {"Tomato": {"COUNT": 2, "X_POSITION": [1,2], "Y_POSITION": [0]}}, 10 | {"Onion": {"COUNT": 2, "X_POSITION": [4,5], "Y_POSITION": [0]}}, 11 | {"Carrot": {"COUNT": 2, "X_POSITION": [2,4], "Y_POSITION": [6]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 4, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [1 ,2, 3, 4, 5]}] 14 | } -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/full_divider_salad_easy.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- - -\n- - -\n- - -\n- - -\n- - -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [4]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [1]}}, 8 | {"Lettuce": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [5]}}, 9 | {"Tomato": {"COUNT": 2, "X_POSITION": [1,2], "Y_POSITION": [0]}}, 10 | {"Onion": {"COUNT": 2, "X_POSITION": [4,5], "Y_POSITION": [0]}}, 11 | {"Carrot": {"COUNT": 2, "X_POSITION": [2,4], "Y_POSITION": [6]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 4, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [1 ,2, 3, 4, 5]}] 14 | } -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/open_room_tomato_salad_r.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- -\n- -\n- -\n- -\n- -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 4 | {"CutBoard": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 5 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}], 6 | "DYNAMIC_OBJECTS": [{"Plate": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 7 | {"Plate": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 8 | {"Tomato": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 9 | {"Tomato": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}} 10 | ], 11 | "AGENTS": [{"MAX_COUNT": 4, "X_POSITION": [2, 3, 4], "Y_POSITION": [2, 3, 4]}] 12 | } -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/open_room_blender.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- -\n- -\n- -\n- -\n- -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 4 | {"Blender": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 5 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}], 6 | "DYNAMIC_OBJECTS": [{"Plate": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 7 | {"Plate": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 8 | {"Lettuce": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 9 | {"Carrot": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}} 10 | ], 11 | "AGENTS": [{"MAX_COUNT": 4, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [1 ,2, 3, 4, 5]}] 12 | } 
-------------------------------------------------------------------------------- /scripts/pmr-c/multi_sp_mi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 4 | --env_config_file config/envs/rendezvous.yaml \ 5 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 1, "n_sp_episodes": 400, "n_workers": 16, "pop_size": 1, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 6 | --env_config '{"mode": "easy"}' --seed $seed 7 | done 8 | 9 | for pop_size in 2 4 10 | do 11 | for seed in 111 222 333 12 | do 13 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 14 | --env_config_file config/envs/rendezvous.yaml \ 15 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 1, "n_sp_episodes": 400, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 16 | --env_config '{"mode": "easy"}' --seed $seed 17 | done 18 | done 19 | -------------------------------------------------------------------------------- /scripts/cmg-s/multi_maven.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 4 | --env_config_file config/envs/one_step_matrix.yaml \ 5 | --config '{"algo_name": "multi_maven", "discrim_coef": 5, "n_sp_episodes": 800, "n_workers": 16, "pop_size": 1, "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 6 | --env_config '{"k": 8}' --seed $seed 7 | done 8 | 9 | for pop_size in 2 4 8 10 | do 11 | for seed in 111 222 333 12 | do 13 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 14 | --env_config_file 
config/envs/one_step_matrix.yaml \ 15 | --config '{"algo_name": "multi_maven", "discrim_coef": 1, "n_sp_episodes": 800, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 16 | --env_config '{"k": 8}' --seed $seed 17 | done 18 | done -------------------------------------------------------------------------------- /scripts/cmg-h/multi_sp_mi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/sp_mi/one_step_matrix.yaml \ 4 | --env_config_file config/envs/one_step_matrix.yaml \ 5 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 10, "n_sp_episodes": 800, "n_workers": 16, "pop_size": 1, "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 6 | --env_config {} --seed $seed 7 | done 8 | 9 | for pop_size in 2 4 8 10 | do 11 | for seed in 111 222 333 12 | do 13 | xvfb-run -a python main.py --config_file config/algs/sp_mi/one_step_matrix.yaml \ 14 | --env_config_file config/envs/one_step_matrix.yaml \ 15 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 50, "n_sp_episodes": 800, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 16 | --env_config {} --seed $seed 17 | done 18 | done 19 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/environment/graphics/graphic_store.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym_cooking.cooking_world.world_objects import * 3 | from collections import namedtuple 4 | 5 | 6 | GraphicScaling = namedtuple("GraphicScaling", ["holding_scale", "container_scale"]) 7 | 8 | 9 | class GraphicStore: 10 | 
11 | OBJECT_PROPERTIES = {Blender: GraphicScaling(None, 0.5)} 12 | 13 | def __init__(self, world_height, world_width): 14 | self.scale = 80 # num pixels per tile 15 | self.holding_scale = 0.5 16 | self.container_scale = 0.7 17 | self.width = self.scale * world_width 18 | self.height = self.scale * world_height 19 | self.tile_size = (self.scale, self.scale) 20 | self.holding_size = tuple((self.holding_scale * np.asarray(self.tile_size)).astype(int)) 21 | self.container_size = tuple((self.container_scale * np.asarray(self.tile_size)).astype(int)) 22 | self.holding_container_size = tuple((self.container_scale * np.asarray(self.holding_size)).astype(int)) 23 | 24 | 25 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/full_divider_salad_more_ingred.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- - -\n- - -\n- - -\n- - -\n- - -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [4]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [1]}}, 8 | {"Lettuce": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [5]}}, 9 | {"Tomato": {"COUNT": 2, "X_POSITION": [6], "Y_POSITION": [2,3]}}, 10 | {"Onion": {"COUNT": 2, "X_POSITION": [4,5], "Y_POSITION": [0]}}, 11 | {"Carrot": {"COUNT": 2, "X_POSITION": [4,5], "Y_POSITION": [6]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 1, "X_POSITION": [1 ,2], "Y_POSITION": [1 ,2, 3, 4, 5]}, 14 | {"MAX_COUNT": 1, "X_POSITION": [4 ,5], "Y_POSITION": [1 ,2, 3, 4, 5]} 15 | ] 16 | } -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/full_divider_salad_static.json: -------------------------------------------------------------------------------- 1 
| { 2 | "LEVEL_LAYOUT": "-------\n- - -\n- - -\n- - -\n- - -\n- - -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [3]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 1, "X_POSITION": [3], "Y_POSITION": [3]}}, 8 | {"Lettuce": {"COUNT": 2, "X_POSITION": [0,6], "Y_POSITION": [5]}}, 9 | {"Tomato": {"COUNT": 2, "X_POSITION": [1,2], "Y_POSITION": [0]}}, 10 | {"Onion": {"COUNT": 2, "X_POSITION": [4,5], "Y_POSITION": [0]}}, 11 | {"Carrot": {"COUNT": 2, "X_POSITION": [2,4], "Y_POSITION": [6]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 1, "X_POSITION": [1 ,2], "Y_POSITION": [1 ,2, 3, 4, 5]}, 14 | {"MAX_COUNT": 1, "X_POSITION": [4 ,5], "Y_POSITION": [1 ,2, 3, 4, 5]} 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /coop_marl/utils/rebar/rebar/storing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pickle 3 | from . 
import paths 4 | import time 5 | 6 | def store_latest(run_name, objs, throttle=0): 7 | path = paths.path(run_name, 'storing').with_suffix('.pkl') 8 | if path.exists(): 9 | if (time.time() - path.lstat().st_mtime) < throttle: 10 | return False 11 | 12 | state_dicts = {k: v.state_dict() for k, v in objs.items()} 13 | bs = pickle.dumps(state_dicts) 14 | path.with_suffix('.tmp').write_bytes(bs) 15 | path.with_suffix('.tmp').rename(path) 16 | 17 | return True 18 | 19 | def runs(): 20 | return paths.runs() 21 | 22 | def stored(run_name=-1): 23 | ps = paths.subdirectory(run_name, 'storing').glob('*.pkl') 24 | infos = [] 25 | for p in ps: 26 | infos.append({ 27 | **paths.parse(p), 28 | 'path': p}) 29 | 30 | return pd.DataFrame(infos) 31 | 32 | def load(run_name=-1, procname='MainProcess'): 33 | path = stored(run_name).loc[lambda df: df.procname == procname].iloc[-1].path 34 | return pickle.loads(path.read_bytes()) -------------------------------------------------------------------------------- /config/algs/meta/default.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/default.yaml 7 | 8 | algo_name: meta_rl # used for creating a save directory 9 | trainer: meta 10 | runner: EpisodesRunner 11 | agent_name: MAPPORL2Agent 12 | render: True 13 | render_mode: 'rgb_array' 14 | eval_interval: 50 15 | n_eval_ep: 10 16 | num_cpus: 1 17 | use_gpu: False 18 | debug: False 19 | 20 | n_workers: 4 21 | n_iter: 400 22 | eval_interval: 50 23 | n_episodes: 50 24 | n_ts: 5000 25 | n_eval_ep: 10 26 | z_dim: 4 27 | z_discrete: True 28 | flatten_traj: True 29 | critic_use_local_obs: False 30 | 31 | hidden_size: 256 32 | lr: 0.001 33 | gamma: 0.99 34 | gae_lambda: 0.95 35 | ent_coef: 0.01 36 | clip_param: 0.3 37 | vf_clip_param: 10 38 | vf_coef: 1.0 39 | max_len: 50 40 | num_seq_mb: 100 # 100*50 timesteps per minibatch 41 | num_mb: 0 42 | 
mb_size: 0 43 | epochs: 10 44 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] 45 | shared_z: False 46 | 47 | partner_dir: [] 48 | partner_iterations: [] 49 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/full_divider_salad_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- - -\n- - -\n- - -\n- - -\n- - -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [3]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 1, "X_POSITION": [3], "Y_POSITION": [1,2,3,4,5]}}, 8 | {"Lettuce": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [1,2,3,4,5]}}, 9 | {"Tomato": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [1,2,3,4,5]}}, 10 | {"Onion": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [1,2,3,4,5]}}, 11 | {"Carrot": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [1,2,3,4,5]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 1, "X_POSITION": [1 ,2], "Y_POSITION": [1 ,2, 3, 4, 5]}, 14 | {"MAX_COUNT": 1, "X_POSITION": [4 ,5], "Y_POSITION": [1 ,2, 3, 4, 5]} 15 | ] 16 | } -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/full_divider_salad_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- - -\n- - -\n- - -\n- - -\n- - -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [3]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 1, "X_POSITION": [3], "Y_POSITION": [1,2,3,4,5]}}, 8 | {"Lettuce": {"COUNT": 2, "X_POSITION": [0], "Y_POSITION": [1,2,3,4,5]}}, 9 | {"Tomato": {"COUNT": 2, "X_POSITION": [6], "Y_POSITION": 
[1,2,3,4,5]}}, 10 | {"Onion": {"COUNT": 2, "X_POSITION": [6], "Y_POSITION": [1,2,3,4,5]}}, 11 | {"Carrot": {"COUNT": 2, "X_POSITION": [0], "Y_POSITION": [1,2,3,4,5]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 1, "X_POSITION": [1 ,2], "Y_POSITION": [1 ,2, 3, 4, 5]}, 14 | {"MAX_COUNT": 1, "X_POSITION": [4 ,5], "Y_POSITION": [1 ,2, 3, 4, 5]} 15 | ] 16 | } -------------------------------------------------------------------------------- /scripts/cmg-h/multi_maven.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 4 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 6 | --env_config_file config/envs/one_step_matrix.yaml \ 7 | --config '{"algo_name": "multi_maven", "discrim_coef": 1, "n_sp_episodes": 800, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 8 | --env_config '{}' --seed $seed 9 | done 10 | done 11 | 12 | for pop_size in 2 8 13 | do 14 | for seed in 111 222 333 15 | do 16 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 17 | --env_config_file config/envs/one_step_matrix.yaml \ 18 | --config '{"algo_name": "multi_maven", "discrim_coef": 5, "n_sp_episodes": 800, "n_workers": 16, "pop_size": '"${pop_size}"', "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 19 | --env_config '{}' --seed $seed 20 | done 21 | done -------------------------------------------------------------------------------- /scripts/pmr-c/lipo.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 4 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"pop_size": 
'"${pop_size}"', "save_folder": "results_sweep_rendezvous", "xp_coef": 0.5}' \ 8 | --env_config '{"mode": "easy"}' --seed $seed 9 | done 10 | done 11 | 12 | for seed in 111 222 333 13 | do 14 | xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 15 | --env_config_file config/envs/rendezvous.yaml \ 16 | --config '{"pop_size": 2, "save_folder": "results_sweep_rendezvous", "xp_coef": 0.1}' \ 17 | --env_config '{"mode": "easy"}' --seed $seed 18 | done 19 | 20 | for seed in 111 222 333 21 | do 22 | xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 23 | --env_config_file config/envs/rendezvous.yaml \ 24 | --config '{"pop_size": 8, "save_folder": "results_sweep_rendezvous", "xp_coef": 0.25}' \ 25 | --env_config '{"mode": "easy"}' --seed $seed 26 | done 27 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/full_divider_salad_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- - -\n- - -\n- - -\n- - -\n- - -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [0], "Y_POSITION": [3]}}, 4 | {"DeliverSquare": {"COUNT": 1, "X_POSITION": [6], "Y_POSITION": [3]}} 5 | ], 6 | "DYNAMIC_OBJECTS": [ 7 | {"Plate": {"COUNT": 1, "X_POSITION": [3], "Y_POSITION": [1,2,3,4,5]}}, 8 | {"Lettuce": {"COUNT": 1, "X_POSITION": [0,3], "Y_POSITION": [1,2,3,4,5]}}, 9 | {"Tomato": {"COUNT": 1, "X_POSITION": [3,6], "Y_POSITION": [1,2,3,4,5]}}, 10 | {"Onion": {"COUNT": 1, "X_POSITION": [0,3], "Y_POSITION": [1,2,3,4,5]}}, 11 | {"Carrot": {"COUNT": 1, "X_POSITION": [3,6], "Y_POSITION": [1,2,3,4,5]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 1, "X_POSITION": [1 ,2], "Y_POSITION": [1 ,2, 3, 4, 5]}, 14 | {"MAX_COUNT": 1, "X_POSITION": [4 ,5], "Y_POSITION": [1 ,2, 3, 4, 5]} 15 | ] 16 | } -------------------------------------------------------------------------------- 
/coop_marl/utils/nn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | FLOAT_MIN = -3.4e38 5 | FLOAT_MAX = 3.4e38 6 | 7 | # https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppo_continuous_action.py 8 | def ortho_layer_init(layer, std=np.sqrt(2), bias_const=0.0): 9 | torch.nn.init.orthogonal_(layer.weight, std) 10 | torch.nn.init.constant_(layer.bias, bias_const) 11 | return layer 12 | 13 | def k_uniform_init(layer, a=0, mode='fan_in', nonlinearity='leaky_relu'): 14 | torch.nn.init.kaiming_uniform_(layer.weight, a, mode, nonlinearity) 15 | torch.nn.init.constant_(layer.bias, 0) 16 | return layer 17 | 18 | def dict_to_tensor(obs_dict, device, axis=0, dtype=torch.float): 19 | # takes a dict of obs (e.g. player->obs) and returns tensor of obs as [N_player, obs_dim] 20 | return torch.stack([torch.as_tensor(o, dtype=dtype, device=device) for o in obs_dict.values()], axis=axis) 21 | 22 | def dict_to_np(obs_dict,*, axis=0, dtype=np.float32): 23 | # takes a dict of obs (e.g. 
player->obs) and returns tensor of obs as [N_player, obs_dim] 24 | return np.stack([np.array(o, dtype=dtype) for o in obs_dict.values()], axis=axis) -------------------------------------------------------------------------------- /config/algs/sp_mi/default.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/default.yaml 7 | 8 | trainer: incompat 9 | runner: EpisodesRunner 10 | algo_name: sp_mi # used for creating a save directory 11 | render: True 12 | render_mode: 'rgb_array' 13 | eval_interval: 50 14 | n_eval_ep: 10 15 | num_cpus: 1 16 | use_gpu: False 17 | debug: False 18 | agent_name: IncompatMAPPOZ 19 | flatten_traj: False 20 | vary_z_eval: True 21 | eval_all_pairs: False 22 | num_xp_pair_sample: 0 23 | 24 | parent_only: False 25 | n_iter: 400 26 | eval_interval: 50 27 | n_sp_episodes: 50 28 | n_xp_episodes: 50 29 | n_eval_ep: 10 30 | z_dim: 4 31 | z_discrete: True 32 | pop_size: 1 33 | flatten_traj: False 34 | pg_xp_max_only: True 35 | value_xp_max_only: False 36 | anneal_xp: False 37 | 38 | lr: 0.0001 39 | use_hypernet: False 40 | # hyper_l2_reg_coef: 0.0001 41 | xp_coef: 0.0 42 | discrim_coef: 0.1 43 | gamma: 0.99 44 | gae_lambda: 0.95 45 | ent_coef: 0.03 46 | clip_param: 0.3 47 | vf_clip_param: 10 48 | vf_coef: 0.5 49 | num_mb: 5 50 | epochs: 3 51 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] 52 | -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/utils/new_style_level/open_room_salad.json: -------------------------------------------------------------------------------- 1 | { 2 | "LEVEL_LAYOUT": "-------\n- -\n- -\n- -\n- -\n- -\n-------", 3 | "STATIC_OBJECTS": [{"CutBoard": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 4 | {"CutBoard": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 5 
| {"DeliverSquare": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}], 6 | "DYNAMIC_OBJECTS": [{"Plate": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 7 | {"Plate": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 8 | {"Lettuce": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}}, 9 | {"Tomato": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 10 | {"Onion": {"COUNT": 1, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [0, 6]}}, 11 | {"Tomato": {"COUNT": 1, "X_POSITION": [0, 6], "Y_POSITION": [1 ,2, 3, 4, 5]}} 12 | ], 13 | "AGENTS": [{"MAX_COUNT": 4, "X_POSITION": [1 ,2, 3, 4, 5], "Y_POSITION": [1 ,2, 3, 4, 5]}] 14 | } -------------------------------------------------------------------------------- /coop_marl/envs/overcooked/gym_cooking/environment/environment.py: -------------------------------------------------------------------------------- 1 | from gym_cooking.environment import cooking_zoo 2 | from gym.utils import seeding 3 | 4 | import gym 5 | 6 | 7 | class GymCookingEnvironment(gym.Env): 8 | """Environment object for Overcooked.""" 9 | 10 | metadata = {'render.modes': ['human'], 'name': "cooking_zoo"} 11 | 12 | def __init__(self, level, record, max_steps, recipe, obs_spaces=["numeric"]): 13 | super().__init__() 14 | self.num_agents = 1 15 | self.zoo_env = cooking_zoo.parallel_env(level=level, num_agents=self.num_agents, record=record, 16 | max_steps=max_steps, recipes=[recipe], obs_spaces=obs_spaces) 17 | self.observation_space = self.zoo_env.observation_spaces["player_0"] 18 | self.action_space = self.zoo_env.action_spaces["player_0"] 19 | 20 | def step(self, action): 21 | converted_action = {"player_0": action} 22 | obs, reward, done, info = self.zoo_env.step(converted_action) 23 | return obs["player_0"], reward["player_0"], done["player_0"], info["player_0"] 24 | 25 | def reset(self): 26 | return self.zoo_env.reset()["player_0"] 27 | 28 | def render(self, 
mode='human'): 29 | pass 30 | 31 | -------------------------------------------------------------------------------- /scripts/pmr-l/sp_mi.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 2 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"discrim_coef": 10, "n_sp_episodes": 400, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": '"${pop_size}"'}' \ 8 | --env_config '{"mode": "hard"}' --seed $seed 9 | done 10 | done 11 | 12 | for seed in 111 222 333 13 | do 14 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 15 | --env_config_file config/envs/rendezvous.yaml \ 16 | --config '{"discrim_coef": 5, "n_sp_episodes": 400, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": 4}' \ 17 | --env_config '{"mode": "hard"}' --seed $seed 18 | done 19 | 20 | for seed in 111 222 333 21 | do 22 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 23 | --env_config_file config/envs/rendezvous.yaml \ 24 | --config '{"discrim_coef": 1, "n_sp_episodes": 400, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": 8}' \ 25 | --env_config '{"mode": "hard"}' --seed $seed 26 | done 27 | -------------------------------------------------------------------------------- /scripts/cmg-h/maven.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 4 | --env_config_file config/envs/one_step_matrix.yaml \ 5 | --config '{"discrim_coef": 5, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "simple", "z_dim": 8}' --env_config '{}' --seed 
$seed 6 | done 7 | 8 | for seed in 111 222 333 9 | do 10 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 11 | --env_config_file config/envs/one_step_matrix.yaml \ 12 | --config '{"discrim_coef": 10, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "simple", "z_dim": 16}' --env_config '{}' --seed $seed 13 | done 14 | 15 | for pop_size in 32 64 16 | do 17 | for seed in 111 222 333 18 | do 19 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 20 | --env_config_file config/envs/one_step_matrix.yaml \ 21 | --config '{"discrim_coef": 50, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_uneven_m32", "trainer": "simple", "z_dim": '"${pop_size}"'}' --env_config '{}' --seed $seed 22 | done 23 | done 24 | -------------------------------------------------------------------------------- /scripts/cmg-s/maven.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 4 | --env_config_file config/envs/one_step_matrix.yaml \ 5 | --config '{"discrim_coef": 1, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "simple", "z_dim": 8}' \ 6 | --env_config '{"k": 8}' --seed $seed 7 | done 8 | 9 | for pop_size in 16 32 10 | do 11 | for seed in 111 222 333 12 | do 13 | xvfb-run -a python main.py --config_file config/algs/maven/one_step_matrix.yaml \ 14 | --env_config_file config/envs/one_step_matrix.yaml \ 15 | --config '{"discrim_coef": 5, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "simple", "z_dim": '"${pop_size}"'}' \ 16 | --env_config '{"k": 8}' --seed $seed 17 | done 18 | done 19 | 20 | for seed in 111 222 333 21 | do 22 | xvfb-run -a python main.py --config_file 
config/algs/maven/one_step_matrix.yaml \ 23 | --env_config_file config/envs/one_step_matrix.yaml \ 24 | --config '{"discrim_coef": 10, "n_sp_episodes": 6400, "n_workers": 16, "save_folder": "results_sweep_one_step_matrix_k_8", "trainer": "simple", "z_dim": 64}' \ 25 | --env_config '{"k": 8}' --seed $seed 26 | done 27 | -------------------------------------------------------------------------------- /scripts/pmr-l/maven.sh: -------------------------------------------------------------------------------- 1 | for pop_size in 1 8 2 | do 3 | for seed in 111 222 333 4 | do 5 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 6 | --env_config_file config/envs/rendezvous.yaml \ 7 | --config '{"discrim_coef": 1, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": '"${pop_size}"'}' \ 8 | --env_config '{"mode": "hard"}' --seed $seed 9 | done 10 | done 11 | 12 | for seed in 111 222 333 13 | do 14 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 15 | --env_config_file config/envs/rendezvous.yaml \ 16 | --config '{"discrim_coef": 0.005, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": 2}' \ 17 | --env_config '{"mode": "hard"}' --seed $seed 18 | done 19 | 20 | for seed in 111 222 333 21 | do 22 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 23 | --env_config_file config/envs/rendezvous.yaml \ 24 | --config '{"discrim_coef": 5, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "save_folder": "results_sweep_rendezvous", "trainer": "simple", "z_dim": 4}' \ 25 | --env_config '{"mode": "hard"}' --seed $seed 26 | done 27 | -------------------------------------------------------------------------------- /scripts/pmr-l/lipo.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | 
xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 4 | --env_config_file config/envs/rendezvous.yaml \ 5 | --config '{"pop_size": 1, "save_folder": "results_sweep_rendezvous", "xp_coef": 0.5}' --env_config '{"mode": "hard"}' --seed $seed 6 | done 7 | 8 | for seed in 111 222 333 9 | do 10 | xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 11 | --env_config_file config/envs/rendezvous.yaml \ 12 | --config '{"pop_size": 2, "save_folder": "results_sweep_rendezvous", "xp_coef": 0.25}' --env_config '{"mode": "hard"}' --seed $seed 13 | done 14 | 15 | for seed in 111 222 333 16 | do 17 | xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 18 | --env_config_file config/envs/rendezvous.yaml \ 19 | --config '{"pop_size": 4, "save_folder": "results_sweep_rendezvous", "xp_coef": 0.25}' --env_config '{"mode": "hard"}' --seed $seed 20 | done 21 | 22 | for seed in 111 222 333 23 | do 24 | xvfb-run -a python main.py --config_file config/algs/incompat/rendezvous.yaml \ 25 | --env_config_file config/envs/rendezvous.yaml \ 26 | --config '{"pop_size": 8, "save_folder": "results_sweep_rendezvous", "xp_coef": 0.25}' --env_config '{"mode": "hard"}' --seed $seed 27 | done 28 | -------------------------------------------------------------------------------- /scripts/pmr-l/multi_sp_mi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 4 | --env_config_file config/envs/rendezvous.yaml \ 5 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 1, "n_sp_episodes": 400, "n_workers": 16, "pop_size": 1, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' --env_config '{"mode": "hard"}' --seed $seed 6 | done 7 | 8 | for seed in 111 222 333 9 | do 10 | xvfb-run -a python main.py --config_file 
config/algs/sp_mi/rendezvous.yaml \ 11 | --env_config_file config/envs/rendezvous.yaml \ 12 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 5, "n_sp_episodes": 400, "n_workers": 16, "pop_size": 2, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' --env_config '{"mode": "hard"}' --seed $seed 13 | done 14 | 15 | for seed in 111 222 333 16 | do 17 | xvfb-run -a python main.py --config_file config/algs/sp_mi/rendezvous.yaml \ 18 | --env_config_file config/envs/rendezvous.yaml \ 19 | --config '{"algo_name": "multi_sp_mi", "discrim_coef": 1, "n_sp_episodes": 400, "n_workers": 16, "pop_size": 4, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' --env_config '{"mode": "hard"}' --seed $seed 20 | done 21 | -------------------------------------------------------------------------------- /config/algs/multi_sp/default.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/default.yaml 7 | 8 | render: True 9 | render_mode: 'rgb_array' 10 | num_cpus: 1 11 | use_gpu: False 12 | debug: False 13 | 14 | algo_name: multi_sp 15 | trainer: incompat 16 | runner: EpisodesRunner 17 | agent_name: IncompatMAPPOZ 18 | 19 | use_gpu: False 20 | debug: False 21 | flatten_traj: False 22 | parent_only: False 23 | eval_all_pairs: False 24 | z_dim: 4 25 | z_discrete: True 26 | pop_size: 1 27 | 28 | pg_xp_max_only: True 29 | value_xp_max_only: False 30 | shared_z: False 31 | use_bandit: False 32 | anneal_xp: False 33 | 34 | discrim_coef: 0.0 35 | xp_coef: 0.0 36 | use_hypernet: False 37 | num_xp_pair_sample: 0 38 | 39 | # these two reduce the trajedi trainer to just be a multi-run trainer 40 | use_br: False 41 | diverse_coef: 0.0 42 | 43 | n_iter: 100 44 | pop_size: 2 45 | kernel_gamma: 0.0 46 | flatten_traj: True 47 | 48 | eval_interval: 50 49 | 
n_sp_episodes: 50 50 | n_xp_episodes: 50 51 | n_sp_ts: 5000 52 | n_xp_ts: 5000 53 | n_eval_ep: 10 54 | hidden_size: 64 55 | num_xp_pair_sample: 0 56 | 57 | gamma: 0.99 58 | lr: 0.0001 59 | vf_coef: 0.5 60 | ent_coef: 0.03 61 | epochs: 5 62 | num_mb: 3 63 | mb_size: 0 64 | gae_lambda: 0.95 65 | clip_param: 0.3 66 | vf_clip_param: 10.0 67 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] 68 | -------------------------------------------------------------------------------- /scripts/pmr-l/multi_maven.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 4 | --env_config_file config/envs/rendezvous.yaml \ 5 | --config '{"algo_name": "multi_maven", "discrim_coef": 10, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "pop_size": 1, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 6 | --env_config '{"mode": "hard"}' --seed $seed 7 | done 8 | 9 | for seed in 111 222 333 10 | do 11 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 12 | --env_config_file config/envs/rendezvous.yaml \ 13 | --config '{"algo_name": "multi_maven", "discrim_coef": 10, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "pop_size": 2, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 14 | --env_config '{"mode": "hard"}' --seed $seed 15 | done 16 | 17 | for seed in 111 222 333 18 | do 19 | xvfb-run -a python main.py --config_file config/algs/maven/rendezvous.yaml \ 20 | --env_config_file config/envs/rendezvous.yaml \ 21 | --config '{"algo_name": "multi_maven", "discrim_coef": 5, "n_iter": 4000, "n_sp_episodes": 30, "n_workers": 16, "pop_size": 4, "save_folder": "results_sweep_rendezvous", "trainer": "incompat", "vary_z_eval": 1, "z_dim": 8}' \ 22 | --env_config '{"mode": "hard"}' --seed $seed 23 | done 24 | 
-------------------------------------------------------------------------------- /config/algs/incompat/default.yaml: -------------------------------------------------------------------------------- 1 | # Format goes like this 2 | # def_config: 3 | # k: v 4 | # config: 5 | # k: v 6 | def_config: !include config/algs/default.yaml 7 | 8 | algo_name: incompat # used for creating a save directory 9 | trainer: incompat 10 | runner: EpisodesRunner 11 | agent_name: IncompatMAPPOZ 12 | render: True 13 | render_only_sp: True 14 | render_mode: 'rgb_array' 15 | eval_interval: 50 16 | n_eval_ep: 10 17 | num_cpus: 1 18 | use_gpu: False 19 | debug: False 20 | flatten_traj: False 21 | training_device: cuda 22 | 23 | parent_only: False 24 | eval_all_pairs: True 25 | n_iter: 400 26 | num_xp_pair_sample: 1000 # sample all pairs by default 27 | eval_interval: 50 28 | n_sp_episodes: 50 29 | n_xp_episodes: 50 30 | n_sp_ts: 5000 31 | n_xp_ts: 5000 32 | n_eval_ep: 10 33 | z_dim: 4 34 | z_discrete: True 35 | pop_size: 2 36 | flatten_traj: False 37 | 38 | lr: 0.0001 39 | use_hypernet: False 40 | pg_xp_max_only: True 41 | value_xp_max_only: False 42 | xp_coef: 1.0 43 | discrim_coef: 0.1 44 | gamma: 0.99 45 | gae_lambda: 0.95 46 | ent_coef: 0.03 47 | clip_param: 0.3 48 | vf_clip_param: 10 49 | vf_coef: 1.0 # 0.5 50 | num_mb: 5 51 | mb_size: 0 52 | epochs: 3 53 | env_wrappers: [ZWrapper, AgentIDWrapper, StateWrapper] 54 | shared_z: False 55 | use_bandit: False 56 | bandit_eps: 0.1 57 | bandit_window_size: 3 58 | uniform_selector_keep_last: False 59 | 60 | anneal_xp: False 61 | xp_coef_stop: 0.1 62 | xp_coef_start: 0.0 63 | n_anneal_iter: 1000 64 | -------------------------------------------------------------------------------- /scripts/pmr-c/trajedi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/trajedi/rendezvous.yaml \ 4 | --env_config_file 
config/envs/rendezvous.yaml \ 5 | --config '{"diverse_coef": 10, "kernel_gamma": 0.1, "pop_size": 1, "save_folder": "results_sweep_rendezvous"}' \ 6 | --env_config '{"mode": "easy"}' --seed $seed 7 | done 8 | 9 | for seed in 111 222 333 10 | do 11 | xvfb-run -a python main.py --config_file config/algs/trajedi/rendezvous.yaml \ 12 | --env_config_file config/envs/rendezvous.yaml \ 13 | --config '{"diverse_coef": 50, "kernel_gamma": 0.1, "pop_size": 2, "save_folder": "results_sweep_rendezvous"}' \ 14 | --env_config '{"mode": "easy"}' --seed $seed 15 | done 16 | 17 | for seed in 111 222 333 18 | do 19 | xvfb-run -a python main.py --config_file config/algs/trajedi/rendezvous.yaml \ 20 | --env_config_file config/envs/rendezvous.yaml \ 21 | --config '{"diverse_coef": 1, "kernel_gamma": 0, "pop_size": 4, "save_folder": "results_sweep_rendezvous"}' \ 22 | --env_config '{"mode": "easy"}' --seed $seed 23 | done 24 | 25 | for seed in 111 222 333 26 | do 27 | xvfb-run -a python main.py --config_file config/algs/trajedi/rendezvous.yaml \ 28 | --env_config_file config/envs/rendezvous.yaml \ 29 | --config '{"diverse_coef": 10, "kernel_gamma": 0.5, "pop_size": 8, "save_folder": "results_sweep_rendezvous"}' \ 30 | --env_config '{"mode": "easy"}' --seed $seed 31 | done 32 | -------------------------------------------------------------------------------- /scripts/pmr-l/trajedi.sh: -------------------------------------------------------------------------------- 1 | for seed in 111 222 333 2 | do 3 | xvfb-run -a python main.py --config_file config/algs/trajedi/rendezvous.yaml \ 4 | --env_config_file config/envs/rendezvous.yaml \ 5 | --config '{"diverse_coef": 50, "kernel_gamma": 0.1, "pop_size": 1, "save_folder": "results_sweep_rendezvous"}' \ 6 | --env_config '{"mode": "hard"}' --seed $seed 7 | done 8 | 9 | for seed in 111 222 333 10 | do 11 | xvfb-run -a python main.py --config_file config/algs/trajedi/rendezvous.yaml \ 12 | --env_config_file config/envs/rendezvous.yaml \ 13 | 
from contextlib import contextmanager
from functools import wraps

class MaybeAsyncGeneratorContextManager:
    """Context manager built from a generator function that works with both
    `with` and `async with`.

    The sync and async managers are created lazily on first entry and
    cached, so a given instance should be entered at most once per mode.
    """

    def __init__(self, func, args, kwargs):
        self._func = func
        self._args = args
        self._kwargs = kwargs
        self._sync = None
        self._async = None

    def __enter__(self):
        if self._sync is None:
            syncfunc = contextmanager(self._func)
            self._sync = syncfunc(*self._args, **self._kwargs)
        return self._sync.__enter__()

    def __exit__(self, t, v, tb):
        return self._sync.__exit__(t, v, tb)

    def __aenter__(self):
        if self._async is None:
            # Hide this 3.8 import; most users will never hit it
            from contextlib import asynccontextmanager

            @asynccontextmanager
            async def asyncfunc(*args, **kwargs):
                # Propagate the value yielded by the underlying generator so
                # `async with ... as x` binds the same value as `with ... as x`.
                # (Previously the value was dropped and x was always None.)
                with contextmanager(self._func)(*args, **kwargs) as value:
                    yield value
            self._async = asyncfunc(*self._args, **self._kwargs)
        return self._async.__aenter__()

    def __aexit__(self, t, v, tb):
        return self._async.__aexit__(t, v, tb)

def maybeasynccontextmanager(func):
    """Like `contextlib.contextmanager`, but the result also supports
    `async with` (the generator body itself still runs synchronously)."""
    @wraps(func)
    def helper(*args, **kwds):
        return MaybeAsyncGeneratorContextManager(func, args, kwds)
    return helper
class Compositor:
    """Horizontal stack of ipywidgets Output panes, displayed immediately
    on construction via an HBox."""

    def __init__(self, lines=80):
        # `lines` is only stored and passed through to each Output;
        # presumably a height hint for consumers — TODO confirm at call sites.
        self.lines = lines
        self._box = widgets.HBox(
            layout=widgets.Layout(align_items='stretch'))
        display(self._box)

    def output(self):
        """Create a full-width pane, append it to the box, and return it
        wrapped in the local Output helper."""
        output = widgets.Output(
            layout=widgets.Layout(width='100%'))
        self._box.children = (*self._box.children, output)

        return Output(self, output, self.lines)

    def remove(self, child):
        # Close the underlying ipywidgets Output, then rebuild the
        # children tuple without it.
        child.close()
        self._box.children = tuple(c for c in self._box.children if c != child)

    def clear(self):
        # Iterating self._box.children while removing is safe: children is
        # a tuple, and remove() rebinds the attribute rather than mutating it.
        for child in self._box.children:
            self.remove(child)
def get_avg_metrics(metrics):
    """Average metrics per agent.

    `metrics` is a list (one entry per agent) of {player_name: mapping}
    structures.  For each agent, values sharing a metric key are pooled
    across players and averaged; the result maps each key to the list of
    per-agent means.
    """
    # Pool values per metric key, one pool per agent.
    per_agent = []
    for agent_metrics in metrics:
        pooled = defaultdict(list)
        for player_metrics in agent_metrics.values():
            for key, value in player_metrics.items():
                pooled[key].extend(value if isinstance(value, list) else [value])
        per_agent.append(pooled)

    # Reduce each agent's pool to its mean.
    out = defaultdict(list)
    for pooled in per_agent:
        for key, values in pooled.items():
            out[key].append(sum(values) / len(values))

    return Dotdict(out)
def get_info(episodes):
    """Summarize per-player statistics over a list of episodes.

    Only episodes whose final `done` flag is set are counted.  Returns a
    Dotdict mapping each player to avg_ret (return per episode),
    avg_rew_per_ts (reward per timestep) and avg_ep_len (timesteps per
    episode).  Assumes every episode has at least one completed episode
    per player, otherwise the final divisions raise ZeroDivisionError —
    TODO confirm callers guarantee this.
    """
    out = Dotdict()
    ret = defaultdict(int)   # summed return per player
    n_ep = defaultdict(int)  # number of completed episodes per player
    n_ts = defaultdict(int)  # number of timesteps per player
    players = list(episodes[0].inp.data.keys())

    for ep in episodes:
        for p in players:
            dones = getattr(ep.outcome.done, p)
            # only count episodes that actually terminated
            if dones[-1]:
                rews = ep.outcome.reward[p]
                # prefer the unnormalized reward when the env provides it
                if 'reward_unnorm' in ep.outcome[p]:
                    rews = ep.outcome[p].reward_unnorm
                ret[p] += sum(rews)
                n_ts[p] += rews.shape[0]
                n_ep[p] += 1
    # overcooked log complete dishes

    for p in players:
        out[p] = Dotdict()
        out[p]['avg_ret'] = ret[p]/n_ep[p]
        out[p]['avg_rew_per_ts'] = ret[p]/n_ts[p]
        out[p]['avg_ep_len'] = n_ts[p]/n_ep[p]
    return out
tqdm import tqdm 29 | from coop_marl.trainers import registered_trainers 30 | trainer = registered_trainers[trainer](conf, env_conf) 31 | start_iter = trainer.iter 32 | save_interval = conf.save_interval if conf.save_interval else conf.eval_interval 33 | for i in tqdm(range(start_iter,conf.n_iter)): 34 | _ = trainer.train() # collect data and update the agents 35 | if ((i+1) % conf.eval_interval==0) or ((i+1)==conf.n_iter) or (i==0): 36 | _ = trainer.evaluate() 37 | if ((i+1) % save_interval==0) or ((i+1)==conf.n_iter) or (i==0): 38 | trainer.save() 39 | try: 40 | import ray 41 | ray.shutdown() 42 | logger.info(f'Ray is shutdown...') 43 | except Exception as e: 44 | logger.error(e) 45 | # wandb.finish() 46 | logger.close() 47 | if conf.render: 48 | import subprocess 49 | subprocess.run([f'python gif_view.py --path {conf["save_dir"]}'], shell=True) 50 | 51 | if __name__=='__main__': 52 | main() 53 | -------------------------------------------------------------------------------- /coop_marl/utils/rebar/rebar/stats/gpu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pandas as pd 3 | from io import BytesIO 4 | from subprocess import check_output 5 | from . 
_last = -1
def vitals(device=None, throttle=0):
    """Record GPU utilization/temperature/memory stats via `writing.mean`.

    Args:
        device: None for all GPUs, an int for one GPU, or a list of indices.
        throttle: minimum seconds between samples; calls arriving sooner
            return immediately (nvidia-smi polling is expensive).
    """
    # This is a fairly expensive op, so let's avoid doing it too often
    global _last
    if time.time() - _last < throttle:
        return
    _last = time.time()

    df = dataframe()
    if device is None:
        pass
    elif isinstance(device, int):
        # double brackets keep the result a DataFrame, not a Series
        df = df.loc[[device]]
    else:
        df = df.loc[device]

    fields = ['compute', 'access', 'fan', 'power', 'temp']
    # .items() replaces Series.iteritems(), which was removed in pandas 2.0.
    # Local names avoid shadowing the `device` parameter.
    for (dev, field), value in df[fields].stack().items():
        writing.mean(f'gpu/{field}/{dev}', value)

    for dev in df.index:
        writing.mean(f'gpu/memory/{dev}', 100*df.loc[dev, 'memused']/df.loc[dev, 'memtotal'])
-------------------------------------------------------------------------------- 1 | from . import arrdict 2 | from torch import nn 3 | from contextlib import contextmanager 4 | 5 | class State: 6 | 7 | def __init__(self): 8 | super().__init__() 9 | 10 | self._value = None 11 | 12 | def get(self, factory=None): 13 | if self._value is None and factory is not None: 14 | self._value = factory() 15 | return self._value 16 | 17 | def set(self, value): 18 | self._value = value 19 | 20 | def clear(self): 21 | self._value = None 22 | 23 | def __repr__(self): 24 | return f'State({self._value})' 25 | 26 | def __str__(self): 27 | return repr(self) 28 | 29 | def states(net): 30 | substates = {k: states(v) for k, v in net.named_children()} 31 | ownstates = {k: getattr(net, k) for k in dir(net) if isinstance(getattr(net, k), State)} 32 | return arrdict.arrdict({k: v for k, v in {**ownstates, **substates}.items() if v}) 33 | 34 | def _nonnull(x): 35 | y = type(x)() 36 | for k, v in x.items(): 37 | if isinstance(v, dict): 38 | subtree = _nonnull(v) 39 | if subtree: 40 | y[k] = subtree 41 | elif v is not None: 42 | y[k] = v 43 | return y 44 | 45 | def get(net): 46 | return _nonnull(states(net).map(lambda s: s.get())) 47 | 48 | def set(net, state): 49 | state.starmap(lambda r, n: n.set(r), states(net)) 50 | 51 | def clear(net): 52 | states(net).map(lambda s: s.clear()) 53 | 54 | @contextmanager 55 | def temp_clear(net): 56 | original = get(net) 57 | clear(net) 58 | try: 59 | yield 60 | finally: 61 | set(net, original) 62 | 63 | @contextmanager 64 | def temp_set(net, state): 65 | original = get(net) 66 | set(net, state) 67 | try: 68 | yield 69 | finally: 70 | set(net, original) 71 | 72 | @contextmanager 73 | def temp_clear_set(net, state): 74 | with temp_clear(net), temp_set(net, state): 75 | yield net 76 | 77 | class Sequential(nn.Sequential): 78 | 79 | def forward(self, input, **kwargs): 80 | for module in self: 81 | input = module(input, **kwargs) 82 | return input 
### Training

Example training commands are in `scripts/`. The commands are based on the best searched hyperparameters of the corresponding algorithm and environment. If you want to run the scripts, make sure that you are currently at the root of the project.

For generalist agents, you can use the following command:
```
xvfb-run -a python main.py --config_file config/algs/meta/overcooked.yaml \
    --env_config_file config/envs/overcooked.yaml --config '{"partner_dir": ["..."], "render": 0}'
```
where `partner_dir` is the path to the training partners e.g., `training_partners_8/overcooked_full_divider_salad_4/trajedi/20220919-233301`.
class GymMaker:
    """Adapter exposing a single-agent gym env through the project's
    multi-agent interface: one player ('player_0') with Arrdict/Dotdict
    structured observations, rewards and dones."""

    def __init__(self, env_name):
        self._env = gym.make(env_name)
        self.players = ['player_0']
        self.action_spaces = Dotdict({self.players[0]: copy(self._env.action_space)})
        self.observation_spaces = Dotdict({self.players[0]: Dotdict(obs=copy(self._env.observation_space))})
        self.total_steps = 0  # cumulative env steps across episodes

    def get_action_space(self):
        return self._env.action_space

    def get_observation_space(self):
        return Dotdict(obs=self._env.observation_space)

    def reset(self):
        """Reset the wrapped env and return the initial per-player Arrdict."""
        obs = self._env.reset()
        data = Arrdict()
        data[self.players[0]] = Arrdict(obs=obs.astype(np.float32), reward=np.float32(0), done=False)
        return data

    def step(self, decision):
        """Step the env with the single player's action; returns (data, info)."""
        self.total_steps += 1
        action = decision[self.players[0]]['action']
        obs, reward, done, info = self._env.step(action)
        data = Arrdict()
        # np.float32(reward) instead of reward.astype(np.float32): classic
        # gym envs return the reward as a Python float, which has no astype,
        # and np.float32(...) also accepts numpy scalars.
        data[self.players[0]] = Arrdict(obs=obs.astype(np.float32), reward=np.float32(reward), done=done)
        return data, Dotdict(info)

    def render(self, mode):
        return self._env.render(mode)

    @staticmethod
    def make_env(*args, **kwargs):
        """Factory: build a GymMaker and wrap it in the SARD checker."""
        env = GymMaker(*args, **kwargs)
        env = SARDConsistencyChecker(env)
        return env
def compare(run_names=(-1,), prefix='', rule='60s'):
    """Resample the stats of several runs and concatenate them side by side.

    Args:
        run_names: iterable of run identifiers accepted by `paths.resolve`;
            defaults to the latest run. (A tuple default replaces the old
            mutable-list default; any iterable still works.)
        prefix: stat-name prefix forwarded to `Reader`.
        rule: pandas resampling rule, e.g. '60s'.

    Returns:
        A DataFrame with one column group per run, keyed by resolved path.
    """
    frames = {paths.resolve(run): Reader(run, prefix).resample(rule) for run in run_names}
    # axis passed by keyword: positional `axis` for pd.concat was
    # deprecated in pandas 1.5 and removed in 2.0.
    return pd.concat(frames, axis=1)
def get_pair(path):
    """Parse a gif filename like '.../3-7.gif' into the (home, away)
    agent-index pair, e.g. (3, 7)."""
    stem = path.split('.')[0].rsplit('/', 1)[-1]
    home, away = stem.split('-')
    return int(home), int(away)