├── .gitignore ├── MAML ├── DockerFile.subm ├── README.md ├── all.csv ├── config.yaml ├── config_debug.yaml ├── config_test.yaml ├── config_test_local.yaml ├── config_train.yaml ├── make_image.sh ├── meta_learner.py ├── model.py ├── requirements.txt ├── setup.sh ├── sonic_utils.py ├── start_workers.sh ├── test.sh ├── train.py ├── train_debug.csv ├── train_large.csv ├── train_small.csv ├── utils.py ├── validation.csv └── worker.py ├── README.md ├── actor_critic ├── actor_critic_test.py ├── actor_critic_train.py ├── envs.py └── weights │ └── actor_critic_sonic1.pt ├── baseline ├── README.md ├── all.csv ├── config.yaml ├── config_test.yaml ├── config_train.yaml ├── jerk.docker ├── jerk_agent.py ├── make_image.sh ├── ppo2.docker ├── ppo2.subm.docker ├── ppo2_agent.py ├── rainbow.docker ├── rainbow_agent.py ├── requirements.txt ├── roms │ └── .gitkeep ├── setup.sh ├── simple-agent.docker ├── simple-agent.py ├── sonic_util.py ├── test.sh ├── train.sh ├── train_large.csv ├── train_nodocker.sh ├── train_small.csv ├── utils.py └── validation.csv └── deepneuroevolution ├── README.md ├── configurations ├── frostbite_es.json ├── frostbite_ga.json ├── frostbite_nses.json ├── frostbite_nsres.json ├── humanoid.json ├── humanoid_nses.json ├── humanoid_nsres.json ├── sonic_es.json ├── sonic_ga.json └── sonic_nsres.json ├── es_distributed ├── __init__.py ├── atari_wrappers.py ├── dist.py ├── es.py ├── es_modified.py ├── ga.py ├── ga_modified.py ├── main.py ├── nses.py ├── optimizers.py ├── policies.py ├── rs.py ├── tabular_logger.py └── tf_util.py ├── extra └── humanoid_maze.xml ├── gpu_implementation ├── README.md ├── configurations │ ├── es_atari_config.json │ ├── ga_atari_config.json │ └── rs_atari_config.json ├── es.py ├── ga.py ├── gym_tensorflow │ ├── Makefile │ ├── README.md │ ├── __init__.py │ ├── atari │ │ ├── README.md │ │ ├── __init__.py │ │ ├── tf_atari.cpp │ │ └── tf_atari.py │ ├── maze │ │ ├── __init__.py │ │ ├── hard_maze.txt │ │ ├── hard_maze.txt.npy │ │ ├── maze.h │ │ ├── tf_maze.cpp │ │ └── tf_maze.py │ ├── ops │ │ ├── __init__.py │ │ └── indexedmatmul.cpp │ ├── tf_env.cpp │ ├── tf_env.h │ ├── tf_env.py │ └── wrappers │ │ ├── __init__.py │ │ └── stack_frames.py ├── neuroevolution │ ├── __init__.py │ ├── concurrent_worker.py │ ├── display.py │ ├── distributed_helpers.py │ ├── helper.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batchnorm.py │ │ ├── dqn.py │ │ ├── dqn_xavier.py │ │ └── simple.py │ ├── optimizers.py │ └── tf_util.py └── tabular_logger.py ├── redis_config ├── redis_local_mirror.conf └── redis_master.conf ├── requirements.txt ├── scripts ├── dependency.sh ├── ec2ctl ├── launch.py ├── local_env_setup.sh ├── local_run_exp.sh ├── local_run_redis.sh ├── packer.json └── viz.py ├── train_large.csv ├── train_small.csv ├── train_spring_yard.csv └── visual_inspector ├── README.md ├── dimen_red ├── assemble.py ├── disassemble.py └── reduce.py ├── figure_base ├── __init__.py ├── buttons.py ├── cloud_figures.py ├── figure_control.py ├── fitness_figures.py ├── load_data.py ├── mouse_event.py ├── rollout_base.py └── settings.py ├── figure_custom ├── __init__.py ├── cloud_figures_custom.py ├── rollout_custom.py └── rollout_trajectory.py ├── main_atari.py ├── main_mujoco.py ├── process_bc.py └── sample_data └── mujoco └── final_xy_bc └── snapshots ├── snapshot_gen_0090 ├── snapshot_offspring_0090.dat └── snapshot_parent_0090.dat ├── snapshot_gen_0091 ├── snapshot_offspring_0091.dat └── snapshot_parent_0091.dat ├── snapshot_gen_0092 ├── snapshot_offspring_0092.dat └── 
snapshot_parent_0092.dat ├── snapshot_gen_0093 ├── snapshot_offspring_0093.dat └── snapshot_parent_0093.dat ├── snapshot_gen_0094 ├── snapshot_offspring_0094.dat └── snapshot_parent_0094.dat ├── snapshot_gen_0095 ├── snapshot_offspring_0095.dat └── snapshot_parent_0095.dat ├── snapshot_gen_0096 ├── snapshot_offspring_0096.dat └── snapshot_parent_0096.dat ├── snapshot_gen_0097 ├── snapshot_offspring_0097.dat └── snapshot_parent_0097.dat ├── snapshot_gen_0098 ├── snapshot_offspring_0098.dat └── snapshot_parent_0098.dat └── snapshot_gen_0099 ├── snapshot_offspring_0099.dat └── snapshot_parent_0099.dat /.gitignore: -------------------------------------------------------------------------------- 1 | baseline/roms/Sonic* 2 | *.pkl 3 | */results 4 | baseline/logs 5 | .ipynb_checkpoints 6 | *.ipynb 7 | *.pt 8 | logs/ 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | *.ipynb 15 | *.tar.gz 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | .venv 98 | venv/ 99 | ENV/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | 114 | .idea 115 | -------------------------------------------------------------------------------- /MAML/DockerFile.subm: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:pytorch 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | COPY requirements.txt /tmp 9 | RUN . ~/venv/bin/activate && \ 10 | pip install -r /tmp/requirements.txt && \ 11 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1659068fdeb5fd4859fa598634008a84afe3616e && \ 12 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && \ 13 | pip install . 
14 | 15 | ADD *.py ./ 16 | ADD *.yaml ./ 17 | ADD test.sh ./ 18 | ADD *.pt ./ 19 | 20 | CMD ["/bin/bash", "test.sh"] 21 | -------------------------------------------------------------------------------- /MAML/README.md: -------------------------------------------------------------------------------- 1 | # Algorithms 2 | 3 | An attempt to implement the [MAML](https://arxiv.org/abs/1703.03400) and [Reptile](https://arxiv.org/abs/1803.02999) 4 | meta-learning algorithms. 5 | 6 | # Setup 7 | 8 | Set up the environment with the setup.sh script (it requires conda; edit the script if you use something else). 9 | 10 | # Run training 11 | 12 | The meta algorithm uses Pyro4 for distributed training, so it is worth reading about it first, 13 | for example in the [Pyro4 tutorial](https://pythonhosted.org/Pyro4/tutorials.html). 14 | 15 | 16 | Below is a basic example of how to run it locally: 17 | 18 | 1) Start the Pyro4 name server locally: 19 | 20 | $ pyro4-ns & 21 | 22 | 2) Start the workers (for example, 6 workers locally): 23 | 24 | $ ./start_workers.sh localhost 9000 6 localhost 25 | 26 | 3) Start the meta learner: 27 | 28 | $ python meta_learner.py --config config.yaml config_train.yaml 29 | -------------------------------------------------------------------------------- /MAML/all.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 |
SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 50 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 51 | SonicTheHedgehog-Genesis,StarLightZone.Act3 52 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 53 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 54 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 55 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 56 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 57 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 58 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 59 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 60 | -------------------------------------------------------------------------------- /MAML/config.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | max_steps: 100000000 3 | lr: 0.0002 4 | lr_meta: 0.0002 5 | vf_coef: 0.5 6 | ent_coef: 0.01 7 | cliprange: 0.1 8 | gamma: 0.99 9 | lam: 0.95 10 | n_steps: 4500 11 | n_opt_epochs: 3 12 | batch_size: 4500 13 | max_grad_norm: 0.5 14 | n_traj1: 3 15 | n_traj2: 3 16 | meta_algo: "maml" # reptile or maml 17 | weights: "last.pt" 18 | load_adam_params: "all" 19 | ep_info_len: null 20 | 21 | env_params: 22 | exp_type: "x" 23 | exp_const: 0.005 24 | color: False 25 | stack: 2 26 | scale_rew: True 27 | 28 | log: 29 | log: True 30 | log_interval: 1 31 | save_interval: 10 32 | log_dir: logs 33 | save_last: True -------------------------------------------------------------------------------- /MAML/config_debug.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | n_steps: 4500 3 | n_opt_epochs: 3 4 | batch_size: 4500 5 | n_traj1: 1 6 | n_traj2: 1 7 | 8 | env_params: 9 | game_states: "train_debug.csv" 10 | max_episode_steps: 4500 -------------------------------------------------------------------------------- /MAML/config_test.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | max_steps: 10000000 3 | weights: "last.pt" 4 | ep_info_len: null 5 | 6 | env_params: 7 | socket_dir: "tmp/sock" 8 | 9 | log: 10 | log: True 11 | log_dir: null -------------------------------------------------------------------------------- /MAML/config_test_local.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | max_steps: 500000 3 | weights: "last.pt" 4 | 5 | env_params: 6 | game_states: "validation.csv" 7 | max_episode_steps: 4500 8 | 9 | log: 10 | log: False 11 | log_dir: null -------------------------------------------------------------------------------- /MAML/config_train.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | ep_info_len: 100 3 | 4 | env_params: 5 | game_states: "train_large.csv" 6 | max_episode_steps: 4500 -------------------------------------------------------------------------------- /MAML/make_image.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DOCKER_REGISTRY="retrocontestrtidfxqehvzsuwpo.azurecr.io" 3 | docker build -f DockerFile.subm -t $DOCKER_REGISTRY/$1 . 
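The distributed setup described in the MAML README is easier to follow with a concrete picture of the Pyro4 pattern that worker.py and meta_learner.py (below) rely on: each worker registers itself with the pyro4-ns name server under the "worker." prefix, and the meta learner looks workers up by that prefix and invokes them asynchronously through futures. The following is a minimal sketch under those assumptions, not code from the repo; the `Worker` class and `serve` function are hypothetical stand-ins for worker.py.

import Pyro4

@Pyro4.expose
class Worker(object):
    """Hypothetical stand-in for the real worker object in worker.py."""

    def initialize(self, config, weights_blob):
        # receive the merged config and the initial model weights (pickled)
        self.config = config
        return "ok"

    def run(self, weights_blob=None):
        # collect trajectories, compute gradients and return them
        # (the real worker returns a base64-pickled dict of grads)
        return {"data": "...", "encoding": "base64"}

def serve(name, host, port, ns_host):
    # expose the worker object and register it with the name server
    daemon = Pyro4.Daemon(host=host, port=port)
    uri = daemon.register(Worker())
    with Pyro4.locateNS(host=ns_host) as ns:
        ns.register("worker.{}".format(name), uri)  # prefix used by find_workers()
    daemon.requestLoop()

# Meta-learner side (cf. find_workers/init_workers in meta_learner.py below):
#   with Pyro4.locateNS() as ns:
#       proxies = [Pyro4.Proxy(uri) for uri in ns.list(prefix="worker.").values()]
#   futures = [Pyro4.Future(p.run)() for p in proxies]  # non-blocking remote calls
#   # each future exposes .ready and .value, exactly as polled in wait_run_end()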
-------------------------------------------------------------------------------- /MAML/meta_learner.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['OMP_NUM_THREADS'] = '1' 3 | 4 | import Pyro4 5 | import train 6 | import pickle 7 | import sonic_utils 8 | from model import CNNPolicy 9 | import utils 10 | from time import sleep 11 | import torch 12 | from pathlib import Path 13 | torch.set_num_threads(1) 14 | 15 | 16 | def find_workers(prefix): 17 | workers = [] 18 | with Pyro4.locateNS() as ns: 19 | for sampler, sampler_uri in ns.list(prefix="{}.".format(prefix)).items(): 20 | print("found {}".format(prefix), sampler) 21 | workers.append(Pyro4.Proxy(sampler_uri)) 22 | if not workers: 23 | raise ValueError("no {} found!".format(prefix)) 24 | print('found total {} {}s'.format(len(workers), prefix)) 25 | return workers 26 | 27 | 28 | def init_workers(workers, config, weights): 29 | results = [] 30 | print('start workers initialization') 31 | for worker in workers: 32 | res = Pyro4.Future(worker.initialize)(config, pickle.dumps(weights)) 33 | results.append(res) 34 | 35 | while len(results) > 0: 36 | for res in results: 37 | if res.ready: 38 | results.remove(res) 39 | 40 | print('finish workers initialization') 41 | 42 | 43 | def wait_run_end(workers_results, model, timeout=None): 44 | # TODO: use timeout 45 | weights = pickle.dumps(model.get_weights()) 46 | 47 | for w, res in workers_results.items(): 48 | 49 | while not res.ready: 50 | sleep(1) 51 | 52 | res = utils.unpickle(res.value) 53 | grads = res["grads"] 54 | model.add_grads(grads) 55 | 56 | new_res = Pyro4.Future(w.run)(weights) 57 | workers_results[w] = new_res 58 | 59 | 60 | def run_maml(args): 61 | config = utils.load_config(args.config) 62 | train_params = config["train_params"] 63 | 64 | # open and close env just to get right action and obs space 65 | env = sonic_utils.make_from_config(config['env_params'], True) 66 | env.close() 67 | 68 | # init model 69 | model = CNNPolicy( 70 | env.observation_space, env.action_space, train_params["vf_coef"], 71 | train_params["ent_coef"], train_params["lr_meta"], train_params["max_grad_norm"] 72 | 73 | ) 74 | 75 | workers = find_workers("worker") 76 | init_workers(workers, config, model.get_weights()) 77 | 78 | # start run 79 | workers_results = {w: Pyro4.Future(w.run)() for w in workers} 80 | 81 | savedir = utils.prepare_exp_dir(config, args.exp_name) 82 | 83 | updates = 0 84 | while True: 85 | # first zero all grads 86 | model.optimizer.zero_grad() 87 | 88 | # then apply add grads from remote workers 89 | wait_run_end(workers_results, model) 90 | 91 | # apply gradient 92 | model.optimizer.step() 93 | 94 | updates += 1 95 | 96 | # save last weights 97 | if config['log']['save_last']: 98 | fpath = savedir / 'last.pt' 99 | model.save(fpath) 100 | 101 | # save on save period 102 | if updates % config['log']["save_interval"] == 0 or updates == 1: 103 | fpath = savedir / '{}.pt'.format(updates) 104 | model.save(fpath) 105 | 106 | 107 | if __name__ == '__main__': 108 | try: 109 | args = utils.get_args() 110 | run_maml(args) 111 | except: 112 | print("Pyro traceback:") 113 | print("".join(Pyro4.util.getPyroTraceback())) 114 | raise 115 | -------------------------------------------------------------------------------- /MAML/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | tqdm 3 | joblib 4 | zmq 5 | dill 6 | progressbar2 7 | cloudpickle 8 | opencv-python 9 | pandas 10 | 
Pyro4 11 | gym-retro 12 | torchvision 13 | pyyaml -------------------------------------------------------------------------------- /MAML/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | conda create -n retro python=3.5 -y 3 | source activate retro 4 | pip install http://download.pytorch.org/whl/cu91/torch-0.4.0-cp35-cp35m-linux_x86_64.whl 5 | pip install -r requirements.txt 6 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1659068fdeb5fd4859fa598634008a84afe3616e 7 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && pip install . 8 | 9 | # download roms 10 | wget -qO - https://www.dropbox.com/s/8i0mh0bn2bbe1w5/roms.tar.gz?dl=0 | tar xzv 11 | find ./roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; -------------------------------------------------------------------------------- /MAML/start_workers.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # usage: 4 | # ./start_workers.sh host port_start num_samplers ns_host 5 | 6 | host=$1 7 | port_start=$2 8 | num_samplers=$3 9 | ns_host=$4 10 | 11 | for i in `seq 1 $num_samplers`; do 12 | python worker.py --name $HOSTNAME --host $host --port $((port_start+i-1)) --ns_host $ns_host & 13 | done -------------------------------------------------------------------------------- /MAML/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python -u /root/compo/train.py --config config.yaml config_test.yaml -------------------------------------------------------------------------------- /MAML/train.py: -------------------------------------------------------------------------------- 1 | import sonic_utils 2 | import utils 3 | from model import CNNPolicy 4 | import numpy as np 5 | from time import time 6 | from baselines import logger 7 | from collections import deque 8 | import pandas as pd 9 | 10 | 11 | def traj_segment_generator(model, env, horizon, sample): 12 | t = 0 13 | ac = env.action_space.sample() # not used, just so we have the datatype 14 | new = True # marks if we're on first timestep of an episode 15 | ob = env.reset() 16 | 17 | ep_infos = [] 18 | 19 | # Initialize history arrays 20 | obs = np.array([ob for _ in range(horizon)]) 21 | rews = np.zeros(horizon, 'float32') 22 | vpreds = np.zeros(horizon, 'float32') 23 | news = np.zeros(horizon, 'int32') 24 | acs = np.array([ac for _ in range(horizon)]) 25 | ac_logits = np.zeros(horizon, 'float32') 26 | 27 | while True: 28 | ac, ac_logit, vpred = model.step(ob, sample) 29 | # Slight weirdness here because we need value function at time T 30 | # before returning segment [0, T-1] so we get the correct 31 | # terminal value 32 | if t > 0 and t % horizon == 0: 33 | yield { 34 | "ob": obs, "rew": rews, 35 | "vpred": vpreds, "new": news, 36 | "ac": acs, "nextvpred": float(vpred) * (1 - new), 37 | "ac_logits": ac_logits, "ep_infos": ep_infos, 38 | } 39 | # Be careful!!!
if you change the downstream algorithm to aggregate 40 | # several of these batches, then be sure to do a deepcopy 41 | ep_infos = [] 42 | 43 | i = t % horizon 44 | obs[i] = ob 45 | vpreds[i] = vpred 46 | news[i] = new 47 | acs[i] = ac 48 | ac_logits[i] = ac_logit 49 | 50 | ob, rew, new, info = env.step(ac) 51 | rews[i] = rew 52 | 53 | if new: 54 | # game_name = env.unwrapped.game_name 55 | # state_name = env.unwrapped.state_name 56 | if "episode" in info: 57 | ep_infos.append(info["episode"]) 58 | 59 | ob = env.reset() 60 | 61 | t += 1 62 | 63 | 64 | def add_vtarg(seg, gamma, lam): 65 | """ 66 | Compute target value using TD(lambda) estimator, and advantage with GAE(lambda) 67 | """ 68 | new = np.append(seg["new"], 0) # last element is only used for last vtarg, but we already zeroed it if last new = 1 69 | vpred = np.append(seg["vpred"], seg["nextvpred"]) 70 | T = len(seg["rew"]) 71 | gaelam = np.empty(T, 'float32') 72 | rew = seg["rew"] 73 | lastgaelam = 0 74 | for t in reversed(range(T)): 75 | nonterminal = 1 - new[t+1] 76 | delta = rew[t] + gamma * vpred[t+1] * nonterminal - vpred[t] 77 | gaelam[t] = lastgaelam = delta + gamma * lam * nonterminal * lastgaelam 78 | seg["tdlamret"] = gaelam + seg["vpred"] 79 | 80 | 81 | def train(config, exp_name='test'): 82 | 83 | train_params = config['train_params'] 84 | env_params = config['env_params'] 85 | log_params = config["log"] 86 | 87 | savedir = None 88 | if log_params["log_dir"] is not None: 89 | savedir = utils.prepare_exp_dir(config, exp_name) 90 | 91 | env = sonic_utils.make_from_config(env_params) 92 | 93 | model = CNNPolicy( 94 | env.observation_space, env.action_space, train_params["vf_coef"], 95 | train_params["ent_coef"], train_params["lr"], train_params["max_grad_norm"] 96 | ) 97 | 98 | if train_params["weights"] is not None: 99 | model.load(train_params["weights"], train_params["load_adam_params"]) 100 | 101 | seg_gen = traj_segment_generator( 102 | model, env, train_params['n_steps'], sample=True) 103 | 104 | total_steps = 0 105 | updates = 0 106 | t0 = time() 107 | epinfobuf = deque(maxlen=train_params["ep_info_len"]) 108 | seg_inds = np.arange(train_params['n_steps']) 109 | n_batches = train_params["n_steps"] // train_params["batch_size"] 110 | loss_vals = [] 111 | while True: 112 | if total_steps > train_params['max_steps']: 113 | break 114 | 115 | # get batch 116 | seg = seg_gen.__next__() 117 | add_vtarg(seg, train_params['gamma'], train_params['lam']) 118 | 119 | # add episode info 120 | epinfobuf.extend(seg['ep_infos']) 121 | 122 | for _ in range(train_params["n_opt_epochs"]): 123 | np.random.shuffle(seg_inds) 124 | for i in range(n_batches): 125 | start = i * train_params["batch_size"] 126 | end = (i + 1) * train_params["batch_size"] 127 | inds = seg_inds[start:end] 128 | 129 | losses = model.train( 130 | train_params['cliprange'], seg['ob'][inds], 131 | seg['tdlamret'][inds], seg['ac'][inds], 132 | seg['vpred'][inds], seg["ac_logits"][inds] 133 | ) 134 | loss_vals.append([l.detach().numpy() for l in losses]) 135 | 136 | total_steps += train_params['n_steps'] 137 | updates += 1 138 | 139 | if log_params["log"] and (updates % log_params["log_interval"] == 0 or updates == 1): 140 | 141 | tnow = time() 142 | fps = int(total_steps / (tnow - t0)) 143 | # ev = explained_variance(values, returns) 144 | logger.logkv("total_steps", total_steps) 145 | logger.logkv("nupdates", updates) 146 | logger.logkv("fps", fps) 147 | logger.logkv('eprewmean', np.mean([epinfo['r'] for epinfo in epinfobuf if 'r' in epinfo])) 148 | 
logger.logkv('eprewmean_exp', np.mean([epinfo['r_exp'] for epinfo in epinfobuf if 'r_exp' in epinfo])) 149 | logger.logkv('eplenmean', np.mean([epinfo['l'] for epinfo in epinfobuf if 'l' in epinfo])) 150 | logger.logkv('time_elapsed', tnow - t0) 151 | 152 | for loss_val, loss_name in zip(np.mean(loss_vals, axis=0), model.loss_names): 153 | logger.logkv(loss_name, loss_val) 154 | logger.dumpkvs() 155 | 156 | del loss_vals[:] 157 | 158 | # save last weights 159 | if log_params['save_last'] and savedir is not None: 160 | fpath = savedir / 'last.pt' 161 | model.save(fpath) 162 | 163 | # save on save period 164 | if (updates % log_params["save_interval"] == 0 or updates == 1) and savedir is not None: 165 | fpath = savedir / '{}.pt'.format(updates) 166 | model.save(fpath) 167 | 168 | return epinfobuf 169 | 170 | 171 | def test(): 172 | args = utils.get_args() 173 | 174 | config = utils.load_config(args.config) 175 | 176 | if "game_states" in config['env_params']: 177 | game_states = pd.read_csv(config['env_params']["game_states"]).values.tolist() 178 | 179 | all_means = [] 180 | for game, state in game_states: 181 | config['env_params']["game_states"] = [(game, state)] 182 | epinfobuf = train(config, args.exp_name) 183 | 184 | rewards = [epinfo['r'] for epinfo in epinfobuf if 'r' in epinfo] 185 | print("{} {} {:.2f} {:.2f}".format(game, state, np.max(rewards), np.mean(rewards))) 186 | all_means.append(np.mean(rewards)) 187 | 188 | print("final result {:.2f}".format(np.mean(all_means))) 189 | else: 190 | train(config, args.exp_name) 191 | 192 | 193 | if __name__ == '__main__': 194 | test() 195 | -------------------------------------------------------------------------------- /MAML/train_debug.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | -------------------------------------------------------------------------------- /MAML/train_large.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | 
SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | -------------------------------------------------------------------------------- /MAML/train_small.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,StarLightZone.Act2 4 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 5 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 6 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 7 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 8 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 10 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 11 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 12 | -------------------------------------------------------------------------------- /MAML/utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import pickle 3 | from copy import deepcopy 4 | from datetime import datetime 5 | import argparse 6 | from pathlib import Path 7 | import yaml 8 | 9 | 10 | def load_config(fnames): 11 | 12 | config = {} 13 | for fname in fnames: 14 | with open(fname) as f: 15 | config = merge_dictionaries(config, yaml.load(f)) 16 | 17 | return config 18 | 19 | 20 | def add_boolean_flag(parser, name, default=False, help=None): 21 | """Add a boolean flag to argparse parser. 
22 | Parameters 23 | ---------- 24 | parser: argparse.Parser 25 | parser to add the flag to 26 | name: str 27 | --<name> will enable the flag, while --no-<name> will disable it 28 | default: bool or None 29 | default value of the flag 30 | help: str 31 | help string for the flag 32 | """ 33 | dest = name.replace('-', '_') 34 | parser.add_argument("--" + name, action="store_true", default=default, dest=dest, help=help) 35 | parser.add_argument("--no-" + name, action="store_false", dest=dest) 36 | 37 | 38 | def prepare_exp_dir(config, exp_name): 39 | # directory for logs 40 | logdir = Path(config['log']['log_dir']) / exp_name 41 | logdir.mkdir(parents=True, exist_ok=True) 42 | 43 | with open(str(logdir / 'run_config.yaml'), 'w') as f: 44 | yaml.dump(config, f) 45 | 46 | savedir = logdir / 'weights' 47 | savedir.mkdir(parents=True, exist_ok=True) 48 | 49 | return savedir 50 | 51 | 52 | def get_args(): 53 | parser = argparse.ArgumentParser(description="Run commands") 54 | parser.add_argument( 55 | '--config', type=str, default=None, nargs='+', 56 | help="Yaml files with configs") 57 | parser.add_argument( 58 | '--exp_name', type=str, 59 | default=datetime.now().strftime("%d.%m.%Y-%H:%M"), 60 | help='Experiment name') 61 | return parser.parse_args() 62 | 63 | 64 | def merge_dictionaries(a, b, path_to_root=None, extend_lists=False): 65 | """ 66 | Creates a copy of dict `a` and recursively updates its elements with the elements of `b`. 67 | :param extend_lists: 68 | if True and the values in both dicts are lists (when the key is present in both), the elements of b are 69 | appended to the elements of a; if the value in one of the dicts is not a list, a ValueError is raised 70 | if False, list values are treated as ordinary values and simply replace/overwrite one another 71 | """ 72 | res = deepcopy(a) 73 | 74 | if path_to_root is None: 75 | path_to_root = [] 76 | 77 | for key in b: 78 | if key not in res: 79 | res[key] = b[key] 80 | continue 81 | if isinstance(res[key], dict): 82 | if isinstance(b[key], dict): 83 | res[key] = merge_dictionaries(res[key], b[key], path_to_root + [str(key)], extend_lists=extend_lists) 84 | else: 85 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 86 | elif extend_lists and isinstance(res[key], list): 87 | if isinstance(b[key], list): 88 | res[key].extend(b[key]) 89 | else: 90 | raise ValueError( 91 | "Cannot extend list with a non-list. Path: {}".format('.'.join(path_to_root + [str(key)]))) 92 | else: 93 | if extend_lists and isinstance(b[key], list): 94 | raise ValueError( 95 | "Cannot extend a non-list with a list.
Path: {}".format('.'.join(path_to_root + [str(key)]))) 96 | elif not isinstance(b[key], dict): 97 | res[key] = b[key] 98 | else: 99 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 100 | return res 101 | 102 | 103 | def conv_out_dim(in_n, k, p, s): 104 | """ 105 | :param in_n: input dim 106 | :param k: kernel size 107 | :param p: padding size 108 | :param s: stride size 109 | :return: output dim 110 | """ 111 | return int((in_n + 2*p - k) / s + 1) 112 | 113 | 114 | def convs_out_dim(in_n, ks, ps, ss): 115 | assert len(ks) == len(ps) == len(ss) 116 | for k, p, s in zip(ks, ps, ss): 117 | in_n = conv_out_dim(in_n, k, p, s) 118 | return in_n 119 | 120 | 121 | def unpickle(data_dict): 122 | assert isinstance(data_dict, dict) and 'data' in data_dict and 'encoding' in data_dict 123 | 124 | data = data_dict["data"] 125 | encoding = data_dict["encoding"] 126 | 127 | if encoding == "base64": 128 | res = pickle.loads(base64.b64decode(data)) 129 | else: 130 | raise ValueError('unsopported encoding {}'.format(encoding)) 131 | 132 | return res 133 | -------------------------------------------------------------------------------- /MAML/validation.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 3 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 4 | SonicTheHedgehog-Genesis,StarLightZone.Act3 5 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 6 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 7 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 8 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 10 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 11 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 12 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Retro contest solution 2 | This is repository for 4th place soulution for [retro contest](https://blog.openai.com/first-retro-contest-retrospective/). 3 | 4 | 5 | Content: 6 | - baseline - Joint PPO baseline is actual soution, see README.md in this directory for more detail. 
7 | - actor_critic - a PyTorch actor-critic implementation of the baseline 8 | - deepneuroevolution - evolution algorithms 9 | - MAML - MAML and Reptile algorithms; see its README.md for more details 10 | 11 | # Team 12 | - Ivan Sorokin ([github](https://github.com/1ytic)) 13 | - Kolesnikov Sergey ([linkedin](https://linkedin.com/in/scitator), [twitter](https://twitter.com/Scitator), [github](https://github.com/Scitator)) 14 | - Sergeev Ilya ([linkedin](https://www.linkedin.com/in/ilya-sergeev/), [twitter](https://twitter.com/sergeevii123), [github](https://github.com/sergeevii123)) 15 | - Mikhail Pavlov ([twitter](https://twitter.com/MikhailPavlov5), [github](https://github.com/fgvbrt)) -------------------------------------------------------------------------------- /actor_critic/actor_critic_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | from gym import wrappers 4 | import numpy as np 5 | from itertools import count 6 | from collections import namedtuple 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | from torch.autograd import Variable 13 | from torch.distributions import Categorical 14 | from envs import make_retro 15 | import pandas as pd 16 | 17 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example') 18 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 19 | help='discount factor (default: 0.99)') 20 | parser.add_argument('--seed', type=int, default=42, metavar='N', 21 | help='random seed (default: 42)') 22 | parser.add_argument('--log-interval', type=int, default=1, metavar='N', 23 | help='interval between training status logs (default: 1)') 24 | 25 | args = parser.parse_args() 26 | game_states = pd.read_csv("train_large.csv").values.tolist() 27 | 28 | env = make_retro('SonicTheHedgehog-Genesis', 'LabyrinthZone.Act1', game_states) 29 | env.seed(args.seed) 30 | torch.manual_seed(args.seed) 31 | 32 | 33 | SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) 34 | 35 | class Policy(nn.Module): 36 | def __init__(self, num_inputs, action_space): 37 | super(Policy, self).__init__() 38 | self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1) 39 | self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 40 | self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 41 | self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 42 | self.affine1 = nn.Linear(32*6, 256) 43 | self.action_head = nn.Linear(256, action_space) 44 | self.value_head = nn.Linear(256, 1) 45 | 46 | self.saved_actions = [] 47 | self.rewards = [] 48 | 49 | def forward(self, x): 50 | x = F.elu(self.conv1(x)) 51 | x = F.elu(self.conv2(x)) 52 | x = F.elu(self.conv3(x)) 53 | x = F.elu(self.conv4(x)) 54 | x = x.view(-1, 32*6) 55 | x = F.elu(self.affine1(x)) 56 | action_scores = self.action_head(x) 57 | state_values = self.value_head(x) 58 | return F.softmax(action_scores, dim=-1), state_values 59 | 60 | 61 | model = Policy(env.observation_space.shape[0], env.action_space.n) 62 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 63 | 64 | 65 | def select_action(state): 66 | state = torch.from_numpy(state).float().unsqueeze(0) 67 | probs, state_value = model(Variable(state)) 68 | m = Categorical(probs) 69 | action = m.sample() 70 | return action.data[0] 71 | 72 | model.load_state_dict(torch.load('weights/{}.pt'.format("actor_critic_sonic1"))) 73 | 74 | running_length = 10 75 | max_reward = -100 76 | 77 | for i_episode in count(1): 78 | state =
env.reset() 79 | current_reward = 0 80 | done = False 81 | t = 0 82 | while not done: 83 | action = select_action(np.array(state)) 84 | state, reward, done, _ = env.step(action) 85 | env.render() 86 | current_reward += reward 87 | t += 1 88 | 89 | running_length = running_length * 0.99 + t * 0.01 90 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}\tReward: {:.5f}'.format( 91 | i_episode, t, running_length, current_reward)) -------------------------------------------------------------------------------- /actor_critic/actor_critic_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from itertools import count 4 | from collections import namedtuple 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torch.autograd import Variable 11 | from torch.distributions import Categorical 12 | from envs import make_retro 13 | import pandas as pd 14 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example') 15 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 16 | help='discount factor (default: 0.99)') 17 | parser.add_argument('--seed', type=int, default=42, metavar='N', 18 | help='random seed (default: 42)') 19 | parser.add_argument('--log-interval', type=int, default=1, metavar='N', 20 | help='interval between training status logs (default: 1)') 21 | parser.add_argument('--record', action='store_true', 22 | help='save video') 23 | 24 | args = parser.parse_args() 25 | game_states = pd.read_csv("train_large.csv").values.tolist() 26 | 27 | env = make_retro('SonicTheHedgehog-Genesis', 'LabyrinthZone.Act1', game_states) 28 | env.seed(args.seed) 29 | torch.manual_seed(args.seed) 30 | 31 | SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) 32 | 33 | class Policy(nn.Module): 34 | def __init__(self, num_inputs, action_space): 35 | super(Policy, self).__init__() 36 | self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1) 37 | self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 38 | self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 39 | self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 40 | self.affine1 = nn.Linear(32*6, 256) 41 | self.action_head = nn.Linear(256, action_space) 42 | self.value_head = nn.Linear(256, 1) 43 | 44 | self.saved_actions = [] 45 | self.rewards = [] 46 | 47 | def forward(self, x): 48 | x = F.elu(self.conv1(x)) 49 | x = F.elu(self.conv2(x)) 50 | x = F.elu(self.conv3(x)) 51 | x = F.elu(self.conv4(x)) 52 | x = x.view(-1, 32*6) 53 | x = F.elu(self.affine1(x)) 54 | action_scores = self.action_head(x) 55 | state_values = self.value_head(x) 56 | return F.softmax(action_scores, dim=-1), state_values 57 | 58 | model = Policy(env.observation_space.shape[0], env.action_space.n) 59 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 60 | 61 | def select_action(state): 62 | state = torch.from_numpy(state).float().unsqueeze(0) 63 | probs, state_value = model(Variable(state)) 64 | m = Categorical(probs) 65 | action = m.sample() 66 | model.saved_actions.append(SavedAction(m.log_prob(action), state_value)) 67 | return action.data[0] 68 | 69 | 70 | def finish_episode(): 71 | R = 0 72 | saved_actions = model.saved_actions 73 | policy_losses = [] 74 | value_losses = [] 75 | rewards = [] 76 | for r in model.rewards[::-1]: 77 | R = r + args.gamma * R 78 | rewards.insert(0, R) 79 | rewards = torch.Tensor(rewards) 80 | rewards = (rewards - rewards.mean()) /
(rewards.std() + np.finfo(np.float32).eps) 81 | for (log_prob, value), r in zip(saved_actions, rewards): 82 | reward = r - value.data[0] 83 | policy_losses.append(-log_prob * Variable(reward)) 84 | value_losses.append(F.smooth_l1_loss(value, Variable(torch.Tensor([r])))) 85 | optimizer.zero_grad() 86 | loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum() 87 | loss.backward() 88 | optimizer.step() 89 | del model.rewards[:] 90 | del model.saved_actions[:] 91 | 92 | # model.load_state_dict(torch.load('weights/{}.pt'.format("actor_critic_sonic1"))) 93 | 94 | running_length = 10 95 | max_reward = -100 96 | for i_episode in count(1): 97 | state = env.reset() 98 | current_reward = 0 99 | done = False 100 | t = 0 101 | flip = 0 102 | while not done: 103 | action = select_action(np.array(state)) 104 | state, reward, done, _ = env.step(action) 105 | # env.render() 106 | model.rewards.append(reward) 107 | current_reward += reward 108 | t += 1 109 | 110 | running_length = running_length * 0.99 + t * 0.01 111 | finish_episode() 112 | if i_episode % args.log_interval == 0: 113 | if current_reward > max_reward: 114 | max_reward = current_reward 115 | torch.save(model.state_dict(), 'weights/{}.pt'.format("actor_critic_sonic1")) 116 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}\tReward: {:.5f}'.format( 117 | i_episode, t, running_length, current_reward)) -------------------------------------------------------------------------------- /actor_critic/weights/actor_critic_sonic1.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/actor_critic/weights/actor_critic_sonic1.pt -------------------------------------------------------------------------------- /baseline/README.md: -------------------------------------------------------------------------------- 1 | # Algorithm 2 | Key features: 3 | - joint [PPO](https://arxiv.org/abs/1707.06347) training on all training games 4 | - mixup 5 | - an exploration bonus added to the reward, based on observations and x distance 6 | - training on the test level 7 | - choosing the best weights among several candidates during the first few test episodes 8 | 9 | 10 | # Training 11 | To run PPO training: 12 | 13 | 1) [Install docker](https://docs.docker.com/install/) 14 | 15 | 2) [Install nvidia docker](https://github.com/NVIDIA/nvidia-docker) 16 | 17 | 3) Build the image: 18 | 19 | $ docker build -t retro-ppo -f ppo2.docker .
20 | 21 | 4) Run training: 22 | 23 | $ docker run --runtime=nvidia retro-ppo 24 | -------------------------------------------------------------------------------- /baseline/all.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 50 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 51 | SonicTheHedgehog-Genesis,StarLightZone.Act3 52 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 53 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 54 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 55 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 56 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 57 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 58 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 59 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 60 | -------------------------------------------------------------------------------- /baseline/config.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | policy: "cnn" 3 | cnn: "openai1" 4 | max_steps: 100000000 5 | lr: 0.0002 6 | vf_coef: 0.5 7 | ent_coef: 0.01 8 | cliprange: 0.1 9 | gamma: 0.99 10 | 
lam: 0.95 11 | n_steps: 4500 12 | n_opt_epochs: 3 13 | batch_size: 4500 14 | max_grad_norm: 0.5 15 | n_envs: 12 16 | nmixup: 2 17 | log_interval: 5 18 | save_interval: 20 19 | weights_path: null 20 | adam_stats: "weight_stats" 21 | weights_choose_eps: 5 22 | 23 | env_params: 24 | exp_type: ["x", "obs"] 25 | exp_const: [0.005, 0.0001] 26 | color: False 27 | stack: 4 28 | scale_rew: True 29 | small_size: False 30 | -------------------------------------------------------------------------------- /baseline/config_test.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | cnn: "nature" 3 | weights_path: ["weights1.pkl", "weights2.pkl", "weights3.pkl", "weights4.pkl"] 4 | n_envs: 1 5 | nmixup: 0 6 | save_interval: 0 7 | log_interval: 1 8 | weights_choose_eps: 5 9 | 10 | env_params: 11 | stack: 2 12 | exp_type: ["x", "obs"] 13 | exp_const: [0.005, 0.001] 14 | socket_dir: "tmp/sock" -------------------------------------------------------------------------------- /baseline/config_train.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | cnn: "nature" 3 | 4 | env_params: 5 | game_states: "all.csv" 6 | max_episode_steps: 4500 7 | small_size: False 8 | stack: 2 -------------------------------------------------------------------------------- /baseline/jerk.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:bare 2 | 3 | ADD jerk_agent.py ./agent.py 4 | 5 | CMD ["python", "-u", "/root/compo/agent.py"] 6 | -------------------------------------------------------------------------------- /baseline/jerk_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | A scripted agent called "Just Enough Retained Knowledge". 5 | """ 6 | 7 | import random 8 | 9 | import gym 10 | import numpy as np 11 | 12 | import gym_remote.client as grc 13 | import gym_remote.exceptions as gre 14 | 15 | EMA_RATE = 0.2 16 | EXPLOIT_BIAS = 0.25 17 | TOTAL_TIMESTEPS = int(1e6) 18 | 19 | def main(): 20 | """Run JERK on the attached environment.""" 21 | env = grc.RemoteEnv('tmp/sock') 22 | env = TrackedEnv(env) 23 | new_ep = True 24 | solutions = [] 25 | while True: 26 | if new_ep: 27 | if (solutions and 28 | random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS): 29 | solutions = sorted(solutions, key=lambda x: np.mean(x[0])) 30 | best_pair = solutions[-1] 31 | new_rew = exploit(env, best_pair[1]) 32 | best_pair[0].append(new_rew) 33 | print('replayed best with reward %f' % new_rew) 34 | continue 35 | else: 36 | env.reset() 37 | new_ep = False 38 | rew, new_ep = move(env, 100) 39 | if not new_ep and rew <= 0: 40 | print('backtracking due to negative reward: %f' % rew) 41 | _, new_ep = move(env, 70, left=True) 42 | if new_ep: 43 | solutions.append(([max(env.reward_history)], env.best_sequence())) 44 | 45 | def move(env, num_steps, left=False, jump_prob=1.0 / 10.0, jump_repeat=4): 46 | """ 47 | Move right or left for a certain number of steps, 48 | jumping periodically. 
49 | """ 50 | total_rew = 0.0 51 | done = False 52 | steps_taken = 0 53 | jumping_steps_left = 0 54 | while not done and steps_taken < num_steps: 55 | action = np.zeros((12,), dtype=np.bool) 56 | action[6] = left 57 | action[7] = not left 58 | if jumping_steps_left > 0: 59 | action[0] = True 60 | jumping_steps_left -= 1 61 | else: 62 | if random.random() < jump_prob: 63 | jumping_steps_left = jump_repeat - 1 64 | action[0] = True 65 | _, rew, done, _ = env.step(action) 66 | total_rew += rew 67 | steps_taken += 1 68 | if done: 69 | break 70 | return total_rew, done 71 | 72 | def exploit(env, sequence): 73 | """ 74 | Replay an action sequence; pad with NOPs if needed. 75 | 76 | Returns the final cumulative reward. 77 | """ 78 | env.reset() 79 | done = False 80 | idx = 0 81 | while not done: 82 | if idx >= len(sequence): 83 | _, _, done, _ = env.step(np.zeros((12,), dtype='bool')) 84 | else: 85 | _, _, done, _ = env.step(sequence[idx]) 86 | idx += 1 87 | return env.total_reward 88 | 89 | class TrackedEnv(gym.Wrapper): 90 | """ 91 | An environment that tracks the current trajectory and 92 | the total number of timesteps ever taken. 93 | """ 94 | def __init__(self, env): 95 | super(TrackedEnv, self).__init__(env) 96 | self.action_history = [] 97 | self.reward_history = [] 98 | self.total_reward = 0 99 | self.total_steps_ever = 0 100 | 101 | def best_sequence(self): 102 | """ 103 | Get the prefix of the trajectory with the best 104 | cumulative reward. 105 | """ 106 | max_cumulative = max(self.reward_history) 107 | for i, rew in enumerate(self.reward_history): 108 | if rew == max_cumulative: 109 | return self.action_history[:i+1] 110 | raise RuntimeError('unreachable') 111 | 112 | # pylint: disable=E0202 113 | def reset(self, **kwargs): 114 | self.action_history = [] 115 | self.reward_history = [] 116 | self.total_reward = 0 117 | return self.env.reset(**kwargs) 118 | 119 | def step(self, action): 120 | self.total_steps_ever += 1 121 | self.action_history.append(action.copy()) 122 | obs, rew, done, info = self.env.step(action) 123 | self.total_reward += rew 124 | self.reward_history.append(self.total_reward) 125 | return obs, rew, done, info 126 | 127 | if __name__ == '__main__': 128 | try: 129 | main() 130 | except gre.GymRemoteError as exc: 131 | print('exception', exc) 132 | -------------------------------------------------------------------------------- /baseline/make_image.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DOCKER_REGISTRY="retrocontestrtidfxqehvzsuwpo.azurecr.io" 3 | docker build -f ppo2.subm.docker -t $DOCKER_REGISTRY/$1 . -------------------------------------------------------------------------------- /baseline/ppo2.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:tensorflow 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev wget && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Baselines has some unneeded and cumbersome dependencies, 9 | # so we manually fetch the deps we need. 10 | # Baselines has some unneeded and cumbersome dependencies, 11 | # so we manually fetch the deps we need. 12 | RUN . ~/venv/bin/activate && \ 13 | pip install scipy tqdm joblib zmq dill progressbar2 cloudpickle opencv-python pandas pyyaml && \ 14 | pip install gym-retro && \ 15 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && \ 16 | pip install . 17 | 18 | # ADD roms /roms/ 19 | # RUN . 
~/venv/bin/activate && find /roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; 20 | RUN wget -qO - https://www.dropbox.com/s/8i0mh0bn2bbe1w5/roms.tar.gz?dl=0 | tar xzv && \ 21 | . ~/venv/bin/activate && find ./roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; 22 | 23 | RUN . ~/venv/bin/activate && \ 24 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1e3f646f1859d2447348c647d42c48b7d6cc4423 25 | 26 | ADD sonic_util.py ./ 27 | ADD ppo2_agent.py ./ 28 | ADD utils.py ./ 29 | ADD train.sh ./ 30 | ADD *.yaml ./ 31 | ADD *.csv ./ 32 | 33 | CMD ["/bin/bash", "train.sh"] 34 | -------------------------------------------------------------------------------- /baseline/ppo2.subm.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:tensorflow 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Baselines has some unneeded and cumbersome dependencies, 9 | # so we manually fetch the deps we need. 10 | RUN . ~/venv/bin/activate && \ 11 | pip install scipy tqdm joblib zmq dill progressbar2 cloudpickle opencv-python pandas pyyaml && \ 12 | pip install gym-retro && \ 13 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && \ 14 | pip install . 15 | 16 | RUN . ~/venv/bin/activate && \ 17 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1e3f646f1859d2447348c647d42c48b7d6cc4423 18 | 19 | ADD *.pkl ./ 20 | 21 | ADD sonic_util.py ./ 22 | ADD ppo2_agent.py ./ 23 | ADD utils.py ./ 24 | ADD test.sh ./ 25 | ADD *.yaml ./ 26 | 27 | CMD ["/bin/bash", "test.sh"] 28 | -------------------------------------------------------------------------------- /baseline/ppo2_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Train an agent on Sonic using PPO2 from OpenAI Baselines. 5 | """ 6 | 7 | import tensorflow as tf 8 | 9 | from baselines.common.vec_env.dummy_vec_env import DummyVecEnv 10 | from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv 11 | import baselines.ppo2.ppo2 as ppo2 12 | import gym_remote.exceptions as gre 13 | import functools 14 | import argparse 15 | import sonic_util 16 | from baselines import logger 17 | from baselines.ppo2.policies import LstmPolicy, CnnPolicy 18 | import utils 19 | import os 20 | import yaml 21 | import warnings 22 | from datetime import datetime 23 | 24 | 25 | def add_boolean_flag(parser, name, default=False, help=None): 26 | """Add a boolean flag to argparse parser. 27 | Parameters 28 | ---------- 29 | parser: argparse.Parser 30 | parser to add the flag to 31 | name: str 32 | -- will enable the flag, while --no- will disable it 33 | default: bool or None 34 | default value of the flag 35 | help: str 36 | help string for the flag 37 | """ 38 | dest = name.replace('-', '_') 39 | parser.add_argument("--" + name, action="store_true", default=default, dest=dest, help=help) 40 | parser.add_argument("--no-" + name, action="store_false", dest=dest) 41 | 42 | 43 | def main(policy, env, params): 44 | """Run PPO until the environment throws an exception.""" 45 | config = tf.ConfigProto() 46 | config.gpu_options.allow_growth = True # pylint: disable=E1101 47 | with tf.Session(config=config): 48 | # Take more timesteps than we need to be sure that 49 | # we stop due to an exception. 
50 | ppo2.learn(policy=policy, 51 | env=env, 52 | nsteps=params['n_steps'], 53 | nminibatches=(params['n_steps']*env.num_envs) // params["batch_size"], 54 | lam=params["lam"], 55 | gamma=params['gamma'], 56 | noptepochs=params["n_opt_epochs"], 57 | log_interval=params["log_interval"], 58 | ent_coef=params["ent_coef"], 59 | vf_coef=params['vf_coef'], 60 | lr=lambda _: params["lr"], 61 | cliprange=lambda _: params['cliprange'], 62 | max_grad_norm=params['max_grad_norm'], 63 | total_timesteps=params["max_steps"], 64 | save_interval=params["save_interval"], 65 | weights_path=params["weights_path"], 66 | adam_stats=params["adam_stats"], 67 | nmixup=params["nmixup"], 68 | weights_choose_eps=params["weights_choose_eps"], 69 | cnn=params['cnn']) 70 | 71 | 72 | def run_train(): 73 | def _parse_args(): 74 | parser = argparse.ArgumentParser(description="Run commands") 75 | parser.add_argument('--config', type=str, default=None, nargs='+', 76 | help="file with config") 77 | return parser.parse_args() 78 | 79 | args = _parse_args() 80 | config = utils.load_config(args.config) 81 | 82 | env_params = config['env_params'] 83 | train_params = config['train_params'] 84 | 85 | if train_params["policy"] == 'lstm': 86 | policy = LstmPolicy 87 | elif train_params["policy"] == 'cnn': 88 | policy = CnnPolicy 89 | else: 90 | raise ValueError("unknown policy {}".format(train_params["policy"])) 91 | 92 | if train_params['cnn'] == "openai1" and not env_params['small_size']: 93 | warnings.warn('asked for openai1 policy, but small_size is not set in env params') 94 | 95 | # create environment functions 96 | n_envs = train_params['n_envs'] 97 | if n_envs == 1: 98 | vec_fn = DummyVecEnv 99 | elif n_envs > 1: 100 | vec_fn = SubprocVecEnv 101 | else: 102 | raise ValueError('number of environments less than 1: {}'.format(n_envs)) 103 | env = vec_fn([functools.partial(sonic_util.make_from_config, env_params) for _ in range(n_envs)]) 104 | 105 | logdir = os.path.join("logs", str(datetime.now())) 106 | logger.configure(logdir) 107 | 108 | # save run config 109 | with open(os.path.join(logdir, "run_config.yaml"), 'w') as f: 110 | yaml.dump(config, f) 111 | 112 | main(policy, env, train_params) 113 | 114 | 115 | if __name__ == '__main__': 116 | try: 117 | run_train() 118 | except gre.GymRemoteError as exc: 119 | print('exception', exc) 120 | -------------------------------------------------------------------------------- /baseline/rainbow.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:tensorflow 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Baselines has some unneeded and cumbersome dependencies, 9 | # so we manually fetch the deps we need. 10 | RUN . ~/venv/bin/activate && \ 11 | pip install scipy tqdm joblib zmq dill progressbar2 cloudpickle opencv-python && \ 12 | pip install --no-deps git+https://github.com/openai/baselines.git 13 | 14 | # Use the anyrl open source RL framework. 15 | RUN . ~/venv/bin/activate && \ 16 | pip install anyrl==0.11.17 17 | 18 | ADD rainbow_agent.py ./agent.py 19 | ADD sonic_util.py . 
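# rainbow_agent.py is shipped as agent.py above so that the CMD below can
# launch it as /root/compo/agent.py.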
20 | 21 | CMD ["python", "-u", "/root/compo/agent.py"] 22 | -------------------------------------------------------------------------------- /baseline/rainbow_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Train an agent on Sonic using an open source Rainbow DQN 5 | implementation. 6 | """ 7 | 8 | import tensorflow as tf 9 | 10 | from anyrl.algos import DQN 11 | from anyrl.envs import BatchedGymEnv 12 | from anyrl.envs.wrappers import BatchedFrameStack 13 | from anyrl.models import rainbow_models 14 | from anyrl.rollouts import BatchedPlayer, PrioritizedReplayBuffer, NStepPlayer 15 | from anyrl.spaces import gym_space_vectorizer 16 | import gym_remote.exceptions as gre 17 | 18 | from sonic_util import AllowBacktracking, make_env 19 | 20 | def main(): 21 | """Run DQN until the environment throws an exception.""" 22 | env = AllowBacktracking(make_env(stack=False, scale_rew=False)) 23 | env = BatchedFrameStack(BatchedGymEnv([[env]]), num_images=4, concat=False) 24 | config = tf.ConfigProto() 25 | config.gpu_options.allow_growth = True # pylint: disable=E1101 26 | with tf.Session(config=config) as sess: 27 | dqn = DQN(*rainbow_models(sess, 28 | env.action_space.n, 29 | gym_space_vectorizer(env.observation_space), 30 | min_val=-200, 31 | max_val=200)) 32 | player = NStepPlayer(BatchedPlayer(env, dqn.online_net), 3) 33 | optimize = dqn.optimize(learning_rate=1e-4) 34 | sess.run(tf.global_variables_initializer()) 35 | dqn.train(num_steps=2000000, # Make sure an exception arrives before we stop. 36 | player=player, 37 | replay_buffer=PrioritizedReplayBuffer(500000, 0.5, 0.4, epsilon=0.1), 38 | optimize_op=optimize, 39 | train_interval=1, 40 | target_interval=8192, 41 | batch_size=32, 42 | min_buffer_size=20000) 43 | 44 | if __name__ == '__main__': 45 | try: 46 | main() 47 | except gre.GymRemoteError as exc: 48 | print('exception', exc) 49 | -------------------------------------------------------------------------------- /baseline/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | tqdm 3 | joblib 4 | zmq 5 | dill 6 | progressbar2 7 | cloudpickle 8 | opencv-python 9 | pandas 10 | gym-retro 11 | pyyaml 12 | -------------------------------------------------------------------------------- /baseline/roms/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/baseline/roms/.gitkeep -------------------------------------------------------------------------------- /baseline/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | conda create -n retro python=3.5 -y 3 | source activate retro 4 | pip install http://download.pytorch.org/whl/cu91/torch-0.4.0-cp35-cp35m-linux_x86_64.whl 5 | pip install -r requirements.txt 6 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1e3f646f1859d2447348c647d42c48b7d6cc4423 7 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && pip install . 
8 | 9 | # download roms 10 | wget -qO - https://www.dropbox.com/s/8i0mh0bn2bbe1w5/roms.tar.gz?dl=0 | tar xzv 11 | find ./roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; -------------------------------------------------------------------------------- /baseline/simple-agent.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent 2 | ADD simple-agent.py . 3 | CMD ["python", "-u", "/root/compo/simple-agent.py"] 4 | -------------------------------------------------------------------------------- /baseline/simple-agent.py: -------------------------------------------------------------------------------- 1 | import gym_remote.exceptions as gre 2 | import gym_remote.client as grc 3 | 4 | 5 | def main(): 6 | print('connecting to remote environment') 7 | env = grc.RemoteEnv('tmp/sock') 8 | print('starting episode') 9 | env.reset() 10 | while True: 11 | action = env.action_space.sample() 12 | action[7] = 1 13 | ob, reward, done, _ = env.step(action) 14 | if done: 15 | print('episode complete') 16 | env.reset() 17 | 18 | 19 | if __name__ == '__main__': 20 | try: 21 | main() 22 | except gre.GymRemoteError as e: 23 | print('exception', e) -------------------------------------------------------------------------------- /baseline/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python -u /root/compo/ppo2_agent.py --config config.yaml config_test.yaml -------------------------------------------------------------------------------- /baseline/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python -u /root/compo/ppo2_agent.py --config config.yaml config_train.yaml -------------------------------------------------------------------------------- /baseline/train_large.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | 
SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | -------------------------------------------------------------------------------- /baseline/train_nodocker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | CUDA_DEVICE_ORDER="PCI_BUS_ID" CUDA_VISIBLE_DEVICES="$1" python ppo2_agent.py --config config.yaml config_train.yaml 3 | -------------------------------------------------------------------------------- /baseline/train_small.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,StarLightZone.Act2 4 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 5 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 6 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 7 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 8 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 10 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 11 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 12 | -------------------------------------------------------------------------------- /baseline/utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import yaml 3 | 4 | 5 | def load_config(fnames): 6 | 7 | config = {} 8 | for fname in fnames: 9 | with open(fname) as f: 10 | config = merge_dictionaries(config, yaml.safe_load(f)) 11 | 12 | return config 13 | 14 | 15 | def add_boolean_flag(parser, name, default=False, help=None): 16 | """Add a boolean flag to argparse parser. 
17 | Parameters 18 | ---------- 19 | parser: argparse.ArgumentParser 20 | parser to add the flag to 21 | name: str 22 | --<name> will enable the flag, while --no-<name> will disable it 23 | default: bool or None 24 | default value of the flag 25 | help: str 26 | help string for the flag 27 | """ 28 | dest = name.replace('-', '_') 29 | parser.add_argument("--" + name, action="store_true", default=default, dest=dest, help=help) 30 | parser.add_argument("--no-" + name, action="store_false", dest=dest) 31 | 32 | 33 | def merge_dictionaries(a, b, path_to_root=None, extend_lists=False): 34 | """ 35 | Creates a copy of dictionary `a` and recursively updates its entries with the entries from `b`. 36 | :param extend_lists: 37 | if True and the value is a list in both dictionaries (when both contain the key), the elements from b are appended 38 | to the elements from a; if the value in one of the dictionaries is not a list, a ValueError is raised 39 | if False, values of type list are treated as ordinary values and simply replace/overwrite each other 40 | """ 41 | res = deepcopy(a) 42 | 43 | if path_to_root is None: 44 | path_to_root = [] 45 | 46 | for key in b: 47 | if key not in res: 48 | res[key] = b[key] 49 | continue 50 | if isinstance(res[key], dict): 51 | if isinstance(b[key], dict): 52 | res[key] = merge_dictionaries(res[key], b[key], path_to_root + [str(key)], extend_lists=extend_lists) 53 | else: 54 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 55 | elif extend_lists and isinstance(res[key], list): 56 | if isinstance(b[key], list): 57 | res[key].extend(b[key]) 58 | else: 59 | raise ValueError( 60 | "Cannot extend a list with a non-list. Path: {}".format('.'.join(path_to_root + [str(key)]))) 61 | else: 62 | if extend_lists and isinstance(b[key], list): 63 | raise ValueError( 64 | "Cannot extend a non-list with a list. Path: {}".format('.'.join(path_to_root + [str(key)]))) 65 | elif not isinstance(b[key], dict): 66 | res[key] = b[key] 67 | else: 68 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 69 | return res 70 | -------------------------------------------------------------------------------- /baseline/validation.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 3 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 4 | SonicTheHedgehog-Genesis,StarLightZone.Act3 5 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 6 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 7 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 8 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 10 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 11 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 12 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 13 | -------------------------------------------------------------------------------- /deepneuroevolution/README.md: -------------------------------------------------------------------------------- 1 | ## AI Labs Neuroevolution Algorithms 2 | 3 | This repo contains distributed implementations of the algorithms described in: 4 | 5 | [1] [Deep Neuroevolution: Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforcement Learning](https://arxiv.org/abs/1712.06567) 6 | 7 | [2] [Improving Exploration in Evolution Strategies for Deep Reinforcement Learning via a Population of Novelty-Seeking Agents](https://arxiv.org/abs/1712.06560) 8 | 9 | Our code is based on code from OpenAI, whom we thank. 
The original code and related paper from OpenAI can be found [here](https://github.com/openai/evolution-strategies-starter). The repo has been modified to run both ES and our algorithms, including our Deep Genetic Algorithm (DeepGA), locally and on AWS. 10 | 11 | Note: The Humanoid experiment depends on [Mujoco](http://www.mujoco.org/). Please provide your own Mujoco license and binary. 12 | 13 | The article describing these papers can be found [here](https://eng.uber.com/deep-neuroevolution/). 14 | 15 | ## Visual Inspector for NeuroEvolution (VINE) 16 | The folder `./visual_inspector` contains implementations of VINE, i.e., Visual Inspector for NeuroEvolution, an interactive data visualization tool for neuroevolution. Refer to `README.md` in that folder for further instructions on running and customizing your visualization. An article describing this visualization tool can be found [here](https://eng.uber.com/vine/). 17 | 18 | ## Accelerated Deep Neuroevolution 19 | The folder `./gpu_implementation` contains an implementation that uses the GPU more efficiently. Refer to `README.md` in that folder for further instructions. 20 | 21 | ## How to run locally 22 | 23 | clone repo 24 | 25 | ``` 26 | git clone https://github.com/uber-common/deep-neuroevolution.git 27 | ``` 28 | 29 | create python3 virtual env 30 | 31 | ``` 32 | python3 -m venv env 33 | . env/bin/activate 34 | ``` 35 | 36 | install requirements 37 | ``` 38 | pip install -r requirements.txt 39 | ``` 40 | If you plan to use the mujoco env, make sure to follow [mujoco-py](https://github.com/openai/mujoco-py)'s readme about how to install mujoco correctly. 41 | 42 | launch redis 43 | ``` 44 | . scripts/local_run_redis.sh 45 | ``` 46 | 47 | launch sample ES experiment 48 | ``` 49 | . scripts/local_run_exp.sh es configurations/frostbite_es.json # For the Atari game Frostbite 50 | . scripts/local_run_exp.sh es configurations/humanoid.json # For the MuJoCo Humanoid-v1 environment 51 | ``` 52 | 53 | launch sample NS-ES experiment 54 | ``` 55 | . scripts/local_run_exp.sh ns-es configurations/frostbite_nses.json 56 | . scripts/local_run_exp.sh ns-es configurations/humanoid_nses.json 57 | ``` 58 | 59 | launch sample NSR-ES experiment 60 | ``` 61 | . scripts/local_run_exp.sh nsr-es configurations/frostbite_nsres.json 62 | . scripts/local_run_exp.sh nsr-es configurations/humanoid_nsres.json 63 | ``` 64 | 65 | launch sample GA experiment 66 | ``` 67 | . scripts/local_run_exp.sh ga configurations/frostbite_ga.json # For the Atari game Frostbite 68 | ``` 69 | 70 | launch sample Random Search experiment 71 | ``` 72 | . scripts/local_run_exp.sh rs configurations/frostbite_ga.json # For the Atari game Frostbite 73 | ``` 74 | 75 | 76 | visualize results by running a policy file 77 | ``` 78 | python -m scripts.viz 'FrostbiteNoFrameskip-v4' 79 | python -m scripts.viz 'Humanoid-v1' 80 | ``` 81 | 82 | ### extra folder 83 | The extra folder holds the XML specification file for the Humanoid 84 | Locomotion with Deceptive Trap domain used in https://arxiv.org/abs/1712.06560. Use this XML file in gym to recreate the environment. 
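As a quick sanity check, the XML can also be loaded directly, outside of a registered gym environment. A minimal sketch (assuming a licensed `mujoco-py` >= 1.50 installation; run from the repo root):

```
# Load the deceptive-trap humanoid model with mujoco-py and step it briefly.
import mujoco_py

model = mujoco_py.load_model_from_path('extra/humanoid_maze.xml')
sim = mujoco_py.MjSim(model)
for _ in range(100):
    sim.step()           # advance the physics with zero controls
print(sim.data.qpos)     # joint positions after 100 steps
```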
85 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_es.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 5000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "FrostbiteNoFrameskip-v4", 14 | "optimizer": { 15 | "args": { 16 | "stepsize": 0.01 17 | }, 18 | "type": "adam" 19 | }, 20 | "policy": { 21 | "args" : {}, 22 | "type": "ESAtariPolicy" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 5000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "population_size": 10, 14 | "num_elites": 1, 15 | "env_id": "FrostbiteNoFrameskip-v4", 16 | "policy": { 17 | "args" : { 18 | "nonlin_type": "relu" 19 | }, 20 | "type": "GAAtariPolicy" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_nses.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 100, 5 | "eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 1000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "FrostbiteNoFrameskip-v4", 14 | "algo_type": "ns", 15 | "novelty_search": { 16 | "k": 10, 17 | "population_size": 3, 18 | "num_rollouts": 1, 19 | "selection_method": "novelty_prob" 20 | }, 21 | "optimizer": { 22 | "args": { 23 | "stepsize": 0.01 24 | }, 25 | "type": "adam" 26 | }, 27 | "policy": { 28 | "args": {}, 29 | "type": "ESAtariPolicy" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_nsres.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.1, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "FrostbiteNoFrameskip-v4", 14 | "algo_type": "nsr", 15 | "novelty_search": { 16 | "k": 10, 17 | "population_size": 3, 18 | "num_rollouts": 1, 19 | "selection_method": "novelty_prob" 20 | }, 21 | "optimizer": { 22 | "args": { 23 | "stepsize": 0.01 24 | }, 25 | "type": "adam" 26 | }, 27 | "policy": { 28 | "args": {}, 29 | "type": "ESAtariPolicy" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/humanoid.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.01, 4 | "episodes_per_batch": 1000, 5 | 
"eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 100000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": "env_default" 12 | }, 13 | "env_id": "Humanoid-v1", 14 | "exp_prefix": "humanoid", 15 | "optimizer": { 16 | "args": { 17 | "stepsize": 0.01 18 | }, 19 | "type": "adam" 20 | }, 21 | "policy": { 22 | "args": { 23 | "ac_bins": "continuous:", 24 | "ac_noise_std": 0.01, 25 | "connection_type": "ff", 26 | "hidden_dims": [ 27 | 256, 28 | 256 29 | ], 30 | "nonlin_type": "tanh" 31 | }, 32 | "type": "MujocoPolicy" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/humanoid_nses.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.01, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 100000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": "env_default" 12 | }, 13 | "env_id": "Humanoid-v1", 14 | "algo_type": "ns", 15 | "exp_prefix": "humanoid", 16 | "novelty_search": { 17 | "k": 10, 18 | "population_size": 5, 19 | "num_rollouts": 5, 20 | "selection_method": "novelty_prob" 21 | }, 22 | "optimizer": { 23 | "args": { 24 | "stepsize": 0.01 25 | }, 26 | "type": "adam" 27 | }, 28 | "policy": { 29 | "args": { 30 | "ac_bins": "continuous:", 31 | "ac_noise_std": 0.01, 32 | "connection_type": "ff", 33 | "hidden_dims": [ 34 | 256, 35 | 256 36 | ], 37 | "nonlin_type": "tanh" 38 | }, 39 | "type": "MujocoPolicy" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/humanoid_nsres.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.01, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 100000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": "env_default" 12 | }, 13 | "env_id": "Humanoid-v1", 14 | "algo_type": "nsr", 15 | "exp_prefix": "humanoid", 16 | "novelty_search": { 17 | "k": 10, 18 | "population_size": 5, 19 | "num_rollouts": 5, 20 | "selection_method": "novelty_prob" 21 | }, 22 | "optimizer": { 23 | "args": { 24 | "stepsize": 0.01 25 | }, 26 | "type": "adam" 27 | }, 28 | "policy": { 29 | "args": { 30 | "ac_bins": "continuous:", 31 | "ac_noise_std": 0.01, 32 | "connection_type": "ff", 33 | "hidden_dims": [ 34 | 256, 35 | 256 36 | ], 37 | "nonlin_type": "tanh" 38 | }, 39 | "type": "MujocoPolicy" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/sonic_es.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 2000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "LabyrinthZone.Act1", 14 | "optimizer": { 15 | "args": { 16 | "stepsize": 0.01 17 | }, 18 | "type": "adam" 19 | }, 20 | "policy": { 21 | "args" : {}, 22 | "type": "ESAtariPolicy" 23 | } 24 | } 25 | 
-------------------------------------------------------------------------------- /deepneuroevolution/configurations/sonic_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "population_size": 10, 14 | "num_elites": 1, 15 | "env_id": "LabyrinthZone.Act1", 16 | "policy": { 17 | "args" : { 18 | "nonlin_type": "elu" 19 | }, 20 | "type": "GAAtariPolicy" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/sonic_nsres.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.1, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "LabyrinthZone.Act1", 14 | "algo_type": "nsr", 15 | "novelty_search": { 16 | "k": 10, 17 | "population_size": 3, 18 | "num_rollouts": 1, 19 | "selection_method": "novelty_prob" 20 | }, 21 | "optimizer": { 22 | "args": { 23 | "stepsize": 0.01 24 | }, 25 | "type": "adam" 26 | }, 27 | "policy": { 28 | "args": {}, 29 | "type": "ESAtariPolicy" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /deepneuroevolution/es_distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/es_distributed/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/es_distributed/main.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | 7 | import click 8 | 9 | from .dist import RelayClient 10 | from .es import run_master, run_worker, SharedNoiseTable 11 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 12 | 13 | def mkdir_p(path): 14 | try: 15 | os.makedirs(path) 16 | except OSError as exc: 17 | if exc.errno == errno.EEXIST and os.path.isdir(path): 18 | pass 19 | else: 20 | raise 21 | 22 | @click.group() 23 | def cli(): 24 | logging.basicConfig( 25 | format='[%(asctime)s pid=%(process)d] %(message)s', 26 | level=logging.INFO, 27 | stream=sys.stderr) 28 | 29 | def import_algo(name): 30 | if name == 'es': 31 | from . import es as algo 32 | elif name == 'ns-es' or name == "nsr-es": 33 | from . import nses as algo 34 | elif name == 'ga': 35 | from . import ga as algo 36 | elif name == 'rs': 37 | from . 
import rs as algo 38 | else: 39 | raise NotImplementedError() 40 | return algo 41 | 42 | @cli.command() 43 | @click.option('--algo') 44 | @click.option('--exp_str') 45 | @click.option('--exp_file') 46 | @click.option('--master_socket_path', required=True) 47 | @click.option('--log_dir') 48 | def master(algo, exp_str, exp_file, master_socket_path, log_dir): 49 | # Start the master 50 | assert (exp_str is None) != (exp_file is None), 'Must provide exp_str xor exp_file to the master' 51 | if exp_str: 52 | exp = json.loads(exp_str) 53 | elif exp_file: 54 | with open(exp_file, 'r') as f: 55 | exp = json.loads(f.read()) 56 | else: 57 | assert False 58 | log_dir = os.path.expanduser(log_dir) if log_dir else '/tmp/es_master_{}'.format(os.getpid()) 59 | mkdir_p(log_dir) 60 | algo = import_algo(algo) 61 | algo.run_master({'unix_socket_path': master_socket_path}, log_dir, exp) 62 | 63 | 64 | @cli.command() 65 | @click.option('--algo') 66 | @click.option('--master_host', required=True) 67 | @click.option('--master_port', default=6379, type=int) 68 | @click.option('--relay_socket_path', required=True) 69 | @click.option('--num_workers', type=int, default=0) 70 | def workers(algo, master_host, master_port, relay_socket_path, num_workers): 71 | # Start the relay 72 | master_redis_cfg = {'host': master_host, 'port': master_port} 73 | relay_redis_cfg = {'unix_socket_path': relay_socket_path} 74 | if os.fork() == 0: 75 | RelayClient(master_redis_cfg, relay_redis_cfg).run() 76 | return 77 | # Start the workers 78 | algo = import_algo(algo) 79 | noise = algo.SharedNoiseTable() # Workers share the same noise 80 | num_workers = num_workers if num_workers else os.cpu_count() 81 | logging.info('Spawning {} workers'.format(num_workers)) 82 | for _ in range(num_workers): 83 | if os.fork() == 0: 84 | algo.run_worker(master_redis_cfg, relay_redis_cfg, noise=noise) 85 | return 86 | os.wait() 87 | 88 | 89 | if __name__ == '__main__': 90 | cli() 91 | -------------------------------------------------------------------------------- /deepneuroevolution/es_distributed/optimizers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Optimizer(object): 5 | def __init__(self, theta): 6 | self.theta = theta 7 | self.dim = len(self.theta) 8 | self.t = 0 9 | 10 | def update(self, globalg): 11 | self.t += 1 12 | step = self._compute_step(globalg) 13 | theta = self.theta 14 | ratio = np.linalg.norm(step) / np.linalg.norm(theta) 15 | new_theta = self.theta + step 16 | self.theta = new_theta 17 | return ratio, new_theta 18 | 19 | def _compute_step(self, globalg): 20 | raise NotImplementedError 21 | 22 | 23 | class SGD(Optimizer): 24 | def __init__(self, theta, stepsize, momentum=0.9): 25 | Optimizer.__init__(self, theta) 26 | self.v = np.zeros(self.dim, dtype=np.float32) 27 | self.stepsize, self.momentum = stepsize, momentum 28 | 29 | def _compute_step(self, globalg): 30 | self.v = self.momentum * self.v + (1. 
- self.momentum) * globalg 31 | step = -self.stepsize * self.v 32 | return step 33 | 34 | 35 | class Adam(Optimizer): 36 | def __init__(self, theta, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08): 37 | Optimizer.__init__(self, theta) 38 | self.stepsize = stepsize 39 | self.beta1 = beta1 40 | self.beta2 = beta2 41 | self.epsilon = epsilon 42 | self.m = np.zeros(self.dim, dtype=np.float32) 43 | self.v = np.zeros(self.dim, dtype=np.float32) 44 | 45 | def _compute_step(self, globalg): 46 | a = self.stepsize * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t) 47 | self.m = self.beta1 * self.m + (1 - self.beta1) * globalg 48 | self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg) 49 | step = -a * self.m / (np.sqrt(self.v) + self.epsilon) 50 | return step 51 | 52 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/README.md: -------------------------------------------------------------------------------- 1 | ## AI Labs - GPU Neuroevolution 2 | This folder contains preliminary work done to implement GPU-based deep neuroevolution. 3 | For problems like Atari, where policy evaluation takes a considerable amount of time, it is advantageous to make use of GPUs to evaluate the neural networks. This code shows how it is possible to run Atari simulations in parallel using the GPU in a way where we can evaluate neural networks in batches and have both CPU and GPU operating at the same time. 4 | 5 | This folder has code in prototype stage and still requires a lot of changes to optimize performance, maintainability, and testing. We welcome pull requests to this repo and have plans to improve it in the future. Although it can run CPU-only, it is slower than our original implementation due to overhead. Once this implementation has matured we plan on distributing it as a package for easy installation. We included an implementation of the HardMaze, but the GA-NS implementation will be added later on. 6 | 7 | ## Installation 8 | 9 | clone repo 10 | 11 | ``` 12 | git clone https://github.com/uber-common/deep-neuroevolution.git 13 | ``` 14 | 15 | create python3 virtual env 16 | 17 | ``` 18 | python3 -m venv env 19 | . env/bin/activate 20 | ``` 21 | 22 | install tensorflow or tensorflow-gpu > 1.2. 23 | ``` 24 | pip install tensorflow-gpu 25 | ``` 26 | Follow instructions under ./gym_tensorflow/README on how to compile the optimized interfaces. 27 | 28 | To train GA on Atari just run: 29 | ``` 30 | python ga.py ga_atari_config.json 31 | ``` 32 | Random search (a special case of GA where 0 individuals become parents): 33 | ``` 34 | python ga.py rs_atari_config.json 35 | ``` 36 | 37 | Evolution Strategies: 38 | ``` 39 | python es.py es_atari_config.json 40 | ``` 41 | 42 | Visualizing policies is possible if you install gym with `pip install gym` and run: 43 | ``` 44 | python -m neuroevolution.display 45 | ``` 46 | We currently have one example policy but more will be added in the future. 47 | 48 | ## Breakdown 49 | 50 | * gym_tensorflow - Folder containing TensorFlow custom ops for Reinforcement Learning (Atari, Hard Maze); see the usage sketch below. 51 | * moving away from python-based environments yields significant speed-ups in a multithreaded environment. 52 | * neuroevolution - folder containing source code to evaluate many policies simultaneously. 53 | * concurrent_worker.py - Improved implementation where each thread can evaluate a dynamically sized batch of policies at a time. Needs custom Tensorflow ops. 
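As a rough sketch of how the batched API is meant to be used (modeled on `get_ref_batch` in `gym_tensorflow/__init__.py`; an illustration, not a tested example), every environment interaction is a TensorFlow op, so a single `session.run` advances the entire batch:

```
import tensorflow as tf
import gym_tensorflow

batch_size = 64
env = gym_tensorflow.make('frostbite', batch_size=batch_size)
# One random discrete action per environment in the batch.
actions = tf.random_uniform((batch_size,), minval=0, maxval=env.action_space, dtype=tf.int32)
reset_op = env.reset()
rew_op, done_op = env.step(actions)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(reset_op)
    for _ in range(100):
        rew, done = sess.run([rew_op, done_op])  # steps all 64 simulations at once
```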
54 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/configurations/es_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "ModelVirtualBN", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 5000, 7 | "timesteps": 250e6, 8 | "episode_cutoff_mode": 5000, 9 | "return_proc_mode": "centered_rank", 10 | "l2coeff": 0.005, 11 | "mutation_power": 0.02, 12 | "optimizer": { 13 | "args": { 14 | "stepsize": 0.01 15 | }, 16 | "type": "adam" 17 | } 18 | } -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/configurations/ga_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "LargeModel", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 1000, 7 | "episode_cutoff_mode": 5000, 8 | "timesteps": 1.5e9, 9 | "validation_threshold": 10, 10 | "mutation_power": 0.002, 11 | "selection_threshold": 20 12 | } -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/configurations/rs_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "Model", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 1000, 7 | "episode_cutoff_mode": 5000, 8 | "timesteps": 1.5e9, 9 | "validation_threshold": 10, 10 | "mutation_power": 0.002, 11 | "selection_threshold": 0 12 | } -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/Makefile: -------------------------------------------------------------------------------- 1 | USE_SDL := 0 2 | USE_ALE := 0 3 | USE_GPU := 1 4 | 5 | DIR := ./ 6 | 7 | TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 8 | TF_LIB := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 9 | FLAGS := -std=c++11 -shared -fPIC -I$(TF_INC) -I$(TF_INC)/external/nsync/public -L$(TF_LIB) -D_GLIBCXX_USE_CXX11_ABI=0 -O2 10 | CXX := g++ 11 | LDFLAGS := -ltensorflow_framework 12 | 13 | SOURCES := $(DIR)/*.cpp $(DIR)/ops/*.cpp 14 | 15 | ifeq ($(USE_GPU), 1) 16 | FLAGS += -DGOOGLE_CUDA=1 17 | endif 18 | 19 | # This will likely need to be changed to suit your installation. 
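# The ALE path below assumes the atari-py checkout described in
# gym_tensorflow/atari/README.md (cloned into this folder); adjust it if your
# checkout lives elsewhere.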
20 | ifeq ($(USE_ALE), 1) 21 | ALE := $(shell pwd)/atari-py/atari_py/ale_interface 22 | FLAGS += -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE)/build 23 | LDFLAGS += -lale 24 | SOURCES += $(DIR)/atari/*.cpp 25 | endif 26 | 27 | UNAME_S := $(shell uname -s) 28 | ifeq ($(UNAME_S),Linux) 29 | FLAGS += -Wl,-rpath=$(ALE)/build 30 | endif 31 | ifeq ($(UNAME_S),Darwin) 32 | FLAGS += -framework Cocoa 33 | endif 34 | 35 | ifeq ($(strip $(USE_SDL)), 1) 36 | DEFINES += -D__USE_SDL -DSOUND_SUPPORT 37 | FLAGS += $(shell sdl-config --cflags) 38 | LDFLAGS += $(shell sdl-config --libs) 39 | endif 40 | 41 | 42 | all: gym_tensorflow.so 43 | 44 | gym_tensorflow.so: 45 | $(CXX) $(FLAGS) $(SOURCES) $(LDFLAGS) -o gym_tensorflow.so 46 | 47 | clean: 48 | rm -rf gym_tensorflow.so 49 | 50 | remake: clean all 51 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/README.md: -------------------------------------------------------------------------------- 1 | Instructions 2 | ----------------- 3 | 4 | This module provides C++/TensorFlow interfaces that operate similarly to OpenAI's gym library. Since it was built to remove Python from the critical portion of the code (the simulations), it provides a significant speed-up in multithreaded environments. 5 | We currently provide two environments that use this interface: Atari and Hard Maze. The Atari environment is supported but optional. Our Atari support is licensed under GPLv2, and instructions on how to use it can be found in the `./atari` folder. 6 | 7 | To compile this module, open the `Makefile` and adjust the settings (USE_GPU, USE_ALE, etc.); once configured, run `make` to build from source. 8 | 9 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .tf_env import GymEnv 4 | from . import atari, maze 5 | from .wrappers import StackFramesWrapper 6 | 7 | def make(game, batch_size, *args, **kwargs): 8 | if game == 'maze': 9 | return maze.MazeEnv(batch_size) 10 | if game in atari.games: 11 | return StackFramesWrapper(atari.AtariEnv(game, batch_size, *args, **kwargs)) 12 | if game.startswith('gym.'): 13 | return GymEnv(game[4:], batch_size, *args, **kwargs) 14 | raise NotImplementedError(game) 15 | 16 | 17 | def get_ref_batch(make_env_f, sess, batch_size): 18 | env = make_env_f(1) 19 | assert env.discrete_action 20 | actions = tf.random_uniform((1,), minval=0, maxval=env.action_space, dtype=tf.int32) 21 | 22 | reset_op = env.reset() 23 | obs_op = env.observation() 24 | rew_op, done_op = env.step(actions) 25 | 26 | sess.run(tf.global_variables_initializer()) 27 | 28 | sess.run(reset_op) 29 | 30 | ref_batch = [] 31 | while len(ref_batch) < batch_size: 32 | obs, done = sess.run([obs_op, done_op]) 33 | ref_batch.append(obs) 34 | if done.any(): 35 | sess.run(reset_op) 36 | 37 | return np.concatenate(ref_batch) 38 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/atari/README.md: -------------------------------------------------------------------------------- 1 | Notice 2 | ----------------- 3 | The ALE/atari-py is not part of deep-neuroevolution. 
4 | This folder provides instructions and sample code for running the ALE. 5 | It depends on atari-py. atari-py is licensed under GPLv2. 6 | 7 | Instructions 8 | ----------------- 9 | 10 | The first thing to do is clone the atari-py repository into the `gym_tensorflow` folder using 11 | ``` 12 | git clone https://github.com/fps7806/atari-py.git 13 | ``` 14 | The relative path is important but can be changed inside the `Makefile` as necessary. 15 | 16 | We will be using slightly different settings for the build, so you need to open the ./atari-py/atari_py/ale_interface/CMakeLists.txt file and change the first lines to: 17 | 18 | ``` 19 | cmake_minimum_required (VERSION 2.6) 20 | project(ale) 21 | set(ALEVERSION "0.5") 22 | 23 | 24 | option(USE_SDL "Use SDL" OFF) 25 | option(USE_RLGLUE "Use RL-Glue" OFF) 26 | option(BUILD_EXAMPLES "Build Example Agents" OFF) 27 | option(BUILD_CPP_LIB "Build C++ Shared Library" ON) 28 | option(BUILD_CLI "Build ALE Command Line Interface" OFF) 29 | option(BUILD_C_LIB "Build ALE C Library (needed for Python interface)" OFF) 30 | 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused -fPIC -O3 -fomit-frame-pointer -D__STDC_CONSTANT_MACROS -D_GLIBCXX_USE_CXX11_ABI=0") 32 | ``` 33 | 34 | This ensures that the C++ lib is compiled and adds `-D_GLIBCXX_USE_CXX11_ABI=0`, which is required for compatibility with TensorFlow. 35 | Once modified, you can build the library with `cd ./atari-py && make`. 36 | 37 | Once built successfully, the `USE_ALE := 1` flag can be set in the ./gym_tensorflow/Makefile so that the necessary files are compiled. 38 | 39 | Running `cd ./gym_tensorflow && make` should then give you access to the Atari games as a set of TensorFlow ops. -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from .. import tf_env 2 | 3 | from .tf_atari import * 4 | 5 | if not hasattr(tf_env.gym_tensorflow_module, 'atari_make'): 6 | class AtariEnv(TensorFlowEnv): 7 | def __init__(self, *args, **kwargs): 8 | raise NotImplementedError("gym_tensorflow was not compiled with ALE support.") 9 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/atari/tf_atari.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tensorflow/core/framework/op_kernel.h" 5 | #include "tensorflow/core/framework/op.h" 6 | #include "tensorflow/core/framework/shape_inference.h" 7 | #include "tensorflow/core/framework/resource_mgr.h" 8 | #include "tensorflow/core/framework/resource_op_kernel.h" 9 | #include "tensorflow/core/lib/core/blocking_counter.h" 10 | #include "tensorflow/core/lib/core/threadpool.h" 11 | #include "tensorflow/core/platform/mutex.h" 12 | #include "../tf_env.h" 13 | 14 | #ifdef __USE_SDL 15 | #include 16 | #endif 17 | 18 | using namespace tensorflow; 19 | using namespace std; 20 | using namespace ale; 21 | 22 | #define RAM_SIZE (128) 23 | 24 | class AtariEnvironment : public Environment<uint8>, public StepInterface<int> 25 | { 26 | public: 27 | AtariEnvironment(int batch_size) 28 | { 29 | m_numNoops.resize(batch_size, 0); 30 | m_maxFrames.resize(batch_size, 100000); 31 | m_pInterfaces = new ALEInterface[batch_size]; 32 | } 33 | void load_rom(string game, int i) 34 | { 35 | assert(m_numNoops[i] == 0); 
m_numNoops[i] = 1; 37 | m_pInterfaces[i].setFloat("repeat_action_probability", 0.0f); 38 | m_pInterfaces[i].setInt("random_seed", 0); 39 | m_pInterfaces[i].loadROM(game); 40 | } 41 | virtual ~AtariEnvironment() { 42 | delete[] m_pInterfaces; 43 | } 44 | 45 | TensorShape get_action_shape() override 46 | { 47 | return TensorShape(); 48 | } 49 | 50 | TensorShape get_observation_shape() override 51 | { 52 | return TensorShape({2, 53 | static_cast<int>(m_pInterfaces[0].getScreen().height()), 54 | static_cast<int>(m_pInterfaces[0].getScreen().width())}); 55 | } 56 | 57 | void get_observation(uint8 *data, int idx) override 58 | { 59 | const auto ssize = m_pInterfaces[idx].getScreen().height() * m_pInterfaces[idx].getScreen().width(); 60 | memcpy(data, m_pInterfaces[idx].theOSystem->console().mediaSource().previousFrameBuffer(), ssize); 61 | memcpy(data + ssize, m_pInterfaces[idx].theOSystem->console().mediaSource().currentFrameBuffer(), ssize); 62 | } 63 | 64 | float step(int idx, const int* action) override 65 | { 66 | int rewards = 0; 67 | for (int i = 0; i < m_repeat; ++i) 68 | { 69 | assert(m_pInterfaces[idx].getMinimalActionSet().size() > (*action)); 70 | rewards += m_pInterfaces[idx].act(m_pInterfaces[idx].getMinimalActionSet()[*action]); 71 | if (is_done(idx)) 72 | break; 73 | } 74 | return rewards; 75 | } 76 | 77 | bool is_done(int idx) override 78 | { 79 | return m_pInterfaces[idx].game_over() || 80 | m_pInterfaces[idx].getEpisodeFrameNumber() - m_numNoops[idx] >= m_maxFrames[idx]; 81 | } 82 | 83 | void reset(int i, int numNoops=0, int maxFrames=100000) override 84 | { 85 | m_pInterfaces[i].reset_game(); 86 | if(numNoops > 0) 87 | { 88 | assert(m_pInterfaces[i].getMinimalActionSet()[0] == Action::PLAYER_A_NOOP); 89 | for (int s = 0; s < numNoops;++s) 90 | { 91 | m_pInterfaces[i].act(Action::PLAYER_A_NOOP); 92 | if (m_pInterfaces[i].game_over()) 93 | m_pInterfaces[i].reset_game(); 94 | } 95 | } 96 | // Check if FIRE is part of the minimal action set 97 | if (m_pInterfaces[i].getMinimalActionSet()[1] == Action::PLAYER_A_FIRE) 98 | { 99 | assert(m_pInterfaces[i].getMinimalActionSet().size() >= 3); 100 | int action = 1; 101 | step(i, &action); 102 | if (m_pInterfaces[i].game_over()) 103 | m_pInterfaces[i].reset_game(); 104 | 105 | action = 2; 106 | step(i, &action); 107 | if (m_pInterfaces[i].game_over()) 108 | m_pInterfaces[i].reset_game(); 109 | } 110 | m_numNoops[i] = m_pInterfaces[i].getEpisodeFrameNumber(); 111 | m_maxFrames[i] = maxFrames; 112 | } 113 | 114 | void get_final_state(float *data, int idx) 115 | { 116 | auto ram = m_pInterfaces[idx].getRAM(); 117 | for (auto i = 0; i < RAM_SIZE; ++i) 118 | data[i] = ram.get(i); 119 | } 120 | 121 | string DebugString() override { return "AtariEnvironment"; } 122 | private: 123 | ALEInterface* m_pInterfaces; 124 | bool m_initialized; 125 | int m_repeat = 4; 126 | std::vector<int> m_numNoops; 127 | std::vector<int> m_maxFrames; 128 | }; 129 | 130 | class AtariMakeOp : public EnvironmentMakeOp { 131 | public: 132 | explicit AtariMakeOp(OpKernelConstruction* context) : EnvironmentMakeOp(context) { 133 | OP_REQUIRES_OK(context, context->GetAttr("game", &m_game)); 134 | ale::Logger::setMode(ale::Logger::mode(2)); 135 | } 136 | 137 | private: 138 | virtual Status CreateResource(OpKernelContext* context, BaseEnvironment** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { 139 | AtariEnvironment* env = new AtariEnvironment(batch_size); 140 | if (env == nullptr) 141 | return errors::ResourceExhausted("Failed to allocate"); 142 | *ret = env; 143 | 144 | const auto thread_pool = 
context->device()->tensorflow_cpu_worker_threads(); 145 | const int num_threads = std::min(thread_pool->num_threads, batch_size); 146 | auto f = [&](int thread_id) { 147 | for(int b =thread_id; b < batch_size;b+=num_threads) 148 | { 149 | env->load_rom(m_game, b); 150 | } 151 | }; 152 | 153 | BlockingCounter counter(num_threads-1); 154 | for (int i = 1; i < num_threads; ++i) { 155 | thread_pool->workers->Schedule([&, i]() { 156 | f(i); 157 | counter.DecrementCount(); 158 | }); 159 | } 160 | f(0); 161 | counter.Wait(); 162 | return Status::OK(); 163 | } 164 | std::string m_game; 165 | }; 166 | 167 | REGISTER_OP("AtariMake") 168 | .Attr("batch_size: int") 169 | .Attr("game: string") 170 | .Attr("container: string = ''") 171 | .Attr("shared_name: string = ''") 172 | .Output("handle: resource") 173 | .SetIsStateful() 174 | .SetShapeFn(shape_inference::ScalarShape); 175 | 176 | REGISTER_KERNEL_BUILDER(Name("AtariMake").Device(DEVICE_CPU), AtariMakeOp); 177 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/__init__.py: -------------------------------------------------------------------------------- 1 | from .tf_maze import * -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/hard_maze.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 400 3 | 13 4 | 36 184 5 | 0 6 | 31 20 7 | 31 20 8 | 41 5 3 8 9 | 3 8 4 49 10 | 4 49 57 53 11 | 4 49 7 202 12 | 7 202 195 198 13 | 195 198 186 8 14 | 186 8 39 5 15 | 56 54 56 157 16 | 57 106 158 162 17 | 77 201 108 164 18 | 6 80 33 121 19 | 192 146 87 91 20 | 56 55 133 30 21 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/hard_maze.txt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/gpu_implementation/gym_tensorflow/maze/hard_maze.txt.npy -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/tf_maze.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | import tensorflow as tf 20 | from gym_tensorflow.tf_env import TensorFlowEnv, gym_tensorflow_module 21 | 22 | 23 | class MazeEnv(TensorFlowEnv): 24 | def __init__(self, batch_size, name=None): 25 | self.batch_size = batch_size 26 | self.obs_variable = None 27 | with tf.variable_scope(name, default_name='MazeInstance'): 28 | self.instances = gym_tensorflow_module.maze_make(batch_size=batch_size, filename='hard_maze.txt') 29 | 30 | @property 31 | def env_default_timestep_cutoff(self): 32 | return 400 33 | 34 | @property 35 | def action_space(self): 36 | return 2 37 | 38 | @property 39 | def discrete_action(self): 40 | return False 41 | 42 | def step(self, action, indices=None, name=None): 43 | with tf.variable_scope(name, default_name='MazeStep'): 44 | #action = tf.Print(action, [action], 'action=') 45 | return gym_tensorflow_module.environment_step(self.instances, indices=indices, action=action) 46 | 47 | def reset(self, indices=None, max_frames=None, name=None): 48 | '''Resets maze instances with a random no-op start (1-30) and sets the maximum number of frames for the episode (defaults to env_default_timestep_cutoff) 49 | ''' 50 | with tf.variable_scope(name, default_name='MazeReset'): 51 | noops = tf.random_uniform(tf.shape(indices), minval=1, maxval=31, dtype=tf.int32) 52 | if max_frames is None: 53 | max_frames = self.env_default_timestep_cutoff 54 | return gym_tensorflow_module.environment_reset(self.instances, indices, noops=noops, max_frames=max_frames) 55 | 56 | def observation(self, indices=None, name=None): 57 | with tf.variable_scope(name, default_name='MazeObservation'): 58 | with tf.device('/cpu:0'): 59 | obs = gym_tensorflow_module.environment_observation(self.instances, indices, T=tf.float32) 60 | obs.set_shape((None,) + (11,)) 61 | #obs = tf.Print(obs, [obs], "obs=") 62 | return tf.expand_dims(obs, axis=1) 63 | 64 | def final_state(self, indices, name=None): 65 | with tf.variable_scope(name, default_name='MazeFinalState'): 66 | return gym_tensorflow_module.maze_final_state(self.instances, indices) 67 | 68 | def close(self): 69 | pass -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/ops/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..tf_env import gym_tensorflow_module 3 | 4 | try: 5 | indexed_matmul = gym_tensorflow_module.indexed_batch_mat_mul 6 | except AttributeError: 7 | import time 8 | print('Indexed MatMul implementation not available. This significantly affects performance.') 9 | time.sleep(5) 10 | def indexed_matmul(a, b, idx): 11 | return tf.matmul(a, tf.gather(b, idx)) -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/tf_env.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | */ 19 | 20 | #ifndef TF_ENV_H_ 21 | #define TF_ENV_H_ 22 | #include 23 | #include "tensorflow/core/framework/resource_mgr.h" 24 | #include "tensorflow/core/framework/op_kernel.h" 25 | 26 | using namespace tensorflow; 27 | class BaseEnvironment : public ResourceBase 28 | { 29 | public: 30 | virtual bool is_done(int idx) = 0; 31 | virtual void reset(int i, int numNoops = 0, int maxFrames = 100000) = 0; 32 | }; 33 | 34 | template <class T> 35 | class StepInterface 36 | { 37 | public: 38 | virtual TensorShape get_action_shape() = 0; 39 | virtual float step(int idx, const T* action) = 0; 40 | }; 41 | 42 | template <class T> 43 | class Environment : public BaseEnvironment 44 | { 45 | public: 46 | virtual void get_observation(T* data, int idx) = 0; 47 | virtual TensorShape get_observation_shape() = 0; 48 | }; 49 | 50 | class EnvironmentMakeOp : public OpKernel { 51 | public: 52 | explicit EnvironmentMakeOp(OpKernelConstruction *context); 53 | 54 | // The resource is deleted from the resource manager only when it is private 55 | // to kernel. Ideally the resource should be deleted when it is no longer held 56 | // by anyone, but it would break backward compatibility. 57 | virtual ~EnvironmentMakeOp() override; 58 | 59 | void Compute(OpKernelContext *context) override LOCKS_EXCLUDED(mu_); 60 | 61 | protected: 62 | // Variables accessible from subclasses. 63 | tensorflow::mutex mu_; 64 | ContainerInfo cinfo_ GUARDED_BY(mu_); 65 | BaseEnvironment* resource_ GUARDED_BY(mu_) = nullptr; 66 | int batch_size; 67 | 68 | private: 69 | // During the first Compute(), resource is either created or looked up using 70 | // shared_name. In the latter case, the resource found should be verified if 71 | // it is compatible with this op's configuration. The verification may fail in 72 | // cases such as two graphs asking queues of the same shared name to have 73 | // inconsistent capacities. 
74 | virtual Status VerifyResource(BaseEnvironment *resource); 75 | 76 | PersistentTensor handle_ GUARDED_BY(mu_); 77 | 78 | virtual Status CreateResource(OpKernelContext *context, BaseEnvironment **ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) = 0; 79 | 80 | TF_DISALLOW_COPY_AND_ASSIGN(EnvironmentMakeOp); 81 | }; 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/tf_env.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | import numpy as np 20 | import os 21 | 22 | import tensorflow as tf 23 | 24 | gym_tensorflow_module = tf.load_op_library(os.path.join(os.path.dirname(__file__), 'gym_tensorflow.so')) 25 | 26 | 27 | class TensorFlowEnv(object): 28 | pass 29 | 30 | 31 | class PythonEnv(TensorFlowEnv): 32 | def step(self, action, indices=None, name=None): 33 | with tf.variable_scope(name, default_name='PythonStep'): 34 | reward, done = tf.py_func(self._step, [action, indices], [tf.float32, tf.bool]) 35 | reward.set_shape(indices.get_shape()) 36 | done.set_shape(indices.get_shape()) 37 | return reward, done 38 | 39 | def _reset(self, indices): 40 | raise NotImplementedError() 41 | 42 | def reset(self, indices=None, max_frames=None, name=None): 43 | with tf.variable_scope(name, default_name='PythonReset'): 44 | return tf.py_func(self._reset, [indices], tf.int64).op 45 | 46 | def _step(self, action, indices): 47 | raise NotImplementedError() 48 | 49 | def _obs(self, indices): 50 | raise NotImplementedError() 51 | 52 | def observation(self, indices=None, name=None): 53 | with tf.variable_scope(name, default_name='PythonObservation'): 54 | obs = tf.py_func(self._obs, [indices], tf.float32) 55 | obs.set_shape(tuple(indices.get_shape()) + self.observation_space) 56 | return tf.expand_dims(obs, axis=1) 57 | 58 | def final_state(self, indices, name=None): 59 | with tf.variable_scope(name, default_name='PythonFinalState'): 60 | return tf.zeros([tf.shape(indices)[0], 2], dtype=tf.float32) 61 | 62 | @property 63 | def unwrapped(self): 64 | return self 65 | 66 | def close(self): 67 | pass 68 | 69 | 70 | class GymEnv(PythonEnv): 71 | def __init__(self, name, batch_size): 72 | import gym 73 | self.env = [gym.make(name) for _ in range(batch_size)] 74 | self.obs = [None] * batch_size 75 | 76 | @property 77 | def action_space(self): 
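'''Flattened size of the underlying Gym action space (a continuous Box space is assumed here; see discrete_action below).'''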
78 | return np.prod(self.env[0].action_space.shape) 79 | 80 | @property 81 | def observation_space(self): 82 | return self.env[0].observation_space.shape 83 | 84 | @property 85 | def discrete_action(self): 86 | return False 87 | 88 | def _step(self, action, indices): 89 | assert not self.discrete_action 90 | results = map(lambda i: self.env[indices[i]].step(action[i]), range(len(indices))) 91 | obs, reward, done, _ = zip(*results) 92 | for i in range(len(indices)): 93 | self.obs[indices[i]] = obs[i].astype(np.float32) 94 | 95 | return np.array(reward, dtype=np.float32), np.array(done, dtype=np.bool) 96 | 97 | def _reset(self, indices): 98 | for i in indices: 99 | self.obs[i] = self.env[i].reset().astype(np.float32) 100 | return 0 101 | 102 | def _obs(self, indices): 103 | return np.array([self.obs[i] for i in indices]).astype(np.float32) 104 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .stack_frames import StackFramesWrapper -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/wrappers/stack_frames.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | from gym_tensorflow.tf_env import TensorFlowEnv 6 | 7 | class StackFramesWrapper(TensorFlowEnv): 8 | def __init__(self, env, num_stacked_frames=4): 9 | self.env = env 10 | self.num_stacked_frames = num_stacked_frames 11 | self.obs_variable = tf.Variable(tf.zeros(shape=self.observation_space, dtype=tf.float32), trainable=False) 12 | 13 | @property 14 | def batch_size(self): 15 | return self.env.batch_size 16 | 17 | @property 18 | def env_default_timestep_cutoff(self): 19 | return self.env.env_default_timestep_cutoff 20 | 21 | @property 22 | def action_space(self): 23 | return self.env.action_space 24 | 25 | @property 26 | def observation_space(self): 27 | return self.env.observation_space[:-1] + (self.env.observation_space[-1] * self.num_stacked_frames, ) 28 | 29 | @property 30 | def discrete_action(self): 31 | return self.env.discrete_action 32 | 33 | def stack_observation(self, indices, reset=False): 34 | obs = self.env.observation(indices) 35 | 36 | if reset: 37 | obs_batch = tf.zeros((tf.shape(indices)[0],) + self.env.observation_space[1:-1] + (self.env.observation_space[-1] * self.num_stacked_frames-1, ), dtype=tf.float32) 38 | obs_batch = tf.concat([obs_batch, obs], axis=-1) 39 | else: 40 | obs_batch = tf.gather(self.obs_variable, indices) 41 | obs_batch = tf.slice(obs_batch, (0, 0, 0, 1), (-1, -1, -1, -1)) 42 | obs_batch = tf.concat([obs_batch, obs], axis=-1) 43 | return tf.scatter_update(self.obs_variable, indices, obs_batch) 44 | 45 | def step(self, action, indices=None, name=None): 46 | if indices is None: 47 | indices = np.arange(self.batch_size) 48 | rew, done = self.env.step(action=action, indices=indices, name=name) 49 | with tf.control_dependencies([rew, done]): 50 | with tf.control_dependencies([self.stack_observation(indices)]): 51 | return tf.identity(rew), tf.identity(done) 52 | 53 | def reset(self, indices=None, max_frames=None, name=None): 54 | '''Resets the wrapped environments with a random noop start (1-30) and sets the maximum number of frames for the episode (default 100,000 * frameskip) 55 | ''' 56 | if indices is None: 57 | indices =
np.arange(self.batch_size) 58 | reset_op = self.env.reset(indices=indices, max_frames=max_frames, name=name) 59 | with tf.control_dependencies([reset_op]): 60 | return self.stack_observation(indices, reset=True).op 61 | 62 | def observation(self, indices=None, name=None): 63 | '''Returns current observation after preprocessing (skip, grayscale, warp, stack).\nMust be called ONCE each time step is called if num_stacked_frames > 1 64 | ''' 65 | if indices is None: 66 | indices = np.arange(self.batch_size) 67 | return tf.gather(self.obs_variable, indices) 68 | 69 | def final_state(self, indices, name=None): 70 | return self.env.final_state(indices, name) 71 | 72 | @property 73 | def unwrapped(self): 74 | return self.env 75 | 76 | def close(self): 77 | return self.env.close() 78 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/gpu_implementation/neuroevolution/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/distributed_helpers.py: -------------------------------------------------------------------------------- 1 | 2 | import threading 3 | from queue import Queue 4 | from multiprocessing.pool import ApplyResult 5 | 6 | import tabular_logger as tlogger 7 | 8 | class AsyncWorker(object): 9 | @property 10 | def concurrent_tasks(self): 11 | raise NotImplementedError() 12 | 13 | def run_async(self, task_id, task, callback): 14 | raise NotImplementedError() 15 | 16 | 17 | class WorkerHub(object): 18 | def __init__(self, workers, input_queue, done_queue): 19 | self.done_buffer = Queue() 20 | self.workers = workers 21 | self.available_workers = Queue() 22 | self.done_queue = done_queue 23 | self._cache = {} 24 | self.input_queue = input_queue 25 | 26 | for w in workers: 27 | for t in w.concurrent_tasks: 28 | self.available_workers.put((w, t)) 29 | 30 | self.__initialize_handlers() 31 | 32 | def __initialize_handlers(self): 33 | self._input_handler = threading.Thread( 34 | target=WorkerHub._handle_input, 35 | args=(self,) 36 | ) 37 | self._input_handler._state = 0 38 | 39 | self._output_handler = threading.Thread( 40 | target=WorkerHub._handle_output, 41 | args=(self,) 42 | ) 43 | self._output_handler._state = 0 44 | 45 | def worker_callback(self, worker, subworker, result): 46 | worker_task = (worker, subworker) 47 | self.available_workers.put(worker_task) 48 | task_id = self._cache[worker_task] 49 | del self._cache[worker_task] 50 | self.done_buffer.put((task_id, result)) 51 | 52 | @staticmethod 53 | def _handle_input(self): 54 | try: 55 | while True: 56 | worker_task = self.available_workers.get() 57 | if worker_task is None: 58 | tlogger.info('WorkerHub._handle_input done') 59 | break 60 | worker, subworker = worker_task 61 | 62 | task = self.input_queue.get() 63 | if task is None: 64 | tlogger.info('WorkerHub._handle_input done') 65 | break 66 | task_id, task = task 67 | self._cache[worker_task] = task_id 68 | 69 | worker.run_async(subworker, task, self.worker_callback) 70 | except: 71 | tlogger.exception('WorkerHub._handle_input exception thrown') 72 | raise 73 | 74 | @staticmethod 75 | def _handle_output(self): 76 | try: 77 | while True: 78 | result = self.done_buffer.get() 79 | if result is None: 80 | 
tlogger.info('WorkerHub._handle_output done') 81 | break 82 | self.done_queue.put(result) 83 | except: 84 | tlogger.exception('WorkerHub._handle_output exception thrown') 85 | raise 86 | 87 | def initialize(self): 88 | self._input_handler.start() 89 | self._output_handler.start() 90 | 91 | def close(self): 92 | self.available_workers.put(None) 93 | self.input_queue.put(None) 94 | self.done_buffer.put(None) 95 | 96 | class AsyncTaskHub(object): 97 | def __init__(self, input_queue=None, results_queue=None): 98 | if input_queue is None: 99 | input_queue = Queue(64) 100 | self.input_queue = input_queue 101 | self._cache = {} 102 | self.results_queue = None 103 | if results_queue is not None: 104 | self.results_queue = results_queue 105 | 106 | self._output_handler = threading.Thread( 107 | target=AsyncTaskHub._handle_output, 108 | args=(self,) 109 | ) 110 | self._output_handler.daemon = True 111 | self._output_handler._state = 0 112 | self._output_handler.start() 113 | 114 | @staticmethod 115 | def _handle_output(self): 116 | try: 117 | while True: 118 | result = self.results_queue.get() 119 | if result is None: 120 | tlogger.info('AsyncTaskHub._handle_output done') 121 | break 122 | self.put(result) 123 | except: 124 | tlogger.exception('AsyncTaskHub._handle_output exception thrown') 125 | raise 126 | 127 | def run_async(self, task, callback=None, error_callback=None): 128 | result = ApplyResult(self._cache, callback, error_callback) 129 | self.input_queue.put((result._job, task)) 130 | return result 131 | 132 | def put(self, result): 133 | job, result=result 134 | self._cache[job]._set(0, (True, result)) 135 | 136 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/helper.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import numbers 21 | import threading 22 | from queue import Queue 23 | import numpy as np 24 | import math 25 | 26 | 27 | class SharedNoiseTable(object): 28 | def __init__(self): 29 | import ctypes, multiprocessing 30 | seed = 123 31 | count = 250000000 # 1 gigabyte of 32-bit numbers. Will actually sample 2 gigabytes below. 
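# The table is backed by shared memory so that every worker process on the
# machine reads the same noise without copying it; a perturbation can then be
# described by a (start index, dim) pair into this table (see get() and
# sample_index() below) rather than by a full parameter vector.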
32 | print('Sampling {} random numbers with seed {}'.format(count, seed)) 33 | self._shared_mem = multiprocessing.Array(ctypes.c_float, count) 34 | self.noise = np.ctypeslib.as_array(self._shared_mem.get_obj()) 35 | assert self.noise.dtype == np.float32 36 | self.noise[:] = np.random.RandomState(seed).randn(count) # 64-bit to 32-bit conversion here 37 | print('Sampled {} bytes'.format(self.noise.size * 4)) 38 | 39 | def get(self, i, dim): 40 | return self.noise[i:i + dim] 41 | 42 | def sample_index(self, stream, dim): 43 | return stream.randint(0, len(self.noise) - dim + 1) 44 | 45 | 46 | class ConstantSchedule(object): 47 | def __init__(self, value): 48 | self._value = value 49 | 50 | def value(self, **kwargs): 51 | return self._value 52 | 53 | 54 | class LinearSchedule(object): 55 | def __init__(self, schedule, final_p, initial_p, field): 56 | self.schedule = schedule 57 | self.field = field 58 | self.final_p = final_p 59 | self.initial_p = initial_p 60 | 61 | def value(self, **kwargs): 62 | assert self.field in kwargs, "Argument {} not provided to scheduler. Available: {}".format(self.field, kwargs) 63 | fraction = min(float(kwargs[self.field]) / self.schedule, 1.0) 64 | return self.initial_p + fraction * (self.final_p - self.initial_p) 65 | 66 | 67 | class ExponentialSchedule(object): 68 | def __init__(self, initial_p, final_p, schedule, field): 69 | self.initial_p = initial_p 70 | self.final_p = final_p 71 | self.schedule = schedule 72 | self.field = field 73 | 74 | self.linear = LinearSchedule( 75 | initial_p=math.log(self.initial_p), 76 | final_p=math.log(self.final_p), 77 | schedule=self.schedule, 78 | field=self.field) 79 | 80 | def value(self, **kwargs): 81 | return math.exp(self.linear.value(**kwargs)) 82 | 83 | 84 | def make_schedule(args): 85 | if isinstance(args, numbers.Number): 86 | return ConstantSchedule(args) 87 | else: 88 | return globals()[args['type']](**{key: value for key, value in args.items() if key != 'type'}) 89 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .dqn_xavier import SmallDQN, LargeDQN 2 | from .dqn import Model, LargeModel 3 | from .batchnorm import ModelBN, ModelVirtualBN 4 | from .simple import LinearClassifier, SimpleClassifier 5 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/batchnorm.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | from .dqn import Model 21 | import tensorflow as tf 22 | 23 | 24 | class ModelBN(Model): 25 | def __init__(self): 26 | super(ModelBN, self).__init__() 27 | self.nonlin = lambda x: tf.nn.relu(self.batchnorm(x)) 28 | def batchnorm(self, x): 29 | with tf.variable_scope(None, default_name='BatchNorm'): 30 | ret = tf.layers.batch_normalization(x, center=False, scale=False, training=True) 31 | 32 | if len(x.get_shape()) == 4: 33 | b = self.create_bias_variable('b', (1, 1, ret.get_shape()[-1].value)) 34 | else: 35 | b = self.create_bias_variable('b', (1, ret.get_shape()[-1].value)) 36 | if self.indices is not None: 37 | b = tf.gather(b, self.indices) 38 | 39 | ret = ret + b 40 | return ret 41 | 42 | def _make_net(self, x, num_actions): 43 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4, bias=False)) 44 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2, bias=False)) 45 | x = self.flattenallbut0(x) 46 | x = self.nonlin(self.dense(x, 256, 'fc', bias=False)) 47 | 48 | ret = self.dense(x, num_actions, 'out', std=0.1) 49 | return ret 50 | 51 | 52 | class ModelVirtualBN(Model): 53 | def __init__(self): 54 | super(ModelVirtualBN, self).__init__() 55 | self.is_ref_batch = False 56 | self.nonlin = lambda x: tf.nn.relu(self.batchnorm(x)) 57 | self.device = None 58 | 59 | @property 60 | def requires_ref_batch(self): 61 | return True 62 | 63 | # This got a little out of hand, but it maintains a set of mean/var variables that are updated on load and used during inference. 
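# "Virtual" batch norm: when is_ref_batch is True, the moments of a fixed
# reference batch are computed and stored in the vbn_mean/vbn_var variables;
# every later forward pass reuses those stored statistics instead of the
# current batch's, keeping each evolved policy's behavior deterministic.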
64 | def batchnorm(self, x): 65 | with tf.variable_scope('BatchNorm'): 66 | if len(x.get_shape()) == 5: 67 | vbn_mean = tf.get_variable('mean', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 68 | vbn_var = tf.get_variable('var', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 69 | else: 70 | vbn_mean = tf.get_variable('mean', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 71 | vbn_var = tf.get_variable('var', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 72 | 73 | if self.is_ref_batch: 74 | mean, var = tf.nn.moments(x, list(range(1, len(x.get_shape())-1))) 75 | var = 1 / tf.sqrt(var + 1e-3) 76 | mean, var = tf.scatter_update(vbn_mean, self.indices, mean), tf.scatter_update(vbn_var, self.indices, var) 77 | else: 78 | mean, var = vbn_mean, vbn_var 79 | while len(mean.get_shape()) < len(x.get_shape()): 80 | mean, var = tf.expand_dims(mean, 1), tf.expand_dims(var, 1) 81 | 82 | if self.indices is not None: 83 | mean, var = tf.gather(mean, self.indices), tf.gather(var, self.indices) 84 | 85 | ret = (x-mean) * var 86 | 87 | if len(x.get_shape()) == 5: 88 | b = self.create_bias_variable('b', (1, 1, 1, ret.get_shape()[-1].value)) 89 | else: 90 | b = self.create_bias_variable('b', (1, ret.get_shape()[-1].value)) 91 | if self.indices is not None: 92 | b = tf.gather(b, self.indices) 93 | return ret + b 94 | 95 | def _make_net(self, x, num_actions, ): 96 | with tf.variable_scope('layer1'): 97 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4, bias=False)) 98 | with tf.variable_scope('layer2'): 99 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2, bias=False)) 100 | x = self.flattenallbut0(x) 101 | with tf.variable_scope('layer3'): 102 | x = self.nonlin(self.dense(x, 256, 'fc', bias=False)) 103 | 104 | with tf.variable_scope('layer4'): 105 | return self.dense(x, num_actions, 'out') 106 | 107 | def make_weights(self): 108 | super(ModelVirtualBN, self).make_weights() 109 | self.ref_batch_idx = tf.placeholder(tf.int32, ()) 110 | tmp = self.indices 111 | self.indices = [self.ref_batch_idx] 112 | with tf.device(self.device): 113 | with tf.variable_scope(self.scope, reuse=True): 114 | ref_batch = tf.stack([self.ref_batch]) 115 | self.is_ref_batch = True 116 | self.ref_batch_assign = self._make_net(ref_batch, self.num_actions) 117 | self.is_ref_batch = False 118 | self.indices = tmp 119 | 120 | def load(self, sess, i, *args, **kwargs): 121 | ret = super(ModelVirtualBN, self).load(sess, i, *args, **kwargs) 122 | sess.run(self.ref_batch_assign, {self.ref_batch_idx: i}) 123 | return ret 124 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/dqn.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import numpy as np 21 | import tensorflow as tf 22 | from .base import BaseModel 23 | 24 | 25 | class Model(BaseModel): 26 | def create_weight_variable(self, name, shape, std): 27 | scale_by = std / np.sqrt(np.prod(shape[:-1])) 28 | return self.create_variable(name, shape, scale_by) 29 | 30 | def _make_net(self, x, num_actions): 31 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4)) 32 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2)) 33 | x = self.flattenallbut0(x) 34 | x = self.nonlin(self.dense(x, 256, 'fc')) 35 | 36 | return self.dense(x, num_actions, 'out', std=0.1) 37 | 38 | 39 | class LargeModel(Model): 40 | def _make_net(self, x, num_actions): 41 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=32, kernel_size=8, stride=4, std=1.0)) 42 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=64, kernel_size=4, stride=2, std=1.0)) 43 | x = self.nonlin(self.conv(x, name='conv3', num_outputs=64, kernel_size=3, stride=1, std=1.0)) 44 | x = self.flattenallbut0(x) 45 | x = self.nonlin(self.dense(x, 512, 'fc')) 46 | 47 | return self.dense(x, num_actions, 'out', std=0.1) 48 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/dqn_xavier.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import tensorflow as tf 21 | from .base import BaseModel 22 | 23 | 24 | class SmallDQN(BaseModel): 25 | def _make_net(self, x, num_actions): 26 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4)) 27 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2)) 28 | x = self.flattenallbut0(x) 29 | x = self.nonlin(self.dense(x, 256, 'fc')) 30 | 31 | return self.dense(x, num_actions, 'out', std=0.1) 32 | 33 | 34 | class LargeDQN(BaseModel): 35 | def _make_net(self, x, num_actions): 36 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=32, kernel_size=8, stride=4, std=1.0)) 37 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=64, kernel_size=4, stride=2, std=1.0)) 38 | x = self.nonlin(self.conv(x, name='conv3', num_outputs=64, kernel_size=3, stride=1, std=1.0)) 39 | x = self.flattenallbut0(x) 40 | x = self.nonlin(self.dense(x, 512, 'fc')) 41 | 42 | return self.dense(x, num_actions, 'out', std=0.1) 43 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/simple.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | from .dqn import Model 21 | 22 | 23 | class LinearClassifier(Model): 24 | def _make_net(self, x, num_actions): 25 | x = self.flattenallbut0(x) 26 | ret = self.dense(x, num_actions, 'out') 27 | return ret 28 | 29 | class SimpleClassifier(Model): 30 | def _make_net(self, x, num_actions): 31 | x = self.flattenallbut0(x) 32 | x = self.nonlin(self.dense(x, 16, 'fc1')) 33 | x = self.nonlin(self.dense(x, 16, 'fc2')) 34 | ret = self.dense(x, num_actions, 'out', std=0.1) 35 | return ret 36 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/optimizers.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import numpy as np 21 | 22 | 23 | class Optimizer(object): 24 | def __init__(self, theta): 25 | self.theta = theta 26 | self.dim = len(self.theta) 27 | self.t = 0 28 | 29 | def update(self, globalg): 30 | self.t += 1 31 | step = self._compute_step(globalg) 32 | theta = self.theta 33 | ratio = np.linalg.norm(step) / np.linalg.norm(theta) 34 | new_theta = self.theta + step 35 | self.theta = new_theta 36 | return ratio, new_theta 37 | 38 | def _compute_step(self, globalg): 39 | raise NotImplementedError 40 | 41 | 42 | class SGD(Optimizer): 43 | def __init__(self, theta, stepsize, momentum=0.9): 44 | Optimizer.__init__(self, theta) 45 | self.v = np.zeros(self.dim, dtype=np.float32) 46 | self.stepsize, self.momentum = stepsize, momentum 47 | 48 | def _compute_step(self, globalg): 49 | # NOTE: different from Open AI to match more common momentum implementations (e.g. Tensorflow) 50 | # original from OpenAI: self.v = self.momentum * self.v + (1 - self.momentum) * globalg 51 | self.v = self.momentum * self.v + globalg 52 | step = -self.stepsize * self.v 53 | return step 54 | 55 | 56 | class Adam(Optimizer): 57 | def __init__(self, theta, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08): 58 | Optimizer.__init__(self, theta) 59 | self.stepsize = stepsize 60 | self.beta1 = beta1 61 | self.beta2 = beta2 62 | self.epsilon = epsilon 63 | self.m = np.zeros(self.dim, dtype=np.float32) 64 | self.v = np.zeros(self.dim, dtype=np.float32) 65 | 66 | def _compute_step(self, globalg): 67 | a = self.stepsize * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t) 68 | self.m = self.beta1 * self.m + (1 - self.beta1) * globalg 69 | self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg) 70 | step = -a * self.m / (np.sqrt(self.v) + self.epsilon) 71 | return step 72 | 73 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/tf_util.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import tensorflow as tf 21 | import numpy as np 22 | 23 | import tabular_logger as tlogger 24 | 25 | def get_available_gpus(): 26 | from tensorflow.python.client import device_lib 27 | local_device_protos = device_lib.list_local_devices() 28 | return [x.name for x in local_device_protos if x.device_type == 'GPU'] 29 | 30 | 31 | class WorkerSession(object): 32 | def __init__(self, worker): 33 | self._worker = worker 34 | def __enter__(self, *args, **kwargs): 35 | self._sess = tf.Session(*args, **kwargs) 36 | self._sess.run(tf.global_variables_initializer()) 37 | self._worker.initialize(self._sess) 38 | 39 | tlogger.info(self._worker.model.description) 40 | 41 | self.coord = tf.train.Coordinator() 42 | self.threads = tf.train.start_queue_runners(self._sess, self.coord, start=True) 43 | 44 | return self._sess 45 | 46 | def __exit__(self, exception_type, exception_value, traceback): 47 | if exception_type in [tf.errors.OutOfRangeError, StopIteration]: 48 | exception_type = None 49 | try: 50 | self._worker.close() 51 | self.coord.request_stop() 52 | self.coord.join(self.threads) 53 | if self._sess is None: 54 | raise RuntimeError('Session is already closed.') 55 | self._sess.close() 56 | finally: 57 | self._sess = None 58 | return exception_type is None 59 | -------------------------------------------------------------------------------- /deepneuroevolution/requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | click==6.7 3 | gym==0.9.4 4 | h5py==2.7.0 5 | mujoco-py==0.5.7 6 | numpy==1.12.1 7 | packaging==16.8 8 | pyglet==1.2.4 9 | PyOpenGL==3.1.0 10 | pyparsing==2.2.0 11 | redis==2.10.5 12 | requests==2.14.2 13 | six==1.10.0 14 | tensorflow==1.15.2 15 | Werkzeug==0.15.3 16 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # from ami-d8bdebb8 4 | 5 | set -x 6 | 7 | sudo apt-get update 8 | sudo apt-get install -y build-essential cmake git wget htop 9 | 10 | # Build and install a new version of redis 11 | # https://www.digitalocean.com/community/tutorials/how-to-install-and-configure-redis-on-ubuntu-16-04 12 | wget --quiet http://download.redis.io/releases/redis-3.2.7.tar.gz -O redis-3.2.7.tar.gz 13 | tar -xvzf redis-3.2.7.tar.gz 14 | cd redis-3.2.7 15 | make 16 | 
sudo make install 17 | sudo mkdir /etc/redis 18 | sudo cp redis.conf /etc/redis 19 | cd .. 20 | rm -rf redis-3.2.7 redis-3.2.7.tar.gz 21 | 22 | # Set up redis working directory 23 | sudo sed -ie 's/dir \.\//dir \/var\/lib\/redis/' /etc/redis/redis.conf 24 | sudo mkdir /var/lib/redis 25 | sudo chown ubuntu:ubuntu /var/lib/redis 26 | 27 | # rely on firewall for security 28 | sudo sed -ie "s/bind 127.0.0.1//" /etc/redis/redis.conf 29 | sudo sed -ie "s/protected-mode yes/protected-mode no/" /etc/redis/redis.conf 30 | 31 | # System settings for redis 32 | echo "vm.overcommit_memory=1" | sudo tee -a /etc/sysctl.conf 33 | sudo sysctl vm.overcommit_memory=1 34 | sudo apt-get install -y hugepages 35 | echo "sudo hugeadm --thp-never" | sudo tee /etc/profile.d/disable_thp.sh > /dev/null 36 | . /etc/profile.d/disable_thp.sh 37 | 38 | # Start redis with systemctl 39 | # sudo sed -ie "s/supervised no/supervised systemd/" /etc/redis/redis.conf 40 | # ^ doesn't seem to matter; if it's enabled, the logs show "systemd supervision requested, but NOTIFY_SOCKET not found" 41 | echo " 42 | [Unit] 43 | Description=Redis In-Memory Data Store 44 | After=network.target 45 | 46 | [Service] 47 | User=ubuntu 48 | Group=ubuntu 49 | ExecStart=/usr/local/bin/redis-server /etc/redis/redis.conf 50 | ExecStop=/usr/local/bin/redis-cli shutdown 51 | Restart=always 52 | 53 | [Install] 54 | WantedBy=multi-user.target 55 | " | sudo tee /etc/systemd/system/redis.service > /dev/null 56 | sudo systemctl start redis 57 | 58 | # anaconda 59 | sudo echo 'export PATH=/opt/conda/bin:$PATH' | sudo tee /etc/profile.d/conda.sh > /dev/null 60 | sudo wget --quiet https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh -O ~/anaconda.sh 61 | sudo /bin/bash ~/anaconda.sh -b -p /opt/conda 62 | sudo rm -f ~/anaconda.sh 63 | . /etc/profile.d/conda.sh 64 | sudo /opt/conda/bin/conda update -y --all 65 | 66 | # additional python dependencies 67 | sudo /opt/conda/bin/conda install -y numpy scipy opencv 68 | 69 | # Mujoco 70 | sudo mkdir -p /opt/mujoco 71 | 72 | ####################################################### 73 | # WRITE CODE HERE TO PLACE MUJOCO 1.31 in /opt/mujoco # 74 | # The key file should be in /opt/mujoco/mjkey.txt # 75 | # Mujoco should be installed in /opt/mujoco/mjpro131 # 76 | ####################################################### 77 | 78 | sudo echo 'export MUJOCO_PY_MJKEY_PATH=/opt/mujoco/mjkey.txt' | sudo tee /etc/profile.d/mujoco.sh > /dev/null 79 | sudo echo 'export MUJOCO_PY_MJPRO_PATH=/opt/mujoco/mjpro131' | sudo tee -a /etc/profile.d/mujoco.sh > /dev/null 80 | . /etc/profile.d/mujoco.sh 81 | 82 | # ALE 83 | sudo /opt/conda/bin/conda install -y libgcc # ALE needs this for some reason 84 | sudo apt-get install -y libsdl1.2-dev 85 | git clone https://github.com/mgbellemare/Arcade-Learning-Environment 86 | cd Arcade-Learning-Environment 87 | mkdir build 88 | cd build 89 | cmake .. -DUSE_SDL=on 90 | make 91 | cd .. 92 | sudo /opt/conda/bin/pip install . 93 | cd .. 
94 | rm -rf Arcade-Learning-Environment 95 | 96 | # Tensorflow 0.11.0 97 | sudo /opt/conda/bin/conda install -c conda-forge -y tensorflow=0.11.0 98 | 99 | # Gym 100 | sudo apt-get install -y zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev libboost-all-dev libsdl2-dev swig freeglut3 libgl1 libglu1 101 | sudo /opt/conda/bin/conda install -y pyopengl 102 | sudo /opt/conda/bin/pip install \ 103 | 'gym[atari,classic_control,mujoco]' \ 104 | PyYAML \ 105 | six==1.10.0 \ 106 | awscli 107 | 108 | # pip redis 109 | sudo /opt/conda/bin/pip install redis 110 | 111 | # cleanup 112 | sudo /opt/conda/bin/conda clean -y --all 113 | sudo apt-get clean 114 | sudo rm -rf /var/lib/apt/lists/* 115 | # cleanup pip cache? 116 | 117 | set +x 118 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/local_env_setup.sh: -------------------------------------------------------------------------------- 1 | # your environment setup for a new shell window 2 | echo Setting up local environment 3 | . env/bin/activate 4 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/local_run_exp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | NAME=exp_`date "+%m_%d_%H_%M_%S"` 3 | ALGO=$1 4 | EXP_FILE=$2 5 | tmux new -s $NAME -d 6 | tmux send-keys -t $NAME '. scripts/local_env_setup.sh' C-m 7 | tmux send-keys -t $NAME 'python -m es_distributed.main master --master_socket_path /tmp/es_redis_master.sock --algo '$ALGO' --exp_file '"$EXP_FILE" C-m 8 | tmux split-window -t $NAME 9 | tmux send-keys -t $NAME '. scripts/local_env_setup.sh' C-m 10 | tmux send-keys -t $NAME 'python -m es_distributed.main workers --master_host localhost --relay_socket_path /tmp/es_redis_relay.sock --algo '$ALGO' --num_workers 10' C-m 11 | tmux a -t $NAME 12 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/local_run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | tmux new -s redis -d 3 | tmux send-keys -t redis 'redis-server redis_config/redis_master.conf' C-m 4 | tmux split-window -t redis 5 | tmux send-keys -t redis 'redis-server redis_config/redis_local_mirror.conf' C-m 6 | tmux a -t redis 7 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/packer.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "aws_access_key": "", 4 | "aws_secret_key": "" 5 | }, 6 | "builders": [ 7 | { 8 | "type": "amazon-ebs", 9 | "access_key": "{{user `aws_access_key`}}", 10 | "secret_key": "{{user `aws_secret_key`}}", 11 | "region": "us-west-1", 12 | "source_ami": "ami-d8bdebb8", 13 | "instance_type": "t2.micro", 14 | "ssh_username": "ubuntu", 15 | "ami_name": "es-dist-{{isotime \"2006-01-02-03-04-05\"}}", 16 | "ami_block_device_mappings": [ 17 | { 18 | "device_name": "/dev/sda1", 19 | "volume_size": 40, 20 | "delete_on_termination": true 21 | } 22 | ], 23 | "launch_block_device_mappings": [ 24 | { 25 | "device_name": "/dev/sda1", 26 | "volume_size": 40, 27 | "delete_on_termination": true 28 | } 29 | ], 30 | "ami_regions": [ 31 | "us-west-1", 32 | "us-west-2", 33 | "us-east-1", 34 | "us-east-2", 35 | "eu-west-1", 36 | "eu-central-1", 37 | "ap-northeast-1", 38 | "ap-northeast-2", 39 | "ap-southeast-1", 40 | "ap-southeast-2", 41 | "ap-south-1", 42 | "sa-east-1" 43 | ] 44 | 
} 45 | ], 46 | "provisioners": [ 47 | { 48 | "type": "shell", 49 | "scripts": [ 50 | "dependency.sh" 51 | ] 52 | } 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/viz.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | 4 | @click.command() 5 | @click.argument('env_id') 6 | @click.argument('policy_file') 7 | @click.option('--record', is_flag=True) 8 | @click.option('--stochastic', is_flag=True) 9 | @click.option('--extra_kwargs') 10 | def main(env_id, policy_file, record, stochastic, extra_kwargs): 11 | import gym 12 | from gym import wrappers 13 | import tensorflow as tf 14 | from es_distributed.policies import MujocoPolicy, ESAtariPolicy 15 | from es_distributed.atari_wrappers import ScaledFloatFrame, wrap_deepmind 16 | from es_distributed.es import get_ref_batch 17 | import numpy as np 18 | 19 | is_atari_policy = "NoFrameskip" in env_id 20 | 21 | env = gym.make(env_id) 22 | if is_atari_policy: 23 | env = wrap_deepmind(env) 24 | 25 | if record: 26 | import uuid 27 | env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True) 28 | 29 | if extra_kwargs: 30 | import json 31 | extra_kwargs = json.loads(extra_kwargs) 32 | 33 | with tf.Session(): 34 | if is_atari_policy: 35 | pi = ESAtariPolicy.Load(policy_file, extra_kwargs=extra_kwargs) 36 | pi.set_ref_batch(get_ref_batch(env, batch_size=128)) 37 | else: 38 | pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs) 39 | 40 | while True: 41 | if is_atari_policy: 42 | rews, t, novelty_vector = pi.rollout(env, render=True, random_stream=np.random if stochastic else None) 43 | print('return={:.4f} len={}'.format(rews.sum(), t)) 44 | 45 | if record: 46 | env.close() 47 | return 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /deepneuroevolution/train_large.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | 
SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 -------------------------------------------------------------------------------- /deepneuroevolution/train_small.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,StarLightZone.Act2 4 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 5 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 6 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 7 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 8 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 10 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 11 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 -------------------------------------------------------------------------------- /deepneuroevolution/train_spring_yard.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/README.md: -------------------------------------------------------------------------------- 1 | ## Visual Inspector for NeuroEvolution (VINE) 2 | 3 | This repo contains implementations of VINE, i.e., Visual Inspector for NeuroEvolution, an interactive data visualization tool for neuroevolution. An article describing this visualization tool can be found [here](https://eng.uber.com/vine/). 4 | 5 | ### Dependencies that need to be downloaded by end-user from a third party 6 | 7 | In addition to requirements in `../requirements.txt`: 8 | 9 | * [Matplotlib](https://matplotlib.org/) -- version 2.0.2 10 | * [Sklearn](http://scikit-learn.org/stable/) -- version 0.19.1 11 | * [Pandas](https://pandas.pydata.org/) -- version 0.22.0 12 | * [Colour](https://github.com/vaab/colour) -- version 0.1.5 13 | 14 | ### Visualize the pseudo-offspring clouds 15 | 16 | __Example 1__: visualize the sample Mujoco Humanoid 2D BC (i.e., final x-y location) data for Generations 90 to 99 17 | ``` 18 | python -m main_mujoco 90 99 sample_data/mujoco/final_xy_bc/ 19 | ``` 20 | This will bring up the GUI which consists of two interrelated plots: a pseudo-offspring cloud plot, and a fitness plot, similar to Figure 2 of the [article](https://eng.uber.com/vine/), which is described in detail there. 
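The BC data is expected to be laid out one directory per generation under the data path, e.g. `sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0097/` containing `snapshot_parent_0097.dat` and `snapshot_offspring_0097.dat`; see `dimen_red/assemble.py` below for the exact naming scheme.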
21 | 22 | __Example 2__: click the `Movie` button on the GUI to generate a visualization of the moving cloud similar to Figure 3 of the [article](https://eng.uber.com/vine/), which can be saved as a movie clip by checking the `save movie` checkbox. 23 | 24 | __Example 3__: right click any point of the pseudo-offspring cloud to view videos of the corresponding agent’s deterministic and stochastic behaviors (only available for Generation 97 in `sample_data`). Follow the steps (all "clicks" are right clicks) illustrated in Figure 5 of the [article](https://eng.uber.com/vine/). 25 | 26 | 27 | To see HELP for the complete description of all available options (e.g., multiple BCs and high-dimensional BCs): 28 | ``` 29 | python -m main_mujoco --help 30 | ``` 31 | 32 | 33 | ### Using dimensionality reduction to process high-dimensional BCs 34 | 35 | Assume you would like to reduce 2000D BCs to 2D for Generations 0 to 99 using PCA: 36 | ``` 37 | python -m process_bc 0 99 2000 --method pca 38 | ``` 39 | The reduced BC data is stored at `<path>/reduced_pca`. 40 | 41 | To see HELP for the complete description of all available options: 42 | ``` 43 | python -m process_bc --help 44 | ``` 45 | 46 | ### Create and visualize your own data 47 | 48 | 1. Choose proper behavior characterizations (BCs) (refer to the [article](https://eng.uber.com/vine/) for examples). 49 | 2. Make moderate modifications to your GA or ES code so that it dumps out the BCs during neuroevolution. 50 | Examples of BC choices and modified versions of GA and ES, namely `es_modified.py` and `ga_modified.py`, are provided in `../es_distributed` for your reference. 51 | 3. If applicable, use dimensionality reduction (see above) to reduce high-dimensional BCs to 2D. 52 | 4. Create (if necessary) and run a `main_*.py` file to launch the GUI. 53 | `main_mujoco.py` or `main_atari.py` can be used directly or as a template for most of your use cases. 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/dimen_red/assemble.py: -------------------------------------------------------------------------------- 1 | """Assemble hi-D BCs from all generations""" 2 | import numpy as np 3 | import pandas as pd 4 | 5 | def assemble(start_iter, end_iter, path, *, bc_dim, ds_ratio): 6 | """Assemble hi-D BCs from all generations""" 7 | print("Assembling {}-D BCs... 
with ds_ratio={}".format(bc_dim, ds_ratio)) 8 | 9 | X, parent_options, child_options, labels = [], [], [], [] 10 | for gen in range(start_iter, end_iter+1): 11 | print('processing iter {}...'.format(gen)) 12 | parent_file = '{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.dat'.format(path, gen, gen) 13 | pdata = np.loadtxt(parent_file) 14 | 15 | p_bc = pdata[:bc_dim] 16 | X.append(p_bc) 17 | parent_options.append(pdata[bc_dim:]) 18 | labels.append(pdata[bc_dim:bc_dim+1]) 19 | 20 | offspring_file = '{}/snapshots/snapshot_gen_{:04d}/snapshot_offspring_{:04d}.dat'.format(path, gen, gen) 21 | odata = pd.read_csv(offspring_file, sep=' ', header=None).values 22 | 23 | num_rows = odata.shape[0] 24 | selected = list(range(num_rows)) 25 | if num_rows >= 10 and ds_ratio < 1.0: 26 | rndperm = np.random.permutation(num_rows) 27 | n_ds = max(10, int(num_rows*ds_ratio)) 28 | selected = rndperm[:n_ds] 29 | 30 | o_bc = odata[selected, :bc_dim] 31 | num_os = o_bc.shape[0] 32 | X.append(o_bc) 33 | child_options.append(odata[selected, bc_dim:]) 34 | labels.append(odata[selected, bc_dim:bc_dim+1]) 35 | 36 | return np.vstack(X), parent_options, child_options, num_os, np.vstack(labels) 37 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/dimen_red/disassemble.py: -------------------------------------------------------------------------------- 1 | """disassemble into files by generation""" 2 | import os 3 | import os.path as osp 4 | import fnmatch 5 | from shutil import copyfile 6 | import numpy as np 7 | 8 | 9 | def disassemble(X, parent_options, child_options, method, *, 10 | start_iter, end_iter, path, chunk, copy_file_patterns): 11 | """Disassemble reduced BCs into each generation""" 12 | print("Disassembling and writing ...") 13 | 14 | assert len(parent_options) == len(child_options) == end_iter - start_iter + 1 15 | num_gens = len(parent_options) 16 | dir_name = "reduced_{}".format(method) 17 | 18 | for i in range(num_gens): 19 | gen = i + start_iter 20 | print('processing iter {}...'.format(gen)) 21 | 22 | dir_name_gen = '{}/{}/snapshots/snapshot_gen_{:04d}'.format(path, dir_name, gen) 23 | if not osp.exists(dir_name_gen): 24 | os.makedirs(dir_name_gen) 25 | 26 | pfile_name = '{}/snapshot_parent_{:04d}.dat'.format(dir_name_gen, gen) 27 | X_pdata = np.hstack((X[i*chunk, :], parent_options[i])) 28 | len_pdata = len(X_pdata) 29 | np.savetxt(pfile_name, X_pdata.reshape(1, len_pdata)) 30 | 31 | ofile_name = '{}/{}/snapshots/snapshot_gen_{:04d}/snapshot_offspring_{:04d}.dat'.format(path, dir_name, gen, gen) 32 | X_osdata = np.hstack((X[i*chunk+1:(i+1)*chunk, :], child_options[i])) 33 | np.savetxt(ofile_name, X_osdata) 34 | 35 | if copy_file_patterns is not None: 36 | src_dir = '{}/snapshots/snapshot_gen_{:04d}'.format(path, gen) 37 | for pattern in copy_file_patterns: 38 | for file in os.listdir(src_dir): 39 | if fnmatch.fnmatch(file, pattern): 40 | copyfile('{}/{}'.format(src_dir, file), 41 | '{}/{}'.format(dir_name_gen, file)) 42 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/dimen_red/reduce.py: -------------------------------------------------------------------------------- 1 | """dimensionality reduction""" 2 | from sklearn import decomposition, manifold, discriminant_analysis 3 | import numpy as np 4 | 5 | 6 | def reduce_dim(X, *, labels, method='pca'): 7 | """dimensionality reduction""" 8 | print("Reducing ...") 9 | 10 | if method == 'downsampling': 11 | 
X_r = X 12 | elif method == 'lda': 13 | X2 = X.copy() 14 | X2.flat[::X.shape[1] + 1] += 0.01 15 | X_r = discriminant_analysis.LinearDiscriminantAnalysis(n_components=2).fit_transform(X2, labels) 16 | elif method == 'tsne': 17 | X_pca = decomposition.PCA(n_components=50).fit_transform(X) 18 | X_r = manifold.TSNE(n_components=2, perplexity=30, 19 | verbose=2, random_state=0, n_iter=1000).fit_transform(X_pca) 20 | elif method == 'pca': 21 | X_r = decomposition.PCA(n_components=2).fit_transform(X) 22 | elif method == 'two_end': 23 | nrow, ncol = X.shape 24 | idx_last_x, idx_last_y = int(ncol / 2 - 1), -1 25 | X_r = np.hstack((X[:, idx_last_x].reshape(nrow, 1), X[:, idx_last_y].reshape(nrow, 1))) 26 | else: 27 | raise NotImplementedError 28 | 29 | print('Reduction Completed! X.shape={} X_r.shape={}'.format(X.shape, X_r.shape)) 30 | return X_r 31 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/visual_inspector/figure_base/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/buttons.py: -------------------------------------------------------------------------------- 1 | """buttons""" 2 | from matplotlib.widgets import Button, CheckButtons, RadioButtons 3 | from figure_base.figure_control import FigureControl 4 | 5 | 6 | class _CheckButtons(CheckButtons): 7 | def enforce(self, bval, index): 8 | print(bval) 9 | if not 0 <= index < len(self.labels): 10 | raise ValueError("Invalid CheckButton index: %d" % index) 11 | 12 | l1, l2 = self.lines[index] 13 | l1.set_visible(bval) 14 | l2.set_visible(bval) 15 | 16 | if self.drawon: 17 | self.ax.figure.canvas.draw() 18 | 19 | class _RadioButtons(RadioButtons): 20 | def __init__(self, *args, **kwargs): 21 | self.val2index = kwargs.pop('val2index') 22 | RadioButtons.__init__(self, *args, **kwargs) 23 | 24 | def enforce(self, val): 25 | index = self.val2index[val] 26 | 27 | if not 0 <= index < len(self.labels): 28 | raise ValueError("Invalid RadioButton index: %d" % index) 29 | 30 | self.value_selected = self.labels[index].get_text() 31 | 32 | for i, p in enumerate(self.circles): 33 | if i == index: 34 | color = self.activecolor 35 | else: 36 | color = self.ax.get_facecolor() 37 | p.set_facecolor(color) 38 | 39 | if self.drawon: 40 | self.ax.figure.canvas.draw() 41 | 42 | class ButtonArea(): 43 | def __init__(self, fig, visible_range): 44 | 45 | self.axhome = fig.add_axes([0.46, 0.01, 0.08, 0.05]) 46 | self.axreset = fig.add_axes([0.55, 0.01, 0.08, 0.05]) 47 | self.axmovie = fig.add_axes([0.64, 0.01, 0.08, 0.05]) 48 | self.axprev = fig.add_axes([0.73, 0.01, 0.08, 0.05]) 49 | self.axnext = fig.add_axes([0.82, 0.01, 0.08, 0.05]) 50 | self.bhome = Button(self.axhome, 'Home') 51 | self.bhome.on_clicked(self.home) 52 | self.breset = Button(self.axreset, 'Reset') 53 | self.breset.on_clicked(self.reset) 54 | self.bmovie = Button(self.axmovie, 'Movie') 55 | self.bmovie.on_clicked(self.movie) 56 | self.bnext = Button(self.axnext, 'Next') 57 | self.bnext.on_clicked(self.next) 58 | self.bprev = Button(self.axprev, 'Prev') 59 | self.bprev.on_clicked(self.prev) 60 | 61 | 62 | self.checkb_ax = fig.add_axes([0., 0.0, 0.1, 0.09]) 63 | self.checkb_ax.axis('off') 64 | 65 | self.checkb_ax_pace =
fig.add_axes([0.1, 0.0, 0.1, 0.09]) 66 | self.checkb_ax_pace.axis('off') 67 | 68 | self.checkb_ax_savem = fig.add_axes([0.2, 0.0, 0.1, 0.09]) 69 | self.checkb_ax_savem.axis('off') 70 | 71 | curr_stoc = FigureControl.offspring_stochastic 72 | self.check = _CheckButtons(self.checkb_ax, ['Random\nSeed'], [curr_stoc]) 73 | self.check.on_clicked(FigureControl.stochastic) 74 | 75 | self.check_pace = _CheckButtons(self.checkb_ax_pace, ['Fast\nPace'], 76 | [FigureControl.step > 1]) 77 | self.check_pace.on_clicked(FigureControl.fastMove) 78 | 79 | self.check_savem = _CheckButtons(self.checkb_ax_savem, 80 | ['Save\nMovie'], [FigureControl.save_movie]) 81 | self.check_savem.on_clicked(FigureControl.saveMovie) 82 | 83 | if not visible_range: 84 | self.rb_ax = fig.add_axes([0, 0.8, 0.15, 0.15]) 85 | self.rb_ax.axis('off') 86 | self.radio = _RadioButtons(self.rb_ax, ('1', '2', '3'), val2index={1:0, 2:1, 3:2}) 87 | self.radio.on_clicked(FigureControl.pickVR) 88 | 89 | self.rb_ax_cloud = fig.add_axes([0, 0.6, 0.15, 0.15]) 90 | self.rb_ax_cloud.axis('off') 91 | self.radio_cloud = _RadioButtons(self.rb_ax_cloud, ('All', 'Top', 'None'), 92 | val2index={'AllCloud':0, 'TopOnly':1, 'NoCloud':2}) 93 | self.radio_cloud.on_clicked(FigureControl.pickCloud) 94 | 95 | def eligibleClick(self, buttonClicked): 96 | if buttonClicked == "next": 97 | return (not FigureControl.isVisible(FigureControl.maxPossibleGenNumber), 98 | "max gen already displayed") 99 | elif buttonClicked == "prev": 100 | return (not FigureControl.isVisible(FigureControl.minPossibleGenNumber), 101 | "min gen already displayed") 102 | elif buttonClicked == "movie": 103 | return True, "" 104 | else: 105 | return False, "bad button" 106 | 107 | def next(self, event=None): 108 | ok, err = self.eligibleClick("next") 109 | if not ok: 110 | FigureControl.print_error(err) 111 | else: 112 | print("showing nextGen") 113 | nextGenNum = FigureControl.minPossibleGenNumber 114 | if FigureControl.numVisibleGenNumber() > 0: 115 | nextGenNum = min(FigureControl.maxVisibleGenNumber() + FigureControl.step, 116 | FigureControl.maxPossibleGenNumber) 117 | FigureControl.makeGenVisible(nextGenNum, True, "next") 118 | 119 | def prev(self, event=None): 120 | ok, err = self.eligibleClick("prev") 121 | if not ok: 122 | FigureControl.print_error(err) 123 | else: 124 | print("showing prevGen") 125 | nextGenNum = FigureControl.maxPossibleGenNumber 126 | if FigureControl.numVisibleGenNumber() > 0: 127 | nextGenNum = max(FigureControl.minVisibleGenNumber() - FigureControl.step, 128 | FigureControl.minPossibleGenNumber) 129 | FigureControl.makeGenVisible(nextGenNum, True, "prev") 130 | 131 | def movie(self, event): 132 | FigureControl.movie(event) 133 | 134 | def reset(self, event=None): 135 | #t1 = time.time() 136 | 137 | if FigureControl.numVisibleGenNumber() != 0: 138 | while FigureControl.numVisibleGenNumber() != 0: 139 | genNumber = FigureControl.maxVisibleGenNumber() 140 | print("cleaning ...", genNumber) 141 | FigureControl.hideOffSprings(genNumber) 142 | 143 | FigureControl.clear_labels() 144 | self.home() 145 | 146 | def home(self, event=None): 147 | FigureControl.set_home() 148 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/figure_control.py: -------------------------------------------------------------------------------- 1 | """Main figure components""" 2 | import numpy as np 3 | import figure_base.settings as gs 4 | import matplotlib.pyplot as p 5 | 6 | 7 | class FigureControl(): 8 | 
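    # All state and methods below are class-level: FigureControl acts as a
    # shared, singleton-style controller for every open figure and is never
    # instantiated.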
"""Central control for all figures""" 9 | @classmethod 10 | def init(cls, start_iter, end_iter, visible_range): 11 | cls.minPossibleGenNumber = start_iter 12 | cls.maxPossibleGenNumber = end_iter 13 | cls.setOfVisibleGenNumber = set() 14 | cls.cloudMode = 'AllCloud' 15 | cls.offspring_stochastic = False 16 | cls.save_movie = False 17 | 18 | cls.step = 1 19 | if cls.maxPossibleGenNumber - cls.minPossibleGenNumber >= 100: 20 | cls.step = int((cls.maxPossibleGenNumber - cls.minPossibleGenNumber)/10) 21 | 22 | cls.maxVisibleRangeSize = 1 23 | if visible_range: 24 | visible_range = int(visible_range) 25 | cls.maxVisibleRangeSize = max(1, visible_range) 26 | 27 | @classmethod 28 | def numVisibleGenNumber(cls): 29 | return len(cls.setOfVisibleGenNumber) 30 | 31 | @classmethod 32 | def minVisibleGenNumber(cls): 33 | return min(cls.setOfVisibleGenNumber) 34 | 35 | @classmethod 36 | def maxVisibleGenNumber(cls): 37 | return max(cls.setOfVisibleGenNumber) 38 | 39 | @classmethod 40 | def isVisible(cls, thisGenNumber): 41 | return thisGenNumber in cls.setOfVisibleGenNumber 42 | 43 | @classmethod 44 | def plotOffSprings(cls, thisGenNumber): 45 | cls.setOfVisibleGenNumber.add(thisGenNumber) 46 | for cplot in gs.cloud_plots: 47 | cplot.plotOffSprings(thisGenNumber) 48 | gs.fitness_plot.markVisible(thisGenNumber, True) 49 | 50 | @classmethod 51 | def hideOffSprings(cls, thisGenNumber): 52 | cls.setOfVisibleGenNumber.remove(thisGenNumber) 53 | for cplot in gs.cloud_plots: 54 | cplot.hideOffSprings(thisGenNumber) 55 | gs.fitness_plot.markVisible(thisGenNumber, False) 56 | 57 | @classmethod 58 | def applyVisibleRange(cls, mode, newGen): 59 | print("calling applyVisibleRange") 60 | while cls.numVisibleGenNumber() >= cls.maxVisibleRangeSize: 61 | minVG, maxVG = cls.minVisibleGenNumber(), cls.maxVisibleGenNumber() 62 | if mode == "next": 63 | drop_gen = minVG 64 | elif mode == "prev": 65 | drop_gen = maxVG 66 | elif mode == "dist": 67 | dist_minVG, dist_maxVG = np.abs(newGen - minVG), np.abs(newGen - maxVG) 68 | drop_gen = minVG if dist_minVG >= dist_maxVG else maxVG 69 | print("hiding Gen {}", drop_gen) 70 | cls.hideOffSprings(drop_gen) 71 | 72 | @classmethod 73 | def pickVR(cls, label): 74 | hzdict = {'1': 1, '2': 2, '3': 3} 75 | cls.maxVisibleRangeSize = hzdict[label] 76 | print("you select {}".format(cls.maxVisibleRangeSize)) 77 | for cplot in gs.cloud_plots: 78 | cplot.button_area.radio.enforce(cls.maxVisibleRangeSize) 79 | 80 | @classmethod 81 | def pickCloud(cls, label): 82 | hzdict = {'All': 'AllCloud', 'Top': 'TopOnly', 'None': 'NoCloud'} 83 | selectedMode = hzdict[label] 84 | oldMode = cls.cloudMode 85 | cls.cloudMode = selectedMode 86 | print("you select {} vs old {}".format(selectedMode, oldMode)) 87 | for cplot in gs.cloud_plots: 88 | cplot.button_area.radio_cloud.enforce(cls.cloudMode) 89 | if oldMode != cls.cloudMode and cls.numVisibleGenNumber() > 0: 90 | for gen in cls.setOfVisibleGenNumber: 91 | cls.hideOffSprings(gen) 92 | cls.plotOffSprings(gen) 93 | cls.draw_all_cloud_plots() 94 | 95 | @classmethod 96 | def stochastic(cls, label): 97 | oldstoc = cls.offspring_stochastic 98 | cls.offspring_stochastic = not oldstoc 99 | print("offspring_stochastic_seed: ", cls.offspring_stochastic) 100 | for cplot in gs.cloud_plots: 101 | cplot.button_area.check.enforce(cls.offspring_stochastic, 0) 102 | 103 | @classmethod 104 | def saveMovie(cls, label): 105 | oldstoc = cls.save_movie 106 | cls.save_movie = not oldstoc 107 | print("save movie: ", cls.save_movie) 108 | for cplot in gs.cloud_plots: 109 | 
cplot.button_area.check_savem.enforce(cls.save_movie, 0) 110 | 111 | @classmethod 112 | def fastMove(cls, label): 113 | if cls.step > 1: 114 | cls.step = 1 115 | else: 116 | cls.step = int((cls.maxPossibleGenNumber - cls.minPossibleGenNumber)/10) 117 | cls.step = max(cls.step, 1) 118 | print("current step size: ", cls.step) 119 | for cplot in gs.cloud_plots: 120 | cplot.button_area.check_pace.enforce(cls.step > 1, 0) 121 | 122 | @classmethod 123 | def draw_all_cloud_plots(cls): 124 | '''draw all cloud plots''' 125 | for cplot in gs.cloud_plots: 126 | cplot.fig.canvas.draw() 127 | 128 | @classmethod 129 | def makeGenVisible(cls, gen, visNow, mode, *, skip_fitness_plot=False): 130 | if visNow: 131 | for cplot in gs.cloud_plots: 132 | cplot.show_new_labels_gen(gen) 133 | if cls.numVisibleGenNumber() > 0: 134 | cls.applyVisibleRange(mode, gen) 135 | cls.plotOffSprings(gen) 136 | if not skip_fitness_plot: 137 | gs.fitness_plot.setVal(gen) 138 | else: 139 | cls.hideOffSprings(gen) 140 | if not skip_fitness_plot: 141 | gs.fitness_plot.fig.canvas.draw() 142 | 143 | cls.draw_all_cloud_plots() 144 | 145 | @classmethod 146 | def print_error(cls, err): 147 | for cplot in gs.cloud_plots: 148 | cplot.text_area.show(err) 149 | cls.draw_all_cloud_plots() 150 | 151 | @classmethod 152 | def clear_labels(cls): 153 | for cplot in gs.cloud_plots: 154 | cplot.clear_labels() 155 | gs.fitness_plot.reset() 156 | 157 | @classmethod 158 | def set_home(cls): 159 | for cplot in gs.cloud_plots: 160 | cplot.reset_xy_lim() 161 | cls.draw_all_cloud_plots() 162 | 163 | @classmethod 164 | def movie(cls, event): 165 | print("you clicked movie. will be showing movie in another figure") 166 | movie_start = cls.minPossibleGenNumber 167 | if cls.numVisibleGenNumber() > 0: 168 | movie_start = cls.minVisibleGenNumber() 169 | movie_end = cls.maxPossibleGenNumber 170 | print(movie_start, movie_end) 171 | cplot = gs.canvas2cloud_plot[event.canvas] 172 | cplot.play_movie(movie_start, movie_end) 173 | 174 | @classmethod 175 | def handle_close(cls, event): 176 | print("figure closed") 177 | if event.canvas == gs.fitness_plot.fig.canvas: 178 | print("close fitness plot") 179 | p.close('all') 180 | else: 181 | cplot = gs.canvas2cloud_plot[event.canvas] 182 | print(cplot.title) 183 | gs.cloud_plots.remove(cplot) 184 | gs.canvas2cloud_plot.pop(event.canvas) 185 | if len(gs.cloud_plots) == 0: 186 | p.close('all') 187 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/fitness_figures.py: -------------------------------------------------------------------------------- 1 | """generation# v.s. fitness plot""" 2 | import matplotlib.pyplot as plt 3 | from matplotlib.widgets import Slider 4 | import numpy as np 5 | from figure_base.figure_control import FigureControl 6 | import figure_base.settings as gs 7 | from figure_base.mouse_event import FitnessPlotClick, MouseMove 8 | from figure_base.load_data import loadParentData 9 | 10 | class FitnessPlot(): 11 | """generation# v.s. 
fitness plot""" 12 | def __init__(self, title, start_iter, end_iter, snapshots_path): 13 | x, y = [], [] 14 | for iteration in range(start_iter, end_iter+1): 15 | parent, _, _ = loadParentData(snapshots_path, iteration) 16 | x.append(iteration) 17 | y.append(parent[0].fitness) 18 | 19 | self.inc = 1 20 | self.fig = plt.figure(title) 21 | self.ax = self.fig.add_subplot(111) 22 | 23 | self.sliderax = self.fig.add_axes([0.125, 0.02, 0.775, 0.03], 24 | facecolor='yellow') 25 | 26 | self.slider = DiscreteSlider(self.sliderax, 'Gen', x[0], x[-1], 27 | increment=self.inc, valinit=-x[-1], valfmt='%0.0f') 28 | self.slider.on_changed(self.update) 29 | self.x = x 30 | self.y = y 31 | self.curve, = self.ax.plot(self.x, self.y, '--', picker=3) 32 | 33 | self.floating_annot = self.ax.annotate("", xy=(0, 0), xytext=(0, -40), 34 | textcoords="offset points", 35 | arrowprops=dict(arrowstyle="->")) 36 | self.floating_annot.set_visible(False) 37 | self.floating_annot.set_fontsize(18) 38 | self.floating_annot.set_color('b') 39 | 40 | self.ax.set_xlim(x[0], x[-1]) 41 | maxy, miny = max(y), min(y) 42 | self.ax.set_ylim(miny - 0.05*abs(miny), maxy + 0.05 * abs(maxy)) 43 | self.ax.set_ylabel("Fitness") 44 | self.ax.grid(True) 45 | self.dot, = self.ax.plot(-x[-1], -1, 'o', markersize=15, markerfacecolor="None", 46 | markeredgecolor='red', markeredgewidth=3) 47 | self.mapOfGenToArtist = {} 48 | self.fig.canvas.mpl_connect('pick_event', FitnessPlotClick.onpick) 49 | self.fig.canvas.mpl_connect("motion_notify_event", MouseMove.hover) 50 | self.fig.canvas.mpl_connect('close_event', FigureControl.handle_close) 51 | 52 | def update(self, value): 53 | """update the fitness plot""" 54 | if value < 0: 55 | self.dot.set_data([[value], [-1]]) 56 | self.ax.set_title("") 57 | else: 58 | self.dot.set_data([[value], [self.y[value-self.x[0]]]]) 59 | self.ax.set_title("Gen {} Fitness {:.8f} ".format(value, self.y[value-self.x[0]])) 60 | 61 | vis_now = FigureControl.isVisible(value) 62 | if not vis_now: 63 | FigureControl.makeGenVisible(value, True, "dist", 64 | skip_fitness_plot=True) 65 | 66 | self.fig.canvas.draw() 67 | 68 | def setVal(self, val): 69 | self.slider.set_val(val) 70 | 71 | def reset(self): 72 | """reset the slider""" 73 | self.slider.reset() 74 | 75 | def markVisible(self, gen, visible): 76 | """mark a generation visible""" 77 | if not gen in self.mapOfGenToArtist: 78 | this_marker = gs.MARKERS[gen%gs.numMarkers] 79 | this_color = gs.COLOR_HEX_LISTS[gen%gs.numColors] 80 | pt, = self.ax.plot(gen, self.y[gen-self.x[0]], 81 | this_marker, 82 | color=this_color[-1], 83 | markersize=10) 84 | 85 | numdigits = int(np.log10(gen)) + 1 if gen > 0 else 1 86 | annot = self.ax.annotate(gen, xy=(gen, self.y[gen-self.x[0]]), 87 | xytext=(-5.5*numdigits, 40), textcoords="offset points", 88 | arrowprops=dict(arrowstyle="->"), fontsize=18) 89 | 90 | self.mapOfGenToArtist[gen] = (pt, annot) 91 | 92 | self.mapOfGenToArtist[gen][0].set_visible(visible) 93 | self.mapOfGenToArtist[gen][1].set_visible(visible) 94 | 95 | class DiscreteSlider(Slider): 96 | """This class is slightly adapted from the following Subscriber Content from the Stack Exchange Network 97 | https://stackoverflow.com/questions/13656387 98 | 99 | The question was asked by J Knight (https://stackoverflow.com/users/1547090/j-knight). 
100 | The answer used here was written by Joe Kington (https://stackoverflow.com/users/325565/joe-kington) 101 | and edited by Ian Campbell (https://stackoverflow.com/users/1008353/ian-campbell) 102 | 103 | Stack Exchange Network Terms of Service can be found at 104 | https://stackexchange.com/legal/terms-of-service 105 | """ 106 | """A matplotlib slider widget with discrete steps.""" 107 | def __init__(self, *args, **kwargs): 108 | """Identical to Slider.__init__, except for the "increment" kwarg. 109 | "increment" specifies the step size that the slider will be discretized 110 | to.""" 111 | self.inc = kwargs.pop('increment', 0.5) 112 | Slider.__init__(self, *args, **kwargs) 113 | self.valtext.set_text('') 114 | 115 | def set_val(self, val): 116 | discrete_val = int(val / self.inc) * self.inc 117 | # We can't just call Slider.set_val(self, discrete_val), because this 118 | # will prevent the slider from updating properly (it will get stuck at 119 | # the first step and not "slide"). Instead, we'll keep track of 120 | # the continuous value as self.val and pass in the discrete value to 121 | # everything else. 122 | xy = self.poly.xy 123 | xy[2] = discrete_val, 1 124 | xy[3] = discrete_val, 0 125 | self.poly.xy = xy 126 | if discrete_val >= 0: 127 | self.valtext.set_text(self.valfmt % discrete_val) 128 | else: 129 | self.valtext.set_text('') 130 | if self.drawon: 131 | self.ax.figure.canvas.draw() 132 | self.val = val 133 | if not self.eventson: 134 | return 135 | for _, func in self.observers.items(): 136 | func(discrete_val) 137 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/load_data.py: -------------------------------------------------------------------------------- 1 | """load data from file""" 2 | import numpy as np 3 | import figure_base.settings as gs 4 | import pandas as pd 5 | import os.path as osp 6 | 7 | def color_index(fitness, minfit, maxfit): 8 | cind = (fitness - minfit)/(maxfit - minfit) * gs.numBins 9 | cind = int(cind) 10 | if cind >= gs.numBins: 11 | cind = gs.numBins-1 12 | elif cind < 0: 13 | cind = 0 14 | 15 | return cind 16 | 17 | 18 | class GenStat: 19 | def __init__(self, artist, table, filename, op_data=None): 20 | self.parentArtist = artist 21 | self.osDataTable = table 22 | self.filename = filename 23 | self.parent_op_data = op_data 24 | self.annotation = None # annotation that indicates the selected generation 25 | 26 | class DataPoint: 27 | def __init__(self, x, y, fitness, gen, parentOrNot, message, op_data=None): 28 | self.x = x 29 | self.y = y 30 | self.fitness = fitness 31 | self.gen = gen 32 | self.parentOrNot = parentOrNot 33 | self.message = message 34 | self.child_op_data = op_data 35 | 36 | def generateMessage(thisGenNumber, parentOrNot, x, y, fitness): 37 | title_message = 'Gen {} '.format(thisGenNumber) 38 | 39 | if parentOrNot: 40 | title_message = title_message + 'Parent ' 41 | else: 42 | title_message = title_message + 'Offspring ' 43 | 44 | title_message = title_message + 'x = {:.6f} y = {:.6f} fitness (on record) = {:.8f} '.format( 45 | x, y, fitness 46 | ) 47 | 48 | return title_message 49 | 50 | def loadParentData(path, gen, bc_dim=2): 51 | filename = '{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.dat'.format(path, gen, gen) 52 | newf = np.loadtxt(filename) 53 | 54 | x_pt = newf[0: bc_dim//2] 55 | y_pt = newf[bc_dim//2 : bc_dim] 56 | area_pt = newf[bc_dim] 57 | op_data = newf[bc_dim+1:] 58 | f_pt = 
'{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.h5'.format(path, gen, gen) 59 | if not osp.exists(f_pt): 60 | f_pt = None 61 | message = generateMessage(gen, True, x_pt[-1], y_pt[-1], area_pt) 62 | return [DataPoint(x_pt, y_pt, area_pt, gen, True, message)], op_data, f_pt 63 | 64 | def loadOffspringData(path, gen, pfit, bc_dim=2): 65 | filename = '{}/snapshots/snapshot_gen_{:04d}/snapshot_offspring_{:04d}.dat'.format(path, gen, gen) 66 | newf = pd.read_csv(filename, sep=' ', header=None).values 67 | 68 | if gen not in gs.gen2sorted_indices: 69 | gs.gen2sorted_indices[gen] = newf[:, bc_dim].argsort() 70 | 71 | newf = newf[gs.gen2sorted_indices[gen]] 72 | area = newf[:, bc_dim] 73 | 74 | maxfit = max(pfit, area[-1]) 75 | minfit = min(pfit, area[0]) 76 | 77 | v = np.linspace(minfit, maxfit, num=gs.numBins+1) 78 | ind = (np.searchsorted(area, v[1:gs.numBins], side='right')) 79 | assert len(ind) == gs.numBins - 1 80 | 81 | ind_bins = [] 82 | ind_bins.append(range(0, ind[0])) 83 | for i in range(0, len(ind)-1): 84 | ind_bins.append(range(ind[i], ind[i+1])) 85 | 86 | left, right = ind[-1], len(area) 87 | 88 | if right - left <= 10: 89 | ind_bins.append(range(left, right)) 90 | assert len(ind_bins) == gs.numBins 91 | else: 92 | ind_bins.append(range(left, right-10)) 93 | ind_bins.append(range(right-10, right)) 94 | assert len(ind_bins) == gs.numBins+1 95 | 96 | return newf, ind_bins, maxfit, minfit 97 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/mouse_event.py: -------------------------------------------------------------------------------- 1 | """mouse event""" 2 | import time 3 | from figure_base.figure_control import FigureControl 4 | import figure_base.settings as gs 5 | 6 | 7 | class FitnessPlotClick(): 8 | """mouse pick event on fitness plot""" 9 | @classmethod 10 | def onpick(cls, event): 11 | """mouse pick event on fitness plot""" 12 | event_len = len(event.ind) 13 | if not event_len: 14 | return True 15 | value = event.ind[-1] + FigureControl.minPossibleGenNumber 16 | vis_now = FigureControl.isVisible(value) 17 | FigureControl.makeGenVisible(value, not vis_now, "dist") 18 | 19 | class PointClick(): 20 | """mouse pick event on cloud plot""" 21 | last_click_time = None 22 | 23 | @classmethod 24 | def rate_limiting(cls): 25 | """limit the rate of clicking""" 26 | this_click_time = time.time() 27 | time_to_last_click = None 28 | if cls.last_click_time: 29 | time_to_last_click = this_click_time - cls.last_click_time 30 | cls.last_click_time = this_click_time 31 | return time_to_last_click and time_to_last_click < 0.7 32 | 33 | @classmethod 34 | def button_1(cls, cloud_plot, artist, ind): 35 | """click with button 1, i.e., left button""" 36 | is_parent = cloud_plot.is_parent_artist(artist, ind) 37 | gen = cloud_plot.artist2gen[artist] 38 | if is_parent: 39 | vis_now = FigureControl.isVisible(gen) 40 | FigureControl.makeGenVisible(gen, not vis_now, "dist") 41 | else: 42 | row_idx = cloud_plot.artist2data[artist][ind] 43 | for cpl in gs.cloud_plots: 44 | this_data = cpl.fetch_child_data_point(gen, row_idx) 45 | cpl.show_new_labels_dp(this_data) 46 | FigureControl.draw_all_cloud_plots() 47 | cloud_plot.button_1(artist, ind) 48 | 49 | @classmethod 50 | def button_3(cls, cloud_plot, artist, ind): 51 | """click with button 3, i.e., right button""" 52 | is_parent = cloud_plot.is_parent_artist(artist, ind) 53 | gen = cloud_plot.artist2gen[artist] 54 | 55 | for cpl in gs.cloud_plots: 56 | if is_parent: 57 | 
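                # A right-click on a parent point refreshes that generation's
                # labels on every open cloud plot, so all views stay consistent.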
cpl.show_new_labels_gen(gen) 58 | else: 59 | row_idx = cloud_plot.artist2data[artist][ind] 60 | this_data = cpl.fetch_child_data_point(gen, row_idx) 61 | cpl.show_new_labels_dp(this_data) 62 | FigureControl.draw_all_cloud_plots() 63 | cloud_plot.button_3(artist, ind) 64 | 65 | @classmethod 66 | def onpick(cls, event): 67 | """mouse pick event on cloud plot""" 68 | if cls.rate_limiting(): 69 | return True 70 | 71 | if len(event.ind) != 1: 72 | print("Two or more points are too close! Please zoom in.") 73 | print("Showing the one with higher fitness score") 74 | 75 | cloud_plot = gs.canvas2cloud_plot[event.canvas] 76 | artist = event.artist 77 | ind = event.ind[-1] 78 | button = event.mouseevent.button 79 | 80 | if button == 1: 81 | cls.button_1(cloud_plot, artist, ind) 82 | elif button == 3: 83 | cls.button_3(cloud_plot, artist, ind) 84 | 85 | class MouseMove(): 86 | """mouse move event on plots""" 87 | @classmethod 88 | def update_annot(cls, ind): 89 | """update the parent floating annotations""" 90 | gen = ind + FigureControl.minPossibleGenNumber 91 | for cplot in gs.cloud_plots: 92 | fitness = cplot.update_annot(gen) 93 | 94 | text = "{}".format(gen) 95 | gs.fitness_plot.floating_annot.xy = (gen, fitness) 96 | gs.fitness_plot.floating_annot.set_text(text) 97 | 98 | @classmethod 99 | def update_plot(cls, vis): 100 | """update the plots""" 101 | for cplot in gs.cloud_plots: 102 | cplot.annot.set_visible(vis) 103 | gs.fitness_plot.floating_annot.set_visible(vis) 104 | FigureControl.draw_all_cloud_plots() 105 | gs.fitness_plot.fig.canvas.draw_idle() 106 | 107 | @classmethod 108 | def update(cls, event, curve, preferred_idx): 109 | """update the plots and/or annotations""" 110 | cont, ind = curve.contains(event) 111 | if cont: 112 | idx = ind['ind'][preferred_idx] 113 | cls.update_annot(idx) 114 | cls.update_plot(True) 115 | elif gs.fitness_plot.floating_annot.get_visible(): 116 | cls.update_plot(False) 117 | 118 | @classmethod 119 | def hover(cls, event): 120 | """mouse move event on plots""" 121 | if event.canvas == gs.fitness_plot.fig.canvas: 122 | if event.inaxes == gs.fitness_plot.ax: 123 | cls.update(event, gs.fitness_plot.curve, -1) 124 | else: 125 | cplot = gs.canvas2cloud_plot[event.canvas] 126 | if event.inaxes == cplot.main_ax: 127 | cls.update(event, cplot.main_curve, 0) 128 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/rollout_base.py: -------------------------------------------------------------------------------- 1 | """Rollout Base""" 2 | import tensorflow as tf 3 | import figure_base.settings as gs 4 | import numpy as np 5 | import sys 6 | sys.path.append("..") 7 | from es_distributed.es import SharedNoiseTable 8 | from gym import wrappers 9 | 10 | 11 | class RolloutBase(): 12 | @classmethod 13 | def make_env(cls, *args, **kwargs): 14 | raise NotImplementedError 15 | 16 | @classmethod 17 | def setup_policy(cls, *args, **kwargs): 18 | raise NotImplementedError 19 | 20 | @classmethod 21 | def print_info(cls, *args, **kwargs): 22 | pass 23 | 24 | @classmethod 25 | def post_process(cls, *args, **kwargs): 26 | return True 27 | 28 | @classmethod 29 | def setup_and_rollout_policy(cls, policy_file, thisData, *, 30 | noise_stdev=0, num_rollouts=1, fixed_seed=None, 31 | render=False, path=None, record=None, bc_choice=None): 32 | if gs.noise is None: 33 | gs.noise = SharedNoiseTable() 34 | 35 | env = cls.make_env() 36 | env.reset() 37 | tf.reset_default_graph() 38 | if path and record: 39 | env = 
wrappers.Monitor(env, path + record, force=True) 40 | 41 | 42 | result = [] 43 | 44 | with tf.Session(): 45 | pi = cls.setup_policy(policy_file, thisData, noise_stdev, path) 46 | for _ in range(0, num_rollouts): 47 | if fixed_seed: 48 | seed = fixed_seed 49 | else: 50 | seed = np.random.randint(2**31-1) 51 | 52 | if bc_choice: 53 | rews, t, novelty_vec = pi.rollout(env, render=render, 54 | random_stream=np.random.RandomState(), policy_seed=seed, bc_choice=bc_choice) 55 | else: 56 | rews, t, novelty_vec = pi.rollout(env, render=render, 57 | random_stream=np.random.RandomState(), policy_seed=seed) 58 | cls.print_info(seed, rews, t, novelty_vec) 59 | result.append((seed, rews, t, novelty_vec)) 60 | env.close() 61 | return cls.post_process(env, result) 62 | 63 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/settings.py: -------------------------------------------------------------------------------- 1 | """global data structure""" 2 | from colour import Color 3 | 4 | 5 | gen2sorted_indices = {} # Global sorted index for sorting BCs 6 | fitness_plot = None 7 | cloud_plots = set() 8 | canvas2cloud_plot = {} # Figure canvas to figure object 9 | 10 | noise = None # Global Noise Table 11 | numBins = 5 # Number of Color Bins for ColorBar 12 | assert numBins > 1 13 | 14 | COLORS = [ 15 | (Color('#f9d9d9'), Color('#d61515')), # red 16 | (Color('#d9ddfb'), Color('#0b1667')), # blue 17 | (Color('#9aecb8'), Color('#045c24')), # green 18 | (Color('#ffbef9'), Color('#ce00bb')), # pink 19 | (Color('#d0d0d0'), Color('#000000')), # black 20 | (Color('#f2d6b9'), Color('#996633')), # brown 21 | (Color('#d5b2ec'), Color('#9900FF')), # purple 22 | (Color('#baffff'), Color('#009999')), # teal 23 | (Color('#ffb27e'), Color('#fb6500')), # orange 24 | (Color('#beffcf'), Color('#33FF66')), # lime green 25 | ] 26 | 27 | COLOR_HEX_LISTS = [] 28 | for color in COLORS: 29 | color_gradient = color[0].range_to(color[1], numBins) 30 | hex_list = [c.get_hex_l() for c in color_gradient] 31 | COLOR_HEX_LISTS.append(hex_list) 32 | 33 | numColors = len(COLOR_HEX_LISTS) 34 | 35 | MARKERS = [ 36 | 'D', 'o', 'v', 's', '^', '<', 37 | '>', '*', 'h', 'H', 'd', 'X' 38 | ] 39 | numMarkers = len(MARKERS) 40 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/visual_inspector/figure_custom/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/cloud_figures_custom.py: -------------------------------------------------------------------------------- 1 | """Customized Cloud Figures""" 2 | from figure_base.cloud_figures import CloudPlot 3 | 4 | 5 | class CloudPlotHDBC(CloudPlot): 6 | """Cloud plot to show trajectory as Hi Dim BCs""" 7 | def __init__(self, *args, **kwargs): 8 | CloudPlot.__init__(self, *args, **kwargs) 9 | self.hd_bc, = self.main_ax.plot([], [], color='k', linewidth=3) 10 | 11 | def show_new_labels_dp(self, thisData): 12 | CloudPlot.show_new_labels_dp(self, thisData) 13 | self.hd_bc.set_data(thisData.x, thisData.y) 14 | 15 | def clear_labels(self): 16 | CloudPlot.clear_labels(self) 17 | self.hd_bc.set_data([], []) 18 | 19 | class CloudPlotRollout(CloudPlot): 20 | 
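    # Right-click hook: button_3 launches an interactive MuJoCo rollout window
    # (rolloutMaker from figure_custom.rollout_trajectory) for the picked point.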
"""Cloud plot with policy rollout""" 21 | def __init__(self, *args, **kwargs): 22 | CloudPlot.__init__(self, *args, **kwargs) 23 | self.traj_plots = [] 24 | 25 | def button_3(self, artist, ind): 26 | from figure_custom.rollout_trajectory import rolloutMaker 27 | print("rolling out!!") 28 | gen = self.artist2gen[artist] 29 | this_data = self.fetch_data_point(artist, ind) 30 | if self.get_policy_file(gen) != None: 31 | self.traj_plots.append(rolloutMaker(gen, this_data, self)) 32 | 33 | class CloudPlotRolloutAtari(CloudPlot): 34 | """Cloud plot with policy rollout""" 35 | 36 | def button_3(self, artist, ind): 37 | from figure_custom.rollout_custom import RolloutAtari 38 | print("rolling out!!") 39 | gen = self.artist2gen[artist] 40 | print(gen) 41 | this_data = self.fetch_data_point(artist, ind) 42 | policy_file = self.get_policy_file(gen) 43 | if policy_file is None: 44 | return 45 | noise_stdev = self.get_parent_op_data(gen)[-1] 46 | 47 | if this_data.parentOrNot: 48 | seed = int(self.get_parent_op_data(gen)[-2]) 49 | print(self.get_parent_op_data(gen)) 50 | else: 51 | seed = int(this_data.child_op_data[-2]) 52 | print(this_data.child_op_data) 53 | 54 | x, y, f = this_data.x[-1], this_data.y[-1], this_data.fitness 55 | record = "snapshots/snapshot_gen_{:04}/clips/x_{:.2f}_y_{:.2f}_f{:.2f}".format( 56 | this_data.gen, x, y, f) 57 | RolloutAtari.setup_and_rollout_policy(policy_file, this_data, 58 | noise_stdev=noise_stdev, fixed_seed=seed, 59 | render=True, path=self.path, record=record) 60 | 61 | 62 | import subprocess 63 | subprocess.call(["open {}/*.mp4".format(self.path+record)], shell=True) 64 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/rollout_custom.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import figure_base.settings as gs 3 | from figure_base.rollout_base import RolloutBase 4 | 5 | 6 | class RolloutAtari(RolloutBase): 7 | @classmethod 8 | def make_env(self): 9 | from es_distributed.atari_wrappers import wrap_deepmind 10 | env_id = "FrostbiteNoFrameskip-v4" 11 | env = gym.make(env_id) 12 | env = wrap_deepmind(env) 13 | return env 14 | 15 | @classmethod 16 | def setup_policy(cls, policy_file, thisData, noise_stdev, path): 17 | from es_distributed.policies import ESAtariPolicy 18 | import pickle 19 | 20 | pi = ESAtariPolicy.Load(policy_file, extra_kwargs=None) 21 | 22 | iteration = thisData.gen 23 | rb_file = path+"/snapshots/snapshot_gen_{:04}/snapshot_parent_{:04d}_rb.p".format(iteration, iteration) 24 | rb_saved = pickle.load( open( rb_file, "rb" ) ) 25 | pi.set_ref_batch(rb_saved) 26 | 27 | if not thisData.parentOrNot: 28 | noiseIdx, seed, noiseSign = thisData.child_op_data[-3:].astype(int) 29 | print(noiseIdx, seed, noiseSign) 30 | theta = pi.get_trainable_flat() + noiseSign * noise_stdev * gs.noise.get(noiseIdx, pi.num_params) 31 | pi.set_trainable_flat(theta) 32 | 33 | return pi 34 | 35 | @classmethod 36 | def print_info(cls, seed, rews, t, novelty_vec): 37 | print('return={:.4f} len={}'.format(rews.sum(), t)) 38 | 39 | @classmethod 40 | def post_process(cls, env, result): 41 | ram = env.unwrapped._get_ram() 42 | print(ram) 43 | return True 44 | 45 | class RolloutMujoco(RolloutBase): 46 | @classmethod 47 | def make_env(cls): 48 | env = gym.make('Humanoid-v1') 49 | return env 50 | 51 | @classmethod 52 | def setup_policy(cls, policy_file, thisData, noise_stdev, path): 53 | from es_distributed.policies import MujocoPolicy 54 | pi = 
MujocoPolicy.Load(policy_file, extra_kwargs=None) 55 | if not thisData.parentOrNot: 56 | noiseIdx, noiseSign = thisData.child_op_data[1:3].astype(int) 57 | theta = pi.get_trainable_flat() + noiseSign * noise_stdev * gs.noise.get(noiseIdx, pi.num_params) 58 | pi.set_trainable_flat(theta) 59 | return pi 60 | 61 | @classmethod 62 | def get_x_y_death_from_humanoid_bc(cls, bc): 63 | idx_last_x, idx_last_y = int(len(bc) / 2 - 1), -1 64 | x_coord, y_coord = bc[idx_last_x], bc[idx_last_y] 65 | return x_coord, y_coord 66 | 67 | @classmethod 68 | def print_info(cls, seed, rews, t, novelty_vec): 69 | x_coord, y_coord = cls.get_x_y_death_from_humanoid_bc(novelty_vec) 70 | print('seed={} x = {:.6f} y = {:.6f} reward={:.8f} len={}'.format( 71 | seed, x_coord, y_coord, rews.sum(), t) 72 | ) 73 | 74 | 75 | @classmethod 76 | def post_process(cls, env, result): 77 | xs, ys, ts, scores, seeds = [], [], [], [], [] 78 | for r in result: 79 | seed, rews, _, novelty_vec = r 80 | x_coord, y_coord = cls.get_x_y_death_from_humanoid_bc(novelty_vec) 81 | xs.append(x_coord) 82 | ys.append(y_coord) 83 | ts.append(novelty_vec) 84 | scores.append(rews.sum()) 85 | seeds.append(seed) 86 | return xs, ys, ts, scores, seeds 87 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/rollout_trajectory.py: -------------------------------------------------------------------------------- 1 | """rollout and obtain the trajectories""" 2 | import time 3 | import matplotlib.pyplot as p 4 | from figure_base.figure_control import FigureControl 5 | import figure_base.settings as gs 6 | import numpy as np 7 | from figure_custom.rollout_custom import RolloutMujoco 8 | 9 | def extract_traj(traj): 10 | """extract the trajectory""" 11 | length = int(len(traj) / 2) 12 | tx, ty = traj[0:length], traj[length:] 13 | tx = np.insert(tx, 0, 0.) 14 | ty = np.insert(ty, 0, 0.) 
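    # The trajectory BC is laid out as [x_1..x_T, y_1..y_T]; a (0, 0) origin is
    # prepended to both halves so plotted trajectories start at the origin.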
15 | return tx, ty 16 | 17 | 18 | class rolloutMaker(): 19 | def __init__(self, gen, thisData, cloud_plot): 20 | self.fig = p.figure() 21 | self.thisData = thisData 22 | self.ax_list, self.artist_list = [], [] 23 | self.artist_fixed = None 24 | self.selected_rollout = None 25 | self.policy_file = cloud_plot.get_policy_file(gen) 26 | self.noise_stdev, = cloud_plot.get_parent_op_data(gen) 27 | print(self.noise_stdev) 28 | 29 | self.fixed_seed = None 30 | if not thisData.parentOrNot and len(thisData.child_op_data) > 0: 31 | self.fixed_seed = thisData.child_op_data[0] 32 | 33 | self.fxs, self.fys, self.fts, self.fscores, self.fseeds = None, None, None, None, None 34 | if self.fixed_seed: 35 | self.fxs, self.fys, self.fts, self.fscores, self.fseeds = RolloutMujoco.setup_and_rollout_policy(self.policy_file, thisData, noise_stdev=self.noise_stdev, 36 | fixed_seed=int(self.fixed_seed), bc_choice="traj") 37 | 38 | self.xs, self.ys, self.ts, self.scores, self.seeds = None, None, None, None, None 39 | if self.fixed_seed is None or FigureControl.offspring_stochastic: 40 | self.xs, self.ys, self.ts, self.scores, self.seeds = RolloutMujoco.setup_and_rollout_policy(self.policy_file, thisData, noise_stdev=self.noise_stdev, 41 | num_rollouts=9, bc_choice="traj") 42 | 43 | self.ax1 = p.subplot2grid((3, 6), (0, 0), rowspan=3, colspan=3) 44 | self.ax1.plot(0, 0, 'ro', markersize=12, label="Origin") 45 | #self.ax1.plot(thisData.x[-1], thisData.y[-1], 'bo', markersize=12, label="Final (Fixed Seed)") 46 | self.ax1.grid(True) 47 | 48 | if self.fxs: 49 | self.artist_fixed, = self.ax1.plot(self.fxs, self.fys, 'bo', markersize=12, picker=5, label="Final (Fixed Seed)") 50 | traj = self.fts[0] 51 | tx, ty = extract_traj(traj) 52 | self.ax1.plot(tx, ty, 'b--') 53 | 54 | if self.xs: 55 | for idx, traj in enumerate(self.ts): 56 | tx, ty = extract_traj(traj) 57 | label_words = "Final (Random Seed)" if idx == 0 else None 58 | pt, = self.ax1.plot(self.xs[idx], self.ys[idx], 'C1X', markersize=12, picker=5, label=label_words) 59 | self.artist_list.append(pt) 60 | annot=self.ax1.annotate(idx+1, xy=(self.xs[idx], self.ys[idx]), xytext=(5,5),textcoords="offset points") 61 | annot.set_fontsize(16) 62 | annot.set_color('r') 63 | self.ax1.plot(tx, ty, 'C{}'.format(idx%10)) 64 | 65 | ax2 = p.subplot2grid((3, 6), (int(idx/3), idx%3+3)) 66 | ax2.plot(0, 0, 'ro', markersize=10) 67 | ax2.plot(self.fxs, self.fys, 'bo', markersize=10) 68 | ax2.plot(tx[-1], ty[-1], 'C1X', markersize=10) 69 | ax2.plot(tx, ty, 'C{}'.format(idx%10)) 70 | 71 | left, right = ax2.get_xlim() 72 | bottom, top = ax2.get_ylim() 73 | ax2.text(0.5*(left+right), 0.5*(bottom+top), '{}'.format(idx+1), 74 | horizontalalignment='center', 75 | verticalalignment='center', 76 | fontsize=32, color='red', alpha=0.5) 77 | 78 | ax2.grid(True) 79 | self.ax_list.append(ax2) 80 | self.ax1.legend() 81 | #self.ax1.set_xlim(cloud_plot.xlim) 82 | #self.ax1.set_ylim(cloud_plot.ylim) 83 | self.fig.canvas.mpl_connect('button_press_event', self.on_press) 84 | self.fig.canvas.mpl_connect('pick_event', self.on_pick) 85 | 86 | self.fig.show() 87 | 88 | def on_pick(self, event): 89 | thisevent = event.mouseevent 90 | thisArtist = event.artist 91 | self.reset() 92 | if self.artist_fixed and thisArtist == self.artist_fixed: 93 | print("you pick the fixed seed") 94 | self.fig.suptitle("x:{:.6f} y:{:.6f} fitness:{:.8f}".format(self.fxs[0], 95 | self.fys[0], self.fscores[0])) 96 | self.artist_fixed.set_markersize(18) 97 | else: 98 | for i, art_sub in enumerate(self.artist_list): 99 | if thisArtist 
== art_sub: 100 | self.select(i) 101 | break 102 | self.fig.canvas.draw() 103 | if thisevent.button == 3: 104 | if self.selected_rollout != None: 105 | RolloutMujoco.setup_and_rollout_policy(self.policy_file, self.thisData, 106 | noise_stdev=self.noise_stdev, 107 | fixed_seed=self.seeds[self.selected_rollout], render=True) 108 | else: 109 | RolloutMujoco.setup_and_rollout_policy(self.policy_file, self.thisData, 110 | noise_stdev=self.noise_stdev, 111 | fixed_seed=int(self.fixed_seed), render=True) 112 | 113 | def reset(self): 114 | if self.artist_fixed: 115 | self.artist_fixed.set_markersize(12) 116 | self.fig.suptitle("") 117 | if self.selected_rollout != None: 118 | rIdx = self.selected_rollout 119 | self.artist_list[rIdx].set_markersize(12) 120 | self.ax_list[rIdx].set_facecolor('1') 121 | self.selected_rollout = None 122 | 123 | def select(self, rIdx): 124 | self.fig.suptitle("#{} x:{:.6f} y:{:.6f} fitness:{:.8f}".format(rIdx+1, self.xs[rIdx], self.ys[rIdx], self.scores[rIdx])) 125 | self.artist_list[rIdx].set_markersize(18) 126 | self.ax_list[rIdx].set_facecolor('0.9') 127 | self.selected_rollout = rIdx 128 | 129 | def on_press(self, event): 130 | print('you pressed', event.button, event.xdata, event.ydata) 131 | ax_on_press = event.inaxes 132 | if ax_on_press == self.ax1: 133 | return 134 | 135 | self.reset() 136 | 137 | if ax_on_press: 138 | for i, ax_sub in enumerate(self.ax_list): 139 | if ax_on_press == ax_sub: 140 | self.select(i) 141 | break 142 | self.fig.canvas.draw() 143 | 144 | #print(event.button, self.selected_rollout) 145 | if event.button == 3 and self.selected_rollout != None: 146 | RolloutMujoco.setup_and_rollout_policy(self.policy_file, self.thisData, 147 | noise_stdev=self.noise_stdev, 148 | fixed_seed=self.seeds[self.selected_rollout], render=True) 149 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/main_atari.py: -------------------------------------------------------------------------------- 1 | """interactive analysis""" 2 | import click 3 | import matplotlib.pyplot as p 4 | import figure_base.settings as gs 5 | from figure_base.figure_control import FigureControl 6 | from figure_base.fitness_figures import FitnessPlot 7 | from figure_custom.cloud_figures_custom import CloudPlotHDBC, CloudPlotRolloutAtari 8 | 9 | 10 | @click.command() 11 | @click.argument('start_iter', nargs=1) 12 | @click.argument('end_iter', nargs=1) 13 | @click.argument('snapshots_path', nargs=-1) 14 | @click.option('--visible_range', help='Up to how many generations visible on one plot.') 15 | @click.option('--hi_dim_bc', type=(str, int), default=(None, None), 16 | help='Path to high-dimensional (> 2-D) BC and its dimension') 17 | def main(start_iter, end_iter, snapshots_path, visible_range, hi_dim_bc): 18 | """ 19 | START_ITER: Plot data that begins at this iteration (generation)\n 20 | END_ITER: Plot data that ends at this iteration (generation)\n 21 | SNAPSHOTS_PATH: Path(s) to One or multiple 2-D BCs 22 | """ 23 | start_iter = int(start_iter) 24 | end_iter = int(end_iter) 25 | 26 | FigureControl.init(start_iter, end_iter, visible_range) 27 | 28 | for idx, path in enumerate(snapshots_path): 29 | print("Generating Cloud Plot {} from {}".format(idx, path)) 30 | cplot = CloudPlotRolloutAtari("Cloud Plot {} ({})".format(idx, path), 31 | start_iter, end_iter, path, visible_range) 32 | gs.cloud_plots.add(cplot) 33 | gs.canvas2cloud_plot[cplot.fig.canvas] = cplot 34 | 35 | gs.fitness_plot = FitnessPlot("Fitness Plot", 
start_iter, end_iter, snapshots_path[0]) 36 | 37 | 38 | hbc_path, hbc_dim = hi_dim_bc 39 | if hbc_path != None and hbc_dim != None: 40 | print("Generating Cloud Plot H-D from {}".format(hbc_path)) 41 | hbcplot = CloudPlotHDBC("Cloud Plot {}-D BC ({})".format(hbc_dim, hbc_path), 42 | start_iter, end_iter, hbc_path, visible_range, hbc_dim) 43 | gs.cloud_plots.add(hbcplot) 44 | gs.canvas2cloud_plot[hbcplot.fig.canvas] = hbcplot 45 | 46 | p.show() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/main_mujoco.py: -------------------------------------------------------------------------------- 1 | """interactive analysis""" 2 | import click 3 | import matplotlib.pyplot as p 4 | import figure_base.settings as gs 5 | from figure_base.figure_control import FigureControl 6 | from figure_base.fitness_figures import FitnessPlot 7 | from figure_custom.cloud_figures_custom import CloudPlotHDBC, CloudPlotRollout 8 | 9 | 10 | @click.command() 11 | @click.argument('start_iter', nargs=1) 12 | @click.argument('end_iter', nargs=1) 13 | @click.argument('snapshots_path', nargs=-1) 14 | @click.option('--visible_range', help='Up to how many generations visible on one plot.') 15 | @click.option('--hi_dim_bc', type=(str, int), default=(None, None), 16 | help='Path to high-dimensional (> 2-D) BC and its dimension') 17 | def main(start_iter, end_iter, snapshots_path, visible_range, hi_dim_bc): 18 | """ 19 | START_ITER: Plot data that begins at this iteration (generation)\n 20 | END_ITER: Plot data that ends at this iteration (generation)\n 21 | SNAPSHOTS_PATH: Path(s) to One or multiple 2-D BCs 22 | """ 23 | start_iter = int(start_iter) 24 | end_iter = int(end_iter) 25 | 26 | FigureControl.init(start_iter, end_iter, visible_range) 27 | 28 | for idx, path in enumerate(snapshots_path): 29 | print("Generating Cloud Plot {} from {}".format(idx, path)) 30 | cplot = CloudPlotRollout("Cloud Plot {} ({})".format(idx, path), 31 | start_iter, end_iter, path, visible_range) 32 | gs.cloud_plots.add(cplot) 33 | gs.canvas2cloud_plot[cplot.fig.canvas] = cplot 34 | 35 | gs.fitness_plot = FitnessPlot("Fitness Plot", start_iter, end_iter, snapshots_path[0]) 36 | 37 | 38 | hbc_path, hbc_dim = hi_dim_bc 39 | if hbc_path != None and hbc_dim != None: 40 | print("Generating Cloud Plot H-D from {}".format(hbc_path)) 41 | hbcplot = CloudPlotHDBC("Cloud Plot {}-D BC ({})".format(hbc_dim, hbc_path), 42 | start_iter, end_iter, hbc_path, visible_range, hbc_dim) 43 | gs.cloud_plots.add(hbcplot) 44 | gs.canvas2cloud_plot[hbcplot.fig.canvas] = hbcplot 45 | 46 | p.show() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/process_bc.py: -------------------------------------------------------------------------------- 1 | """interactive analysis""" 2 | import click 3 | from dimen_red.assemble import assemble 4 | from dimen_red.reduce import reduce_dim 5 | from dimen_red.disassemble import disassemble 6 | import numpy as np 7 | 8 | @click.command() 9 | @click.argument('start_iter', nargs=1) 10 | @click.argument('end_iter', nargs=1) 11 | @click.argument('snapshots_path', nargs=1) 12 | @click.argument('bc_dim', nargs=1) 13 | @click.option('--method', default='pca', 14 | help='Methods of dimensionality reduction or downsampling.') 15 | @click.option('--downsampling_ratio', default=1.0, 16 | help='Downsampling ratio 
(<1) when method=downsampling.') 17 | @click.option('--copy_files', 18 | help='Files to copy over. Support Unix-style wildcards, separated in spaces') 19 | def main(start_iter, end_iter, snapshots_path, bc_dim, method, downsampling_ratio, copy_files): 20 | """ 21 | Apply dimensionality reduction or downsampling to hi-dimensional data. 22 | 23 | START_ITER: Process data that begins at this iteration (generation)\n 24 | END_ITER: Process data that ends at this iteration (generation)\n 25 | SNAPSHOTS_PATH: Path to hi-dimensional BC 26 | """ 27 | start_iter, end_iter, bc_dim = int(start_iter), int(end_iter), int(bc_dim) 28 | 29 | if method != 'downsampling': 30 | downsampling_ratio = 1.0 31 | 32 | #step 1: Assemble hi-D BCs from all generations 33 | X, p_opt, ch_opt, num_os_per_gen, labels = assemble(start_iter, end_iter, snapshots_path, 34 | bc_dim=bc_dim, ds_ratio=downsampling_ratio) 35 | print('Assembling Completed! X.shape={} #OS_per_gen={}'.format(X.shape, num_os_per_gen)) 36 | c_labels = np.round(labels/100) 37 | 38 | print(c_labels, c_labels.shape) 39 | X = X / 255.0 40 | #step 2: Hi-D BCs to 2-D BCs if method != downsampling 41 | X_r = reduce_dim(X, labels=np.ravel(c_labels), method=method) 42 | 43 | #step 3: Disassemble reduced BCs into each generation 44 | search_patterns = None 45 | if copy_files is not None: 46 | search_patterns = copy_files.split(' ') 47 | 48 | disassemble(X_r, p_opt, ch_opt, method, 49 | start_iter=start_iter, end_iter=end_iter, path=snapshots_path, 50 | chunk=1+num_os_per_gen, copy_file_patterns=search_patterns) 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0090/snapshot_parent_0090.dat: -------------------------------------------------------------------------------- 1 | 22.6325171264 -1.74513390999 6507.56591797 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0091/snapshot_parent_0091.dat: -------------------------------------------------------------------------------- 1 | 23.738028366 -2.08385136894 6592.18505859 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0092/snapshot_parent_0092.dat: -------------------------------------------------------------------------------- 1 | 23.9506591621 -2.33097182707 6609.68261719 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0093/snapshot_parent_0093.dat: -------------------------------------------------------------------------------- 1 | 24.5922858277 -2.54194614284 6658.01367188 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0094/snapshot_parent_0094.dat: -------------------------------------------------------------------------------- 1 | 25.7124246286 -2.49096325737 6749.34375 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0095/snapshot_parent_0095.dat: 
-------------------------------------------------------------------------------- 1 | 26.314792498 -2.17572508629 6798.12207031 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0096/snapshot_parent_0096.dat: -------------------------------------------------------------------------------- 1 | 26.5435120513 -1.91470031378 6814.58154297 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0097/snapshot_parent_0097.dat: -------------------------------------------------------------------------------- 1 | 27.1223119141 -1.91108718848 6860.91601562 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0098/snapshot_parent_0098.dat: -------------------------------------------------------------------------------- 1 | 27.8368530397 -1.66141369742 6920.83349609 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0099/snapshot_parent_0099.dat: -------------------------------------------------------------------------------- 1 | 28.2299172297 -1.29749504234 6955.35058594 0.02 2 | --------------------------------------------------------------------------------
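A minimal sketch (not part of the repo) of parsing one of the parent snapshot rows above, assuming the layout that loadParentData uses with bc_dim=2: x, y, fitness, then operator data such as the mutation noise_stdev:

import numpy as np

def read_parent_snapshot(path, gen, bc_dim=2):
    # Hypothetical helper mirroring loadParentData's slicing of
    # snapshot_parent_XXXX.dat: bc_dim BC values, then fitness, then op data.
    fname = '{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.dat'.format(path, gen, gen)
    row = np.loadtxt(fname)
    return row[:bc_dim], row[bc_dim], row[bc_dim + 1:]

# For snapshot_gen_0090 above: bc ~ [22.63, -1.75], fitness ~ 6507.57, op_data = [0.02].
bc, fitness, op_data = read_parent_snapshot('sample_data/mujoco/final_xy_bc', 90)
print(bc, fitness, op_data)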