├── .gitignore ├── MAML ├── DockerFile.subm ├── README.md ├── all.csv ├── config.yaml ├── config_debug.yaml ├── config_test.yaml ├── config_test_local.yaml ├── config_train.yaml ├── make_image.sh ├── meta_learner.py ├── model.py ├── requirements.txt ├── setup.sh ├── sonic_utils.py ├── start_workers.sh ├── test.sh ├── train.py ├── train_debug.csv ├── train_large.csv ├── train_small.csv ├── utils.py ├── validation.csv └── worker.py ├── README.md ├── actor_critic ├── actor_critic_test.py ├── actor_critic_train.py ├── envs.py └── weights │ └── actor_critic_sonic1.pt ├── baseline ├── README.md ├── all.csv ├── config.yaml ├── config_test.yaml ├── config_train.yaml ├── jerk.docker ├── jerk_agent.py ├── make_image.sh ├── ppo2.docker ├── ppo2.subm.docker ├── ppo2_agent.py ├── rainbow.docker ├── rainbow_agent.py ├── requirements.txt ├── roms │ └── .gitkeep ├── setup.sh ├── simple-agent.docker ├── simple-agent.py ├── sonic_util.py ├── test.sh ├── train.sh ├── train_large.csv ├── train_nodocker.sh ├── train_small.csv ├── utils.py └── validation.csv └── deepneuroevolution ├── README.md ├── configurations ├── frostbite_es.json ├── frostbite_ga.json ├── frostbite_nses.json ├── frostbite_nsres.json ├── humanoid.json ├── humanoid_nses.json ├── humanoid_nsres.json ├── sonic_es.json ├── sonic_ga.json └── sonic_nsres.json ├── es_distributed ├── __init__.py ├── atari_wrappers.py ├── dist.py ├── es.py ├── es_modified.py ├── ga.py ├── ga_modified.py ├── main.py ├── nses.py ├── optimizers.py ├── policies.py ├── rs.py ├── tabular_logger.py └── tf_util.py ├── extra └── humanoid_maze.xml ├── gpu_implementation ├── README.md ├── configurations │ ├── es_atari_config.json │ ├── ga_atari_config.json │ └── rs_atari_config.json ├── es.py ├── ga.py ├── gym_tensorflow │ ├── Makefile │ ├── README.md │ ├── __init__.py │ ├── atari │ │ ├── README.md │ │ ├── __init__.py │ │ ├── tf_atari.cpp │ │ └── tf_atari.py │ ├── maze │ │ ├── __init__.py │ │ ├── hard_maze.txt │ │ ├── hard_maze.txt.npy │ │ ├── maze.h │ │ ├── tf_maze.cpp │ │ └── tf_maze.py │ ├── ops │ │ ├── __init__.py │ │ └── indexedmatmul.cpp │ ├── tf_env.cpp │ ├── tf_env.h │ ├── tf_env.py │ └── wrappers │ │ ├── __init__.py │ │ └── stack_frames.py ├── neuroevolution │ ├── __init__.py │ ├── concurrent_worker.py │ ├── display.py │ ├── distributed_helpers.py │ ├── helper.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batchnorm.py │ │ ├── dqn.py │ │ ├── dqn_xavier.py │ │ └── simple.py │ ├── optimizers.py │ └── tf_util.py └── tabular_logger.py ├── redis_config ├── redis_local_mirror.conf └── redis_master.conf ├── requirements.txt ├── scripts ├── dependency.sh ├── ec2ctl ├── launch.py ├── local_env_setup.sh ├── local_run_exp.sh ├── local_run_redis.sh ├── packer.json └── viz.py ├── train_large.csv ├── train_small.csv ├── train_spring_yard.csv └── visual_inspector ├── README.md ├── dimen_red ├── assemble.py ├── disassemble.py └── reduce.py ├── figure_base ├── __init__.py ├── buttons.py ├── cloud_figures.py ├── figure_control.py ├── fitness_figures.py ├── load_data.py ├── mouse_event.py ├── rollout_base.py └── settings.py ├── figure_custom ├── __init__.py ├── cloud_figures_custom.py ├── rollout_custom.py └── rollout_trajectory.py ├── main_atari.py ├── main_mujoco.py ├── process_bc.py └── sample_data └── mujoco └── final_xy_bc └── snapshots ├── snapshot_gen_0090 ├── snapshot_offspring_0090.dat └── snapshot_parent_0090.dat ├── snapshot_gen_0091 ├── snapshot_offspring_0091.dat └── snapshot_parent_0091.dat ├── snapshot_gen_0092 ├── snapshot_offspring_0092.dat └── 
snapshot_parent_0092.dat ├── snapshot_gen_0093 ├── snapshot_offspring_0093.dat └── snapshot_parent_0093.dat ├── snapshot_gen_0094 ├── snapshot_offspring_0094.dat └── snapshot_parent_0094.dat ├── snapshot_gen_0095 ├── snapshot_offspring_0095.dat └── snapshot_parent_0095.dat ├── snapshot_gen_0096 ├── snapshot_offspring_0096.dat └── snapshot_parent_0096.dat ├── snapshot_gen_0097 ├── snapshot_offspring_0097.dat └── snapshot_parent_0097.dat ├── snapshot_gen_0098 ├── snapshot_offspring_0098.dat └── snapshot_parent_0098.dat └── snapshot_gen_0099 ├── snapshot_offspring_0099.dat └── snapshot_parent_0099.dat /.gitignore: -------------------------------------------------------------------------------- 1 | baseline/roms/Sonic* 2 | *.pkl 3 | */results 4 | baseline/logs 5 | .ipynb_checkpoints 6 | *.ipynb 7 | *.pt 8 | logs/ 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | *.ipynb 15 | *.tar.gz 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | .venv 98 | venv/ 99 | ENV/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | 114 | .idea 115 | -------------------------------------------------------------------------------- /MAML/DockerFile.subm: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:pytorch 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | COPY requirements.txt /tmp 9 | RUN . ~/venv/bin/activate && \ 10 | pip install -r /tmp/requirements.txt && \ 11 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1659068fdeb5fd4859fa598634008a84afe3616e && \ 12 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && \ 13 | pip install . 
14 | 15 | ADD *.py ./ 16 | ADD *.yaml ./ 17 | ADD test.sh ./ 18 | ADD *.pt ./ 19 | 20 | CMD ["/bin/bash", "test.sh"] 21 | -------------------------------------------------------------------------------- /MAML/README.md: -------------------------------------------------------------------------------- 1 | # Algorithms 2 | 3 | An attempt to implement the [MAML](https://arxiv.org/abs/1703.03400) and [Reptile](https://arxiv.org/abs/1803.02999) 4 | meta-learning algorithms. 5 | 6 | # Setup 7 | 8 | Set up the environment with the setup.sh script (it requires conda; edit the script if you use something else). 9 | 10 | # Run training 11 | 12 | The meta algorithm uses Pyro4 for distributed training, so it is worth reading about it first, 13 | for example in the [Pyro4 tutorial](https://pythonhosted.org/Pyro4/tutorials.html). 14 | 15 | 16 | Below is a basic example of how to run it locally: 17 | 18 | 1) Start the Pyro4 name server locally: 19 | 20 | $ pyro4-ns & 21 | 22 | 2) Start the workers (for example, 6 workers locally): 23 | 24 | $ ./start_workers.sh localhost 9000 6 localhost 25 | 26 | 3) Start the meta learner: 27 | 28 | $ python meta_learner.py --config config.yaml config_train.yaml 29 | -------------------------------------------------------------------------------- /MAML/all.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 |
SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 50 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 51 | SonicTheHedgehog-Genesis,StarLightZone.Act3 52 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 53 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 54 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 55 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 56 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 57 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 58 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 59 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 60 | -------------------------------------------------------------------------------- /MAML/config.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | max_steps: 100000000 3 | lr: 0.0002 4 | lr_meta: 0.0002 5 | vf_coef: 0.5 6 | ent_coef: 0.01 7 | cliprange: 0.1 8 | gamma: 0.99 9 | lam: 0.95 10 | n_steps: 4500 11 | n_opt_epochs: 3 12 | batch_size: 4500 13 | max_grad_norm: 0.5 14 | n_traj1: 3 15 | n_traj2: 3 16 | meta_algo: "maml" # reptile or maml 17 | weights: "last.pt" 18 | load_adam_params: "all" 19 | ep_info_len: null 20 | 21 | env_params: 22 | exp_type: "x" 23 | exp_const: 0.005 24 | color: False 25 | stack: 2 26 | scale_rew: True 27 | 28 | log: 29 | log: True 30 | log_interval: 1 31 | save_interval: 10 32 | log_dir: logs 33 | save_last: True -------------------------------------------------------------------------------- /MAML/config_debug.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | n_steps: 4500 3 | n_opt_epochs: 3 4 | batch_size: 4500 5 | n_traj1: 1 6 | n_traj2: 1 7 | 8 | env_params: 9 | game_states: "train_debug.csv" 10 | max_episode_steps: 4500 -------------------------------------------------------------------------------- /MAML/config_test.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | max_steps: 10000000 3 | weights: "last.pt" 4 | ep_info_len: null 5 | 6 | env_params: 7 | socket_dir: "tmp/sock" 8 | 9 | log: 10 | log: True 11 | log_dir: null -------------------------------------------------------------------------------- /MAML/config_test_local.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | max_steps: 500000 3 | weights: "last.pt" 4 | 5 | env_params: 6 | game_states: "validation.csv" 7 | max_episode_steps: 4500 8 | 9 | log: 10 | log: False 11 | log_dir: null -------------------------------------------------------------------------------- /MAML/config_train.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | ep_info_len: 100 3 | 4 | env_params: 5 | game_states: "train_large.csv" 6 | max_episode_steps: 4500 -------------------------------------------------------------------------------- /MAML/make_image.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DOCKER_REGISTRY="retrocontestrtidfxqehvzsuwpo.azurecr.io" 3 | docker build -f DockerFile.subm -t $DOCKER_REGISTRY/$1 . 
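The distributed setup described in the MAML README is easier to follow with a concrete picture of the Pyro4 pattern that worker.py and meta_learner.py (below) rely on: each worker registers itself with the pyro4-ns name server under the "worker." prefix, and the meta learner looks workers up by that prefix and invokes them asynchronously through futures. The following is a minimal sketch under those assumptions, not code from the repo; the `Worker` class and `serve` function are hypothetical stand-ins for worker.py.

import Pyro4

@Pyro4.expose
class Worker(object):
    """Hypothetical stand-in for the real worker object in worker.py."""

    def initialize(self, config, weights_blob):
        # receive the merged config and the initial model weights (pickled)
        self.config = config
        return "ok"

    def run(self, weights_blob=None):
        # collect trajectories, compute gradients and return them
        # (the real worker returns a base64-pickled dict of grads)
        return {"data": "...", "encoding": "base64"}

def serve(name, host, port, ns_host):
    # expose the worker object and register it with the name server
    daemon = Pyro4.Daemon(host=host, port=port)
    uri = daemon.register(Worker())
    with Pyro4.locateNS(host=ns_host) as ns:
        ns.register("worker.{}".format(name), uri)  # prefix used by find_workers()
    daemon.requestLoop()

# Meta-learner side (cf. find_workers/init_workers in meta_learner.py below):
#   with Pyro4.locateNS() as ns:
#       proxies = [Pyro4.Proxy(uri) for uri in ns.list(prefix="worker.").values()]
#   futures = [Pyro4.Future(p.run)() for p in proxies]  # non-blocking remote calls
#   # each future exposes .ready and .value, exactly as polled in wait_run_end()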
-------------------------------------------------------------------------------- /MAML/meta_learner.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['OMP_NUM_THREADS'] = '1' 3 | 4 | import Pyro4 5 | import train 6 | import pickle 7 | import sonic_utils 8 | from model import CNNPolicy 9 | import utils 10 | from time import sleep 11 | import torch 12 | from pathlib import Path 13 | torch.set_num_threads(1) 14 | 15 | 16 | def find_workers(prefix): 17 | workers = [] 18 | with Pyro4.locateNS() as ns: 19 | for sampler, sampler_uri in ns.list(prefix="{}.".format(prefix)).items(): 20 | print("found {}".format(prefix), sampler) 21 | workers.append(Pyro4.Proxy(sampler_uri)) 22 | if not workers: 23 | raise ValueError("no {} found!".format(prefix)) 24 | print('found total {} {}s'.format(len(workers), prefix)) 25 | return workers 26 | 27 | 28 | def init_workers(workers, config, weights): 29 | results = [] 30 | print('start workers initialization') 31 | for worker in workers: 32 | res = Pyro4.Future(worker.initialize)(config, pickle.dumps(weights)) 33 | results.append(res) 34 | 35 | while len(results) > 0: 36 | for res in results: 37 | if res.ready: 38 | results.remove(res) 39 | 40 | print('finish workers initialization') 41 | 42 | 43 | def wait_run_end(workers_results, model, timeout=None): 44 | # TODO: use timeout 45 | weights = pickle.dumps(model.get_weights()) 46 | 47 | for w, res in workers_results.items(): 48 | 49 | while not res.ready: 50 | sleep(1) 51 | 52 | res = utils.unpickle(res.value) 53 | grads = res["grads"] 54 | model.add_grads(grads) 55 | 56 | new_res = Pyro4.Future(w.run)(weights) 57 | workers_results[w] = new_res 58 | 59 | 60 | def run_maml(args): 61 | config = utils.load_config(args.config) 62 | train_params = config["train_params"] 63 | 64 | # open and close env just to get right action and obs space 65 | env = sonic_utils.make_from_config(config['env_params'], True) 66 | env.close() 67 | 68 | # init model 69 | model = CNNPolicy( 70 | env.observation_space, env.action_space, train_params["vf_coef"], 71 | train_params["ent_coef"], train_params["lr_meta"], train_params["max_grad_norm"] 72 | 73 | ) 74 | 75 | workers = find_workers("worker") 76 | init_workers(workers, config, model.get_weights()) 77 | 78 | # start run 79 | workers_results = {w: Pyro4.Future(w.run)() for w in workers} 80 | 81 | savedir = utils.prepare_exp_dir(config, args.exp_name) 82 | 83 | updates = 0 84 | while True: 85 | # first zero all grads 86 | model.optimizer.zero_grad() 87 | 88 | # then apply add grads from remote workers 89 | wait_run_end(workers_results, model) 90 | 91 | # apply gradient 92 | model.optimizer.step() 93 | 94 | updates += 1 95 | 96 | # save last weights 97 | if config['log']['save_last']: 98 | fpath = savedir / 'last.pt' 99 | model.save(fpath) 100 | 101 | # save on save period 102 | if updates % config['log']["save_interval"] == 0 or updates == 1: 103 | fpath = savedir / '{}.pt'.format(updates) 104 | model.save(fpath) 105 | 106 | 107 | if __name__ == '__main__': 108 | try: 109 | args = utils.get_args() 110 | run_maml(args) 111 | except: 112 | print("Pyro traceback:") 113 | print("".join(Pyro4.util.getPyroTraceback())) 114 | raise 115 | -------------------------------------------------------------------------------- /MAML/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | tqdm 3 | joblib 4 | zmq 5 | dill 6 | progressbar2 7 | cloudpickle 8 | opencv-python 9 | pandas 10 | 
Pyro4 11 | gym-retro 12 | torchvision 13 | pyyaml -------------------------------------------------------------------------------- /MAML/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | conda create -n retro python=3.5 -y 3 | source activate retro 4 | pip install http://download.pytorch.org/whl/cu91/torch-0.4.0-cp35-cp35m-linux_x86_64.whl 5 | pip install -r requirements.txt 6 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1659068fdeb5fd4859fa598634008a84afe3616e 7 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && pip install . 8 | 9 | # download roms 10 | wget -qO - https://www.dropbox.com/s/8i0mh0bn2bbe1w5/roms.tar.gz?dl=0 | tar xzv 11 | find ./roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; -------------------------------------------------------------------------------- /MAML/start_workers.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # usage: 4 | # ./start_workers.sh host port_start num_samplers ns_host 5 | 6 | host=$1 7 | port_start=$2 8 | num_samplers=$3 9 | ns_host=$4 10 | 11 | for i in `seq 1 $num_samplers`; do 12 | python worker.py --name $HOSTNAME --host $host --port $((port_start+i-1)) --ns_host $ns_host & 13 | done -------------------------------------------------------------------------------- /MAML/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python -u /root/compo/train.py --config config.yaml config_test.yaml -------------------------------------------------------------------------------- /MAML/train.py: -------------------------------------------------------------------------------- 1 | import sonic_utils 2 | import utils 3 | from model import CNNPolicy 4 | import numpy as np 5 | from time import time 6 | from baselines import logger 7 | from collections import deque 8 | import pandas as pd 9 | 10 | 11 | def traj_segment_generator(model, env, horizon, sample): 12 | t = 0 13 | ac = env.action_space.sample() # not used, just so we have the datatype 14 | new = True # marks if we're on first timestep of an episode 15 | ob = env.reset() 16 | 17 | ep_infos = [] 18 | 19 | # Initialize history arrays 20 | obs = np.array([ob for _ in range(horizon)]) 21 | rews = np.zeros(horizon, 'float32') 22 | vpreds = np.zeros(horizon, 'float32') 23 | news = np.zeros(horizon, 'int32') 24 | acs = np.array([ac for _ in range(horizon)]) 25 | ac_logits = np.zeros(horizon, 'float32') 26 | 27 | while True: 28 | ac, ac_logit, vpred = model.step(ob, sample) 29 | # Slight weirdness here because we need value function at time T 30 | # before returning segment [0, T-1] so we get the correct 31 | # terminal value 32 | if t > 0 and t % horizon == 0: 33 | yield { 34 | "ob": obs, "rew": rews, 35 | "vpred": vpreds, "new": news, 36 | "ac": acs, "nextvpred": float(vpred) * (1 - new), 37 | "ac_logits": ac_logits, "ep_infos": ep_infos, 38 | } 39 | # Be careful!!!
if you change the downstream algorithm to aggregate 40 | # several of these batches, then be sure to do a deepcopy 41 | ep_infos = [] 42 | 43 | i = t % horizon 44 | obs[i] = ob 45 | vpreds[i] = vpred 46 | news[i] = new 47 | acs[i] = ac 48 | ac_logits[i] = ac_logit 49 | 50 | ob, rew, new, info = env.step(ac) 51 | rews[i] = rew 52 | 53 | if new: 54 | # game_name = env.unwrapped.game_name 55 | # state_name = env.unwrapped.state_name 56 | if "episode" in info: 57 | ep_infos.append(info["episode"]) 58 | 59 | ob = env.reset() 60 | 61 | t += 1 62 | 63 | 64 | def add_vtarg(seg, gamma, lam): 65 | """ 66 | Compute target value using TD(lambda) estimator, and advantage with GAE(lambda) 67 | """ 68 | new = np.append(seg["new"], 0) # last element is only used for last vtarg, but we already zeroed it if last new = 1 69 | vpred = np.append(seg["vpred"], seg["nextvpred"]) 70 | T = len(seg["rew"]) 71 | gaelam = np.empty(T, 'float32') 72 | rew = seg["rew"] 73 | lastgaelam = 0 74 | for t in reversed(range(T)): 75 | nonterminal = 1 - new[t+1] 76 | delta = rew[t] + gamma * vpred[t+1] * nonterminal - vpred[t] 77 | gaelam[t] = lastgaelam = delta + gamma * lam * nonterminal * lastgaelam 78 | seg["tdlamret"] = gaelam + seg["vpred"] 79 | 80 | 81 | def train(config, exp_name='test'): 82 | 83 | train_params = config['train_params'] 84 | env_params = config['env_params'] 85 | log_params = config["log"] 86 | 87 | savedir = None 88 | if log_params["log_dir"] is not None: 89 | savedir = utils.prepare_exp_dir(config, exp_name) 90 | 91 | env = sonic_utils.make_from_config(env_params) 92 | 93 | model = CNNPolicy( 94 | env.observation_space, env.action_space, train_params["vf_coef"], 95 | train_params["ent_coef"], train_params["lr"], train_params["max_grad_norm"] 96 | ) 97 | 98 | if train_params["weights"] is not None: 99 | model.load(train_params["weights"], train_params["load_adam_params"]) 100 | 101 | seg_gen = traj_segment_generator( 102 | model, env, train_params['n_steps'], sample=True) 103 | 104 | total_steps = 0 105 | updates = 0 106 | t0 = time() 107 | epinfobuf = deque(maxlen=train_params["ep_info_len"]) 108 | seg_inds = np.arange(train_params['n_steps']) 109 | n_batches = train_params["n_steps"] // train_params["batch_size"] 110 | loss_vals = [] 111 | while True: 112 | if total_steps > train_params['max_steps']: 113 | break 114 | 115 | # get batch 116 | seg = seg_gen.__next__() 117 | add_vtarg(seg, train_params['gamma'], train_params['lam']) 118 | 119 | # add episode info 120 | epinfobuf.extend(seg['ep_infos']) 121 | 122 | for _ in range(train_params["n_opt_epochs"]): 123 | np.random.shuffle(seg_inds) 124 | for i in range(n_batches): 125 | start = i * train_params["batch_size"] 126 | end = (i + 1) * train_params["batch_size"] 127 | inds = seg_inds[start:end] 128 | 129 | losses = model.train( 130 | train_params['cliprange'], seg['ob'][inds], 131 | seg['tdlamret'][inds], seg['ac'][inds], 132 | seg['vpred'][inds], seg["ac_logits"][inds] 133 | ) 134 | loss_vals.append([l.detach().numpy() for l in losses]) 135 | 136 | total_steps += train_params['n_steps'] 137 | updates += 1 138 | 139 | if log_params["log"] and (updates % log_params["log_interval"] == 0 or updates == 1): 140 | 141 | tnow = time() 142 | fps = int(total_steps / (tnow - t0)) 143 | # ev = explained_variance(values, returns) 144 | logger.logkv("total_steps", total_steps) 145 | logger.logkv("nupdates", updates) 146 | logger.logkv("fps", fps) 147 | logger.logkv('eprewmean', np.mean([epinfo['r'] for epinfo in epinfobuf if 'r' in epinfo])) 148 | 
logger.logkv('eprewmean_exp', np.mean([epinfo['r_exp'] for epinfo in epinfobuf if 'r_exp' in epinfo])) 149 | logger.logkv('eplenmean', np.mean([epinfo['l'] for epinfo in epinfobuf if 'l' in epinfo])) 150 | logger.logkv('time_elapsed', tnow - t0) 151 | 152 | for loss_val, loss_name in zip(np.mean(loss_vals, axis=0), model.loss_names): 153 | logger.logkv(loss_name, loss_val) 154 | logger.dumpkvs() 155 | 156 | del loss_vals[:] 157 | 158 | # save last weights 159 | if log_params['save_last'] and savedir is not None: 160 | fpath = savedir / 'last.pt' 161 | model.save(fpath) 162 | 163 | # save on save period 164 | if (updates % log_params["save_interval"] == 0 or updates == 1) and savedir is not None: 165 | fpath = savedir / '{}.pt'.format(updates) 166 | model.save(fpath) 167 | 168 | return epinfobuf 169 | 170 | 171 | def test(): 172 | args = utils.get_args() 173 | 174 | config = utils.load_config(args.config) 175 | 176 | if "game_states" in config['env_params']: 177 | game_states = pd.read_csv(config['env_params']["game_states"]).values.tolist() 178 | 179 | all_means = [] 180 | for game, state in game_states: 181 | config['env_params']["game_states"] = [(game, state)] 182 | epinfobuf = train(config, args.exp_name) 183 | 184 | rewards = [epinfo['r'] for epinfo in epinfobuf if 'r' in epinfo] 185 | print("{} {} {:.2f} {:.2f}".format(game, state, np.max(rewards), np.mean(rewards))) 186 | all_means.append(np.mean(rewards)) 187 | 188 | print("final result {:.2f}".format(np.mean(all_means))) 189 | else: 190 | train(config, args.exp_name) 191 | 192 | 193 | if __name__ == '__main__': 194 | test() 195 | -------------------------------------------------------------------------------- /MAML/train_debug.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | -------------------------------------------------------------------------------- /MAML/train_large.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | 
SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | -------------------------------------------------------------------------------- /MAML/train_small.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,StarLightZone.Act2 4 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 5 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 6 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 7 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 8 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 10 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 11 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 12 | -------------------------------------------------------------------------------- /MAML/utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import pickle 3 | from copy import deepcopy 4 | from datetime import datetime 5 | import argparse 6 | from pathlib import Path 7 | import yaml 8 | 9 | 10 | def load_config(fnames): 11 | 12 | config = {} 13 | for fname in fnames: 14 | with open(fname) as f: 15 | config = merge_dictionaries(config, yaml.load(f)) 16 | 17 | return config 18 | 19 | 20 | def add_boolean_flag(parser, name, default=False, help=None): 21 | """Add a boolean flag to argparse parser. 
22 | Parameters 23 | ---------- 24 | parser: argparse.Parser 25 | parser to add the flag to 26 | name: str 27 | --<name> will enable the flag, while --no-<name> will disable it 28 | default: bool or None 29 | default value of the flag 30 | help: str 31 | help string for the flag 32 | """ 33 | dest = name.replace('-', '_') 34 | parser.add_argument("--" + name, action="store_true", default=default, dest=dest, help=help) 35 | parser.add_argument("--no-" + name, action="store_false", dest=dest) 36 | 37 | 38 | def prepare_exp_dir(config, exp_name): 39 | # directory for logs 40 | logdir = Path(config['log']['log_dir']) / exp_name 41 | logdir.mkdir(parents=True, exist_ok=True) 42 | 43 | with open(str(logdir / 'run_config.yaml'), 'w') as f: 44 | yaml.dump(config, f) 45 | 46 | savedir = logdir / 'weights' 47 | savedir.mkdir(parents=True, exist_ok=True) 48 | 49 | return savedir 50 | 51 | 52 | def get_args(): 53 | parser = argparse.ArgumentParser(description="Run commands") 54 | parser.add_argument( 55 | '--config', type=str, default=None, nargs='+', 56 | help="Yaml files with configs") 57 | parser.add_argument( 58 | '--exp_name', type=str, 59 | default=datetime.now().strftime("%d.%m.%Y-%H:%M"), 60 | help='Experiment name') 61 | return parser.parse_args() 62 | 63 | 64 | def merge_dictionaries(a, b, path_to_root=None, extend_lists=False): 65 | """ 66 | Creates a copy of dict `a` and recursively updates its elements with the elements of `b`. 67 | :param extend_lists: 68 | if True and the values in both dicts are lists (when the key is present in both), the elements of b are 69 | appended to the elements of a; if the value in one of the dicts is not a list, a ValueError is raised 70 | if False, list values are treated as ordinary values and simply replace/overwrite one another 71 | """ 72 | res = deepcopy(a) 73 | 74 | if path_to_root is None: 75 | path_to_root = [] 76 | 77 | for key in b: 78 | if key not in res: 79 | res[key] = b[key] 80 | continue 81 | if isinstance(res[key], dict): 82 | if isinstance(b[key], dict): 83 | res[key] = merge_dictionaries(res[key], b[key], path_to_root + [str(key)], extend_lists=extend_lists) 84 | else: 85 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 86 | elif extend_lists and isinstance(res[key], list): 87 | if isinstance(b[key], list): 88 | res[key].extend(b[key]) 89 | else: 90 | raise ValueError( 91 | "Cannot extend list with a non-list. Path: {}".format('.'.join(path_to_root + [str(key)]))) 92 | else: 93 | if extend_lists and isinstance(b[key], list): 94 | raise ValueError( 95 | "Cannot extend a non-list with a list.
Path: {}".format('.'.join(path_to_root + [str(key)]))) 96 | elif not isinstance(b[key], dict): 97 | res[key] = b[key] 98 | else: 99 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 100 | return res 101 | 102 | 103 | def conv_out_dim(in_n, k, p, s): 104 | """ 105 | :param in_n: input dim 106 | :param k: kernel size 107 | :param p: padding size 108 | :param s: stride size 109 | :return: output dim 110 | """ 111 | return int((in_n + 2*p - k) / s + 1) 112 | 113 | 114 | def convs_out_dim(in_n, ks, ps, ss): 115 | assert len(ks) == len(ps) == len(ss) 116 | for k, p, s in zip(ks, ps, ss): 117 | in_n = conv_out_dim(in_n, k, p, s) 118 | return in_n 119 | 120 | 121 | def unpickle(data_dict): 122 | assert isinstance(data_dict, dict) and 'data' in data_dict and 'encoding' in data_dict 123 | 124 | data = data_dict["data"] 125 | encoding = data_dict["encoding"] 126 | 127 | if encoding == "base64": 128 | res = pickle.loads(base64.b64decode(data)) 129 | else: 130 | raise ValueError('unsopported encoding {}'.format(encoding)) 131 | 132 | return res 133 | -------------------------------------------------------------------------------- /MAML/validation.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 3 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 4 | SonicTheHedgehog-Genesis,StarLightZone.Act3 5 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 6 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 7 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 8 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 10 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 11 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 12 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Retro contest solution 2 | This is repository for 4th place soulution for [retro contest](https://blog.openai.com/first-retro-contest-retrospective/). 3 | 4 | 5 | Content: 6 | - baseline - Joint PPO baseline is actual soution, see README.md in this directory for more detail. 
7 | - actor_critic - a PyTorch actor-critic implementation of the baseline 8 | - deepneuroevolution - evolution algorithms 9 | - MAML - MAML and Reptile algorithms; see its README.md for more details 10 | 11 | # Team 12 | - Ivan Sorokin ([github](https://github.com/1ytic)) 13 | - Kolesnikov Sergey ([linkedin](https://linkedin.com/in/scitator), [twitter](https://twitter.com/Scitator), [github](https://github.com/Scitator)) 14 | - Sergeev Ilya ([linkedin](https://www.linkedin.com/in/ilya-sergeev/), [twitter](https://twitter.com/sergeevii123), [github](https://github.com/sergeevii123)) 15 | - Mikhail Pavlov ([twitter](https://twitter.com/MikhailPavlov5), [github](https://github.com/fgvbrt)) -------------------------------------------------------------------------------- /actor_critic/actor_critic_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | from gym import wrappers 4 | import numpy as np 5 | from itertools import count 6 | from collections import namedtuple 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | from torch.autograd import Variable 13 | from torch.distributions import Categorical 14 | from envs import make_retro 15 | import pandas as pd 16 | 17 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example') 18 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 19 | help='discount factor (default: 0.99)') 20 | parser.add_argument('--seed', type=int, default=42, metavar='N', 21 | help='random seed (default: 42)') 22 | parser.add_argument('--log-interval', type=int, default=1, metavar='N', 23 | help='interval between training status logs (default: 1)') 24 | 25 | args = parser.parse_args() 26 | game_states = pd.read_csv("train_large.csv").values.tolist() 27 | 28 | env = make_retro('SonicTheHedgehog-Genesis', 'LabyrinthZone.Act1', game_states) 29 | env.seed(args.seed) 30 | torch.manual_seed(args.seed) 31 | 32 | 33 | SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) 34 | 35 | class Policy(nn.Module): 36 | def __init__(self, num_inputs, action_space): 37 | super(Policy, self).__init__() 38 | self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1) 39 | self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 40 | self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 41 | self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 42 | self.affine1 = nn.Linear(32*6, 256) 43 | self.action_head = nn.Linear(256, action_space) 44 | self.value_head = nn.Linear(256, 1) 45 | 46 | self.saved_actions = [] 47 | self.rewards = [] 48 | 49 | def forward(self, x): 50 | x = F.elu(self.conv1(x)) 51 | x = F.elu(self.conv2(x)) 52 | x = F.elu(self.conv3(x)) 53 | x = F.elu(self.conv4(x)) 54 | x = x.view(-1, 32*6) 55 | x = F.elu(self.affine1(x)) 56 | action_scores = self.action_head(x) 57 | state_values = self.value_head(x) 58 | return F.softmax(action_scores, dim=-1), state_values 59 | 60 | 61 | model = Policy(env.observation_space.shape[0], env.action_space.n) 62 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 63 | 64 | 65 | def select_action(state): 66 | state = torch.from_numpy(state).float().unsqueeze(0) 67 | probs, state_value = model(Variable(state)) 68 | m = Categorical(probs) 69 | action = m.sample() 70 | return action.data[0] 71 | 72 | model.load_state_dict(torch.load('weights/{}.pt'.format("actor_critic_sonic1"))) 73 | 74 | running_length = 10 75 | max_reward = -100 76 | 77 | for i_episode in count(1): 78 | state =
env.reset() 79 | current_reward = 0 80 | done = False 81 | t = 0 82 | while not done: 83 | action = select_action(np.array(state)) 84 | state, reward, done, _ = env.step(action) 85 | env.render() 86 | current_reward += reward 87 | t += 1 88 | 89 | running_length = running_length * 0.99 + t * 0.01 90 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}\tReward: {:.5f}'.format( 91 | i_episode, t, running_length, current_reward)) -------------------------------------------------------------------------------- /actor_critic/actor_critic_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from itertools import count 4 | from collections import namedtuple 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torch.autograd import Variable 11 | from torch.distributions import Categorical 12 | from envs import make_retro 13 | import pandas as pd 14 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example') 15 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 16 | help='discount factor (default: 0.99)') 17 | parser.add_argument('--seed', type=int, default=42, metavar='N', 18 | help='random seed (default: 42)') 19 | parser.add_argument('--log-interval', type=int, default=1, metavar='N', 20 | help='interval between training status logs (default: 1)') 21 | parser.add_argument('--record', action='store_true', 22 | help='save video') 23 | 24 | args = parser.parse_args() 25 | game_states = pd.read_csv("train_large.csv").values.tolist() 26 | 27 | env = make_retro('SonicTheHedgehog-Genesis', 'LabyrinthZone.Act1', game_states) 28 | env.seed(args.seed) 29 | torch.manual_seed(args.seed) 30 | 31 | SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) 32 | 33 | class Policy(nn.Module): 34 | def __init__(self, num_inputs, action_space): 35 | super(Policy, self).__init__() 36 | self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1) 37 | self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 38 | self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 39 | self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) 40 | self.affine1 = nn.Linear(32*6, 256) 41 | self.action_head = nn.Linear(256, action_space) 42 | self.value_head = nn.Linear(256, 1) 43 | 44 | self.saved_actions = [] 45 | self.rewards = [] 46 | 47 | def forward(self, x): 48 | x = F.elu(self.conv1(x)) 49 | x = F.elu(self.conv2(x)) 50 | x = F.elu(self.conv3(x)) 51 | x = F.elu(self.conv4(x)) 52 | x = x.view(-1, 32*6) 53 | x = F.elu(self.affine1(x)) 54 | action_scores = self.action_head(x) 55 | state_values = self.value_head(x) 56 | return F.softmax(action_scores, dim=-1), state_values 57 | 58 | model = Policy(env.observation_space.shape[0], env.action_space.n) 59 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 60 | 61 | def select_action(state): 62 | state = torch.from_numpy(state).float().unsqueeze(0) 63 | probs, state_value = model(Variable(state)) 64 | m = Categorical(probs) 65 | action = m.sample() 66 | model.saved_actions.append(SavedAction(m.log_prob(action), state_value)) 67 | return action.data[0] 68 | 69 | 70 | def finish_episode(): 71 | R = 0 72 | saved_actions = model.saved_actions 73 | policy_losses = [] 74 | value_losses = [] 75 | rewards = [] 76 | for r in model.rewards[::-1]: 77 | R = r + args.gamma * R 78 | rewards.insert(0, R) 79 | rewards = torch.Tensor(rewards) 80 | rewards = (rewards - rewards.mean()) /
(rewards.std() + np.finfo(np.float32).eps) 81 | for (log_prob, value), r in zip(saved_actions, rewards): 82 | reward = r - value.data[0] 83 | policy_losses.append(-log_prob * Variable(reward)) 84 | value_losses.append(F.smooth_l1_loss(value, Variable(torch.Tensor([r])))) 85 | optimizer.zero_grad() 86 | loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum() 87 | loss.backward() 88 | optimizer.step() 89 | del model.rewards[:] 90 | del model.saved_actions[:] 91 | 92 | # model.load_state_dict(torch.load('weights/{}.pt'.format("actor_critic_sonic1"))) 93 | 94 | running_length = 10 95 | max_reward = -100 96 | for i_episode in count(1): 97 | state = env.reset() 98 | current_reward = 0 99 | done = False 100 | t = 0 101 | flip = 0 102 | while not done: 103 | action = select_action(np.array(state)) 104 | state, reward, done, _ = env.step(action) 105 | # env.render() 106 | model.rewards.append(reward) 107 | current_reward += reward 108 | t += 1 109 | 110 | running_length = running_length * 0.99 + t * 0.01 111 | finish_episode() 112 | if i_episode % args.log_interval == 0: 113 | if current_reward > max_reward: 114 | max_reward = current_reward 115 | torch.save(model.state_dict(), 'weights/{}.pt'.format("actor_critic_sonic1")) 116 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}\tReward: {:.5f}'.format( 117 | i_episode, t, running_length, current_reward)) -------------------------------------------------------------------------------- /actor_critic/weights/actor_critic_sonic1.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/actor_critic/weights/actor_critic_sonic1.pt -------------------------------------------------------------------------------- /baseline/README.md: -------------------------------------------------------------------------------- 1 | # Algorithm 2 | Key features: 3 | - joint [PPO](https://arxiv.org/abs/1707.06347) training on all training games 4 | - mixup 5 | - an exploration bonus added to the reward, based on observations and x distance 6 | - training on the test level 7 | - choosing the best weights among several candidates during the first few test episodes 8 | 9 | 10 | # Training 11 | To run PPO training: 12 | 13 | 1) [Install docker](https://docs.docker.com/install/) 14 | 15 | 2) [Install nvidia docker](https://github.com/NVIDIA/nvidia-docker) 16 | 17 | 3) Build the image: 18 | 19 | $ docker build -t retro-ppo -f ppo2.docker .
20 | 21 | 4) Run training: 22 | 23 | $ docker run --runtime=nvidia retro-ppo 24 | -------------------------------------------------------------------------------- /baseline/all.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 50 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 51 | SonicTheHedgehog-Genesis,StarLightZone.Act3 52 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 53 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 54 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 55 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 56 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 57 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 58 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 59 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 60 | -------------------------------------------------------------------------------- /baseline/config.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | policy: "cnn" 3 | cnn: "openai1" 4 | max_steps: 100000000 5 | lr: 0.0002 6 | vf_coef: 0.5 7 | ent_coef: 0.01 8 | cliprange: 0.1 9 | gamma: 0.99 10 | 
lam: 0.95 11 | n_steps: 4500 12 | n_opt_epochs: 3 13 | batch_size: 4500 14 | max_grad_norm: 0.5 15 | n_envs: 12 16 | nmixup: 2 17 | log_interval: 5 18 | save_interval: 20 19 | weights_path: null 20 | adam_stats: "weight_stats" 21 | weights_choose_eps: 5 22 | 23 | env_params: 24 | exp_type: ["x", "obs"] 25 | exp_const: [0.005, 0.0001] 26 | color: False 27 | stack: 4 28 | scale_rew: True 29 | small_size: False 30 | -------------------------------------------------------------------------------- /baseline/config_test.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | cnn: "nature" 3 | weights_path: ["weights1.pkl", "weights2.pkl", "weights3.pkl", "weights4.pkl"] 4 | n_envs: 1 5 | nmixup: 0 6 | save_interval: 0 7 | log_interval: 1 8 | weights_choose_eps: 5 9 | 10 | env_params: 11 | stack: 2 12 | exp_type: ["x", "obs"] 13 | exp_const: [0.005, 0.001] 14 | socket_dir: "tmp/sock" -------------------------------------------------------------------------------- /baseline/config_train.yaml: -------------------------------------------------------------------------------- 1 | train_params: 2 | cnn: "nature" 3 | 4 | env_params: 5 | game_states: "all.csv" 6 | max_episode_steps: 4500 7 | small_size: False 8 | stack: 2 -------------------------------------------------------------------------------- /baseline/jerk.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:bare 2 | 3 | ADD jerk_agent.py ./agent.py 4 | 5 | CMD ["python", "-u", "/root/compo/agent.py"] 6 | -------------------------------------------------------------------------------- /baseline/jerk_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | A scripted agent called "Just Enough Retained Knowledge". 5 | """ 6 | 7 | import random 8 | 9 | import gym 10 | import numpy as np 11 | 12 | import gym_remote.client as grc 13 | import gym_remote.exceptions as gre 14 | 15 | EMA_RATE = 0.2 16 | EXPLOIT_BIAS = 0.25 17 | TOTAL_TIMESTEPS = int(1e6) 18 | 19 | def main(): 20 | """Run JERK on the attached environment.""" 21 | env = grc.RemoteEnv('tmp/sock') 22 | env = TrackedEnv(env) 23 | new_ep = True 24 | solutions = [] 25 | while True: 26 | if new_ep: 27 | if (solutions and 28 | random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS): 29 | solutions = sorted(solutions, key=lambda x: np.mean(x[0])) 30 | best_pair = solutions[-1] 31 | new_rew = exploit(env, best_pair[1]) 32 | best_pair[0].append(new_rew) 33 | print('replayed best with reward %f' % new_rew) 34 | continue 35 | else: 36 | env.reset() 37 | new_ep = False 38 | rew, new_ep = move(env, 100) 39 | if not new_ep and rew <= 0: 40 | print('backtracking due to negative reward: %f' % rew) 41 | _, new_ep = move(env, 70, left=True) 42 | if new_ep: 43 | solutions.append(([max(env.reward_history)], env.best_sequence())) 44 | 45 | def move(env, num_steps, left=False, jump_prob=1.0 / 10.0, jump_repeat=4): 46 | """ 47 | Move right or left for a certain number of steps, 48 | jumping periodically. 
49 | """ 50 | total_rew = 0.0 51 | done = False 52 | steps_taken = 0 53 | jumping_steps_left = 0 54 | while not done and steps_taken < num_steps: 55 | action = np.zeros((12,), dtype=np.bool) 56 | action[6] = left 57 | action[7] = not left 58 | if jumping_steps_left > 0: 59 | action[0] = True 60 | jumping_steps_left -= 1 61 | else: 62 | if random.random() < jump_prob: 63 | jumping_steps_left = jump_repeat - 1 64 | action[0] = True 65 | _, rew, done, _ = env.step(action) 66 | total_rew += rew 67 | steps_taken += 1 68 | if done: 69 | break 70 | return total_rew, done 71 | 72 | def exploit(env, sequence): 73 | """ 74 | Replay an action sequence; pad with NOPs if needed. 75 | 76 | Returns the final cumulative reward. 77 | """ 78 | env.reset() 79 | done = False 80 | idx = 0 81 | while not done: 82 | if idx >= len(sequence): 83 | _, _, done, _ = env.step(np.zeros((12,), dtype='bool')) 84 | else: 85 | _, _, done, _ = env.step(sequence[idx]) 86 | idx += 1 87 | return env.total_reward 88 | 89 | class TrackedEnv(gym.Wrapper): 90 | """ 91 | An environment that tracks the current trajectory and 92 | the total number of timesteps ever taken. 93 | """ 94 | def __init__(self, env): 95 | super(TrackedEnv, self).__init__(env) 96 | self.action_history = [] 97 | self.reward_history = [] 98 | self.total_reward = 0 99 | self.total_steps_ever = 0 100 | 101 | def best_sequence(self): 102 | """ 103 | Get the prefix of the trajectory with the best 104 | cumulative reward. 105 | """ 106 | max_cumulative = max(self.reward_history) 107 | for i, rew in enumerate(self.reward_history): 108 | if rew == max_cumulative: 109 | return self.action_history[:i+1] 110 | raise RuntimeError('unreachable') 111 | 112 | # pylint: disable=E0202 113 | def reset(self, **kwargs): 114 | self.action_history = [] 115 | self.reward_history = [] 116 | self.total_reward = 0 117 | return self.env.reset(**kwargs) 118 | 119 | def step(self, action): 120 | self.total_steps_ever += 1 121 | self.action_history.append(action.copy()) 122 | obs, rew, done, info = self.env.step(action) 123 | self.total_reward += rew 124 | self.reward_history.append(self.total_reward) 125 | return obs, rew, done, info 126 | 127 | if __name__ == '__main__': 128 | try: 129 | main() 130 | except gre.GymRemoteError as exc: 131 | print('exception', exc) 132 | -------------------------------------------------------------------------------- /baseline/make_image.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | DOCKER_REGISTRY="retrocontestrtidfxqehvzsuwpo.azurecr.io" 3 | docker build -f ppo2.subm.docker -t $DOCKER_REGISTRY/$1 . -------------------------------------------------------------------------------- /baseline/ppo2.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:tensorflow 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev wget && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Baselines has some unneeded and cumbersome dependencies, 9 | # so we manually fetch the deps we need. 10 | # Baselines has some unneeded and cumbersome dependencies, 11 | # so we manually fetch the deps we need. 12 | RUN . ~/venv/bin/activate && \ 13 | pip install scipy tqdm joblib zmq dill progressbar2 cloudpickle opencv-python pandas pyyaml && \ 14 | pip install gym-retro && \ 15 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && \ 16 | pip install . 17 | 18 | # ADD roms /roms/ 19 | # RUN . 
~/venv/bin/activate && find /roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; 20 | RUN wget -qO - https://www.dropbox.com/s/8i0mh0bn2bbe1w5/roms.tar.gz?dl=0 | tar xzv && \ 21 | . ~/venv/bin/activate && find ./roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; 22 | 23 | RUN . ~/venv/bin/activate && \ 24 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1e3f646f1859d2447348c647d42c48b7d6cc4423 25 | 26 | ADD sonic_util.py ./ 27 | ADD ppo2_agent.py ./ 28 | ADD utils.py ./ 29 | ADD train.sh ./ 30 | ADD *.yaml ./ 31 | ADD *.csv ./ 32 | 33 | CMD ["/bin/bash", "train.sh"] 34 | -------------------------------------------------------------------------------- /baseline/ppo2.subm.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:tensorflow 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Baselines has some unneeded and cumbersome dependencies, 9 | # so we manually fetch the deps we need. 10 | RUN . ~/venv/bin/activate && \ 11 | pip install scipy tqdm joblib zmq dill progressbar2 cloudpickle opencv-python pandas pyyaml && \ 12 | pip install gym-retro && \ 13 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && \ 14 | pip install . 15 | 16 | RUN . ~/venv/bin/activate && \ 17 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1e3f646f1859d2447348c647d42c48b7d6cc4423 18 | 19 | ADD *.pkl ./ 20 | 21 | ADD sonic_util.py ./ 22 | ADD ppo2_agent.py ./ 23 | ADD utils.py ./ 24 | ADD test.sh ./ 25 | ADD *.yaml ./ 26 | 27 | CMD ["/bin/bash", "test.sh"] 28 | -------------------------------------------------------------------------------- /baseline/ppo2_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Train an agent on Sonic using PPO2 from OpenAI Baselines. 5 | """ 6 | 7 | import tensorflow as tf 8 | 9 | from baselines.common.vec_env.dummy_vec_env import DummyVecEnv 10 | from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv 11 | import baselines.ppo2.ppo2 as ppo2 12 | import gym_remote.exceptions as gre 13 | import functools 14 | import argparse 15 | import sonic_util 16 | from baselines import logger 17 | from baselines.ppo2.policies import LstmPolicy, CnnPolicy 18 | import utils 19 | import os 20 | import yaml 21 | import warnings 22 | from datetime import datetime 23 | 24 | 25 | def add_boolean_flag(parser, name, default=False, help=None): 26 | """Add a boolean flag to argparse parser. 27 | Parameters 28 | ---------- 29 | parser: argparse.Parser 30 | parser to add the flag to 31 | name: str 32 | -- will enable the flag, while --no- will disable it 33 | default: bool or None 34 | default value of the flag 35 | help: str 36 | help string for the flag 37 | """ 38 | dest = name.replace('-', '_') 39 | parser.add_argument("--" + name, action="store_true", default=default, dest=dest, help=help) 40 | parser.add_argument("--no-" + name, action="store_false", dest=dest) 41 | 42 | 43 | def main(policy, env, params): 44 | """Run PPO until the environment throws an exception.""" 45 | config = tf.ConfigProto() 46 | config.gpu_options.allow_growth = True # pylint: disable=E1101 47 | with tf.Session(config=config): 48 | # Take more timesteps than we need to be sure that 49 | # we stop due to an exception. 
50 | ppo2.learn(policy=policy, 51 | env=env, 52 | nsteps=params['n_steps'], 53 | nminibatches=(params['n_steps']*env.num_envs) // params["batch_size"], 54 | lam=params["lam"], 55 | gamma=params['gamma'], 56 | noptepochs=params["n_opt_epochs"], 57 | log_interval=params["log_interval"], 58 | ent_coef=params["ent_coef"], 59 | vf_coef=params['vf_coef'], 60 | lr=lambda _: params["lr"], 61 | cliprange=lambda _: params['cliprange'], 62 | max_grad_norm=params['max_grad_norm'], 63 | total_timesteps=params["max_steps"], 64 | save_interval=params["save_interval"], 65 | weights_path=params["weights_path"], 66 | adam_stats=params["adam_stats"], 67 | nmixup=params["nmixup"], 68 | weights_choose_eps=params["weights_choose_eps"], 69 | cnn=params['cnn']) 70 | 71 | 72 | def run_train(): 73 | def _parse_args(): 74 | parser = argparse.ArgumentParser(description="Run commands") 75 | parser.add_argument('--config', type=str, default=None, nargs='+', 76 | help="file with config") 77 | return parser.parse_args() 78 | 79 | args = _parse_args() 80 | config = utils.load_config(args.config) 81 | 82 | env_params = config['env_params'] 83 | train_params = config['train_params'] 84 | 85 | if train_params["policy"] == 'lstm': 86 | policy = LstmPolicy 87 | elif train_params["policy"] == 'cnn': 88 | policy = CnnPolicy 89 | else: 90 | raise ValueError("unknown policy {}".format(train_params["policy"])) 91 | 92 | if train_params['cnn'] == "openai1" and not env_params['small_size']: 93 | warnings.warn('asked for openai1 policy, but small_size is not set in env params') 94 | 95 | # create environment functions 96 | n_envs = train_params['n_envs'] 97 | if n_envs == 1: 98 | vec_fn = DummyVecEnv 99 | elif n_envs > 1: 100 | vec_fn = SubprocVecEnv 101 | else: 102 | raise ValueError('number of environments less than 1: {}'.format(n_envs)) 103 | env = vec_fn([functools.partial(sonic_util.make_from_config, env_params) for _ in range(n_envs)]) 104 | 105 | logdir = os.path.join("logs", str(datetime.now())) 106 | logger.configure(logdir) 107 | 108 | # save run config 109 | with open(os.path.join(logdir, "run_config.yaml"), 'w') as f: 110 | yaml.dump(config, f) 111 | 112 | main(policy, env, train_params) 113 | 114 | 115 | if __name__ == '__main__': 116 | try: 117 | run_train() 118 | except gre.GymRemoteError as exc: 119 | print('exception', exc) 120 | -------------------------------------------------------------------------------- /baseline/rainbow.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent:tensorflow 2 | 3 | # Needed for OpenCV. 4 | RUN apt-get update && \ 5 | apt-get install -y libgtk2.0-dev && \ 6 | rm -rf /var/lib/apt/lists/* 7 | 8 | # Baselines has some unneeded and cumbersome dependencies, 9 | # so we manually fetch the deps we need. 10 | RUN . ~/venv/bin/activate && \ 11 | pip install scipy tqdm joblib zmq dill progressbar2 cloudpickle opencv-python && \ 12 | pip install --no-deps git+https://github.com/openai/baselines.git 13 | 14 | # Use the anyrl open source RL framework. 15 | RUN . ~/venv/bin/activate && \ 16 | pip install anyrl==0.11.17 17 | 18 | ADD rainbow_agent.py ./agent.py 19 | ADD sonic_util.py . 
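# rainbow_agent.py is shipped as agent.py above so that the CMD below can
# launch it as /root/compo/agent.py.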
20 | 21 | CMD ["python", "-u", "/root/compo/agent.py"] 22 | -------------------------------------------------------------------------------- /baseline/rainbow_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Train an agent on Sonic using an open source Rainbow DQN 5 | implementation. 6 | """ 7 | 8 | import tensorflow as tf 9 | 10 | from anyrl.algos import DQN 11 | from anyrl.envs import BatchedGymEnv 12 | from anyrl.envs.wrappers import BatchedFrameStack 13 | from anyrl.models import rainbow_models 14 | from anyrl.rollouts import BatchedPlayer, PrioritizedReplayBuffer, NStepPlayer 15 | from anyrl.spaces import gym_space_vectorizer 16 | import gym_remote.exceptions as gre 17 | 18 | from sonic_util import AllowBacktracking, make_env 19 | 20 | def main(): 21 | """Run DQN until the environment throws an exception.""" 22 | env = AllowBacktracking(make_env(stack=False, scale_rew=False)) 23 | env = BatchedFrameStack(BatchedGymEnv([[env]]), num_images=4, concat=False) 24 | config = tf.ConfigProto() 25 | config.gpu_options.allow_growth = True # pylint: disable=E1101 26 | with tf.Session(config=config) as sess: 27 | dqn = DQN(*rainbow_models(sess, 28 | env.action_space.n, 29 | gym_space_vectorizer(env.observation_space), 30 | min_val=-200, 31 | max_val=200)) 32 | player = NStepPlayer(BatchedPlayer(env, dqn.online_net), 3) 33 | optimize = dqn.optimize(learning_rate=1e-4) 34 | sess.run(tf.global_variables_initializer()) 35 | dqn.train(num_steps=2000000, # Make sure an exception arrives before we stop. 36 | player=player, 37 | replay_buffer=PrioritizedReplayBuffer(500000, 0.5, 0.4, epsilon=0.1), 38 | optimize_op=optimize, 39 | train_interval=1, 40 | target_interval=8192, 41 | batch_size=32, 42 | min_buffer_size=20000) 43 | 44 | if __name__ == '__main__': 45 | try: 46 | main() 47 | except gre.GymRemoteError as exc: 48 | print('exception', exc) 49 | -------------------------------------------------------------------------------- /baseline/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | tqdm 3 | joblib 4 | zmq 5 | dill 6 | progressbar2 7 | cloudpickle 8 | opencv-python 9 | pandas 10 | gym-retro 11 | pyyaml 12 | -------------------------------------------------------------------------------- /baseline/roms/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/baseline/roms/.gitkeep -------------------------------------------------------------------------------- /baseline/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | conda create -n retro python=3.5 -y 3 | source activate retro 4 | pip install http://download.pytorch.org/whl/cu91/torch-0.4.0-cp35-cp35m-linux_x86_64.whl 5 | pip install -r requirements.txt 6 | pip install --no-deps git+https://github.com/fgvbrt/baselines.git@1e3f646f1859d2447348c647d42c48b7d6cc4423 7 | git clone https://github.com/openai/retro-contest.git && cd retro-contest/support && pip install . 
8 | 9 | # download roms 10 | wget -qO - https://www.dropbox.com/s/8i0mh0bn2bbe1w5/roms.tar.gz?dl=0 | tar xzv 11 | find ./roms/ -name 'Sonic*' -type d -exec python -m retro.import {} \; -------------------------------------------------------------------------------- /baseline/simple-agent.docker: -------------------------------------------------------------------------------- 1 | FROM openai/retro-agent 2 | ADD simple-agent.py . 3 | CMD ["python", "-u", "/root/compo/simple-agent.py"] 4 | -------------------------------------------------------------------------------- /baseline/simple-agent.py: -------------------------------------------------------------------------------- 1 | import gym_remote.exceptions as gre 2 | import gym_remote.client as grc 3 | 4 | 5 | def main(): 6 | print('connecting to remote environment') 7 | env = grc.RemoteEnv('tmp/sock') 8 | print('starting episode') 9 | env.reset() 10 | while True: 11 | action = env.action_space.sample() 12 | action[7] = 1 13 | ob, reward, done, _ = env.step(action) 14 | if done: 15 | print('episode complete') 16 | env.reset() 17 | 18 | 19 | if __name__ == '__main__': 20 | try: 21 | main() 22 | except gre.GymRemoteError as e: 23 | print('exception', e) -------------------------------------------------------------------------------- /baseline/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python -u /root/compo/ppo2_agent.py --config config.yaml config_test.yaml -------------------------------------------------------------------------------- /baseline/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python -u /root/compo/ppo2_agent.py --config config.yaml config_train.yaml -------------------------------------------------------------------------------- /baseline/train_large.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | 
SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 49 | -------------------------------------------------------------------------------- /baseline/train_nodocker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | CUDA_DEVICE_ORDER="PCI_BUS_ID" CUDA_VISIBLE_DEVICES="$1" python ppo2_agent.py --config config.yaml config_train.yaml 3 | -------------------------------------------------------------------------------- /baseline/train_small.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,StarLightZone.Act2 4 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 5 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 6 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 7 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 8 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 10 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 11 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 12 | -------------------------------------------------------------------------------- /baseline/utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import yaml 3 | 4 | 5 | def load_config(fnames): 6 | 7 | config = {} 8 | for fname in fnames: 9 | with open(fname) as f: 10 | config = merge_dictionaries(config, yaml.safe_load(f)) 11 | 12 | return config 13 | 14 | 15 | def add_boolean_flag(parser, name, default=False, help=None): 16 | """Add a boolean flag to argparse parser. 
17 | Parameters 18 | ---------- 19 | parser: argparse.ArgumentParser 20 | parser to add the flag to 21 | name: str 22 | --<name> will enable the flag, while --no-<name> will disable it 23 | default: bool or None 24 | default value of the flag 25 | help: str 26 | help string for the flag 27 | """ 28 | dest = name.replace('-', '_') 29 | parser.add_argument("--" + name, action="store_true", default=default, dest=dest, help=help) 30 | parser.add_argument("--no-" + name, action="store_false", dest=dest) 31 | 32 | 33 | def merge_dictionaries(a, b, path_to_root=None, extend_lists=False): 34 | """ 35 | Creates a copy of dictionary `a` and recursively updates its entries with the entries from `b`. 36 | :param extend_lists: 37 | if True and the value is a list in both dictionaries (when both contain the key), the elements from b are appended 38 | to the elements from a; if the value in one of the dictionaries is not a list, a ValueError is raised 39 | if False, values of type list are treated as ordinary values and simply replace/overwrite each other 40 | """ 41 | res = deepcopy(a) 42 | 43 | if path_to_root is None: 44 | path_to_root = [] 45 | 46 | for key in b: 47 | if key not in res: 48 | res[key] = b[key] 49 | continue 50 | if isinstance(res[key], dict): 51 | if isinstance(b[key], dict): 52 | res[key] = merge_dictionaries(res[key], b[key], path_to_root + [str(key)], extend_lists=extend_lists) 53 | else: 54 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 55 | elif extend_lists and isinstance(res[key], list): 56 | if isinstance(b[key], list): 57 | res[key].extend(b[key]) 58 | else: 59 | raise ValueError( 60 | "Cannot extend a list with a non-list. Path: {}".format('.'.join(path_to_root + [str(key)]))) 61 | else: 62 | if extend_lists and isinstance(b[key], list): 63 | raise ValueError( 64 | "Cannot extend a non-list with a list. Path: {}".format('.'.join(path_to_root + [str(key)]))) 65 | elif not isinstance(b[key], dict): 66 | res[key] = b[key] 67 | else: 68 | raise TypeError('Conflict at {}'.format('.'.join(path_to_root + [str(key)]))) 69 | return res 70 | -------------------------------------------------------------------------------- /baseline/validation.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 3 | SonicTheHedgehog-Genesis,GreenHillZone.Act2 4 | SonicTheHedgehog-Genesis,StarLightZone.Act3 5 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act1 6 | SonicTheHedgehog2-Genesis,MetropolisZone.Act3 7 | SonicTheHedgehog2-Genesis,HillTopZone.Act2 8 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act1 10 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act2 11 | SonicAndKnuckles3-Genesis,HydrocityZone.Act1 12 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act2 13 | -------------------------------------------------------------------------------- /deepneuroevolution/README.md: -------------------------------------------------------------------------------- 1 | ## AI Labs Neuroevolution Algorithms 2 | 3 | This repo contains distributed implementations of the algorithms described in: 4 | 5 | [1] [Deep Neuroevolution: Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforcement Learning](https://arxiv.org/abs/1712.06567) 6 | 7 | [2] [Improving Exploration in Evolution Strategies for Deep Reinforcement Learning via a Population of Novelty-Seeking Agents](https://arxiv.org/abs/1712.06560) 8 | 9 | Our code is based on code from OpenAI, whom we thank. 
The original code and related paper from OpenAI can be found [here](https://github.com/openai/evolution-strategies-starter). The repo has been modified to run both ES and our algorithms, including our Deep Genetic Algorithm (DeepGA), locally and on AWS. 10 | 11 | Note: The Humanoid experiment depends on [Mujoco](http://www.mujoco.org/). Please provide your own Mujoco license and binary. 12 | 13 | The article describing these papers can be found [here](https://eng.uber.com/deep-neuroevolution/). 14 | 15 | ## Visual Inspector for NeuroEvolution (VINE) 16 | The folder `./visual_inspector` contains implementations of VINE, i.e., Visual Inspector for NeuroEvolution, an interactive data visualization tool for neuroevolution. Refer to `README.md` in that folder for further instructions on running and customizing your visualization. An article describing this visualization tool can be found [here](https://eng.uber.com/vine/). 17 | 18 | ## Accelerated Deep Neuroevolution 19 | The folder `./gpu_implementation` contains an implementation that uses the GPU more efficiently. Refer to `README.md` in that folder for further instructions. 20 | 21 | ## How to run locally 22 | 23 | clone repo 24 | 25 | ``` 26 | git clone https://github.com/uber-common/deep-neuroevolution.git 27 | ``` 28 | 29 | create python3 virtual env 30 | 31 | ``` 32 | python3 -m venv env 33 | . env/bin/activate 34 | ``` 35 | 36 | install requirements 37 | ``` 38 | pip install -r requirements.txt 39 | ``` 40 | If you plan to use the mujoco env, make sure to follow [mujoco-py](https://github.com/openai/mujoco-py)'s readme about how to install mujoco correctly. 41 | 42 | launch redis 43 | ``` 44 | . scripts/local_run_redis.sh 45 | ``` 46 | 47 | launch sample ES experiment 48 | ``` 49 | . scripts/local_run_exp.sh es configurations/frostbite_es.json # For the Atari game Frostbite 50 | . scripts/local_run_exp.sh es configurations/humanoid.json # For the MuJoCo Humanoid-v1 environment 51 | ``` 52 | 53 | launch sample NS-ES experiment 54 | ``` 55 | . scripts/local_run_exp.sh ns-es configurations/frostbite_nses.json 56 | . scripts/local_run_exp.sh ns-es configurations/humanoid_nses.json 57 | ``` 58 | 59 | launch sample NSR-ES experiment 60 | ``` 61 | . scripts/local_run_exp.sh nsr-es configurations/frostbite_nsres.json 62 | . scripts/local_run_exp.sh nsr-es configurations/humanoid_nsres.json 63 | ``` 64 | 65 | launch sample GA experiment 66 | ``` 67 | . scripts/local_run_exp.sh ga configurations/frostbite_ga.json # For the Atari game Frostbite 68 | ``` 69 | 70 | launch sample Random Search experiment 71 | ``` 72 | . scripts/local_run_exp.sh rs configurations/frostbite_ga.json # For the Atari game Frostbite 73 | ``` 74 | 75 | 76 | visualize results by running a policy file 77 | ``` 78 | python -m scripts.viz 'FrostbiteNoFrameskip-v4' 79 | python -m scripts.viz 'Humanoid-v1' 80 | ``` 81 | 82 | ### extra folder 83 | The extra folder holds the XML specification file for the Humanoid 84 | Locomotion with Deceptive Trap domain used in https://arxiv.org/abs/1712.06560. Use this XML file in gym to recreate the environment. 
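As a quick sanity check, the XML can also be loaded directly, outside of a registered gym environment. A minimal sketch (assuming a licensed `mujoco-py` >= 1.50 installation; run from the repo root):

```
# Load the deceptive-trap humanoid model with mujoco-py and step it briefly.
import mujoco_py

model = mujoco_py.load_model_from_path('extra/humanoid_maze.xml')
sim = mujoco_py.MjSim(model)
for _ in range(100):
    sim.step()           # advance the physics with zero controls
print(sim.data.qpos)     # joint positions after 100 steps
```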
85 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_es.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 5000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "FrostbiteNoFrameskip-v4", 14 | "optimizer": { 15 | "args": { 16 | "stepsize": 0.01 17 | }, 18 | "type": "adam" 19 | }, 20 | "policy": { 21 | "args" : {}, 22 | "type": "ESAtariPolicy" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 5000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "population_size": 10, 14 | "num_elites": 1, 15 | "env_id": "FrostbiteNoFrameskip-v4", 16 | "policy": { 17 | "args" : { 18 | "nonlin_type": "relu" 19 | }, 20 | "type": "GAAtariPolicy" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_nses.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 100, 5 | "eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 1000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "FrostbiteNoFrameskip-v4", 14 | "algo_type": "ns", 15 | "novelty_search": { 16 | "k": 10, 17 | "population_size": 3, 18 | "num_rollouts": 1, 19 | "selection_method": "novelty_prob" 20 | }, 21 | "optimizer": { 22 | "args": { 23 | "stepsize": 0.01 24 | }, 25 | "type": "adam" 26 | }, 27 | "policy": { 28 | "args": {}, 29 | "type": "ESAtariPolicy" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/frostbite_nsres.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.1, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "FrostbiteNoFrameskip-v4", 14 | "algo_type": "nsr", 15 | "novelty_search": { 16 | "k": 10, 17 | "population_size": 3, 18 | "num_rollouts": 1, 19 | "selection_method": "novelty_prob" 20 | }, 21 | "optimizer": { 22 | "args": { 23 | "stepsize": 0.01 24 | }, 25 | "type": "adam" 26 | }, 27 | "policy": { 28 | "args": {}, 29 | "type": "ESAtariPolicy" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/humanoid.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.01, 4 | "episodes_per_batch": 1000, 5 | 
"eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 100000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": "env_default" 12 | }, 13 | "env_id": "Humanoid-v1", 14 | "exp_prefix": "humanoid", 15 | "optimizer": { 16 | "args": { 17 | "stepsize": 0.01 18 | }, 19 | "type": "adam" 20 | }, 21 | "policy": { 22 | "args": { 23 | "ac_bins": "continuous:", 24 | "ac_noise_std": 0.01, 25 | "connection_type": "ff", 26 | "hidden_dims": [ 27 | 256, 28 | 256 29 | ], 30 | "nonlin_type": "tanh" 31 | }, 32 | "type": "MujocoPolicy" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/humanoid_nses.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.01, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 100000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": "env_default" 12 | }, 13 | "env_id": "Humanoid-v1", 14 | "algo_type": "ns", 15 | "exp_prefix": "humanoid", 16 | "novelty_search": { 17 | "k": 10, 18 | "population_size": 5, 19 | "num_rollouts": 5, 20 | "selection_method": "novelty_prob" 21 | }, 22 | "optimizer": { 23 | "args": { 24 | "stepsize": 0.01 25 | }, 26 | "type": "adam" 27 | }, 28 | "policy": { 29 | "args": { 30 | "ac_bins": "continuous:", 31 | "ac_noise_std": 0.01, 32 | "connection_type": "ff", 33 | "hidden_dims": [ 34 | 256, 35 | 256 36 | ], 37 | "nonlin_type": "tanh" 38 | }, 39 | "type": "MujocoPolicy" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/humanoid_nsres.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.01, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.03, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 100000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": "env_default" 12 | }, 13 | "env_id": "Humanoid-v1", 14 | "algo_type": "nsr", 15 | "exp_prefix": "humanoid", 16 | "novelty_search": { 17 | "k": 10, 18 | "population_size": 5, 19 | "num_rollouts": 5, 20 | "selection_method": "novelty_prob" 21 | }, 22 | "optimizer": { 23 | "args": { 24 | "stepsize": 0.01 25 | }, 26 | "type": "adam" 27 | }, 28 | "policy": { 29 | "args": { 30 | "ac_bins": "continuous:", 31 | "ac_noise_std": 0.01, 32 | "connection_type": "ff", 33 | "hidden_dims": [ 34 | 256, 35 | 256 36 | ], 37 | "nonlin_type": "tanh" 38 | }, 39 | "type": "MujocoPolicy" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/sonic_es.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 2000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "LabyrinthZone.Act1", 14 | "optimizer": { 15 | "args": { 16 | "stepsize": 0.01 17 | }, 18 | "type": "adam" 19 | }, 20 | "policy": { 21 | "args" : {}, 22 | "type": "ESAtariPolicy" 23 | } 24 | } 25 | 
-------------------------------------------------------------------------------- /deepneuroevolution/configurations/sonic_ga.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.01, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.005, 8 | "snapshot_freq": 20, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "population_size": 10, 14 | "num_elites": 1, 15 | "env_id": "LabyrinthZone.Act1", 16 | "policy": { 17 | "args" : { 18 | "nonlin_type": "elu" 19 | }, 20 | "type": "GAAtariPolicy" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /deepneuroevolution/configurations/sonic_nsres.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "calc_obstat_prob": 0.0, 4 | "episodes_per_batch": 1000, 5 | "eval_prob": 0.1, 6 | "l2coeff": 0.005, 7 | "noise_stdev": 0.02, 8 | "snapshot_freq": 10, 9 | "timesteps_per_batch": 10000, 10 | "return_proc_mode": "centered_sign_rank", 11 | "episode_cutoff_mode": 5000 12 | }, 13 | "env_id": "LabyrinthZone.Act1", 14 | "algo_type": "nsr", 15 | "novelty_search": { 16 | "k": 10, 17 | "population_size": 3, 18 | "num_rollouts": 1, 19 | "selection_method": "novelty_prob" 20 | }, 21 | "optimizer": { 22 | "args": { 23 | "stepsize": 0.01 24 | }, 25 | "type": "adam" 26 | }, 27 | "policy": { 28 | "args": {}, 29 | "type": "ESAtariPolicy" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /deepneuroevolution/es_distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/es_distributed/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/es_distributed/main.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | 7 | import click 8 | 9 | from .dist import RelayClient 10 | from .es import run_master, run_worker, SharedNoiseTable 11 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 12 | 13 | def mkdir_p(path): 14 | try: 15 | os.makedirs(path) 16 | except OSError as exc: 17 | if exc.errno == errno.EEXIST and os.path.isdir(path): 18 | pass 19 | else: 20 | raise 21 | 22 | @click.group() 23 | def cli(): 24 | logging.basicConfig( 25 | format='[%(asctime)s pid=%(process)d] %(message)s', 26 | level=logging.INFO, 27 | stream=sys.stderr) 28 | 29 | def import_algo(name): 30 | if name == 'es': 31 | from . import es as algo 32 | elif name == 'ns-es' or name == "nsr-es": 33 | from . import nses as algo 34 | elif name == 'ga': 35 | from . import ga as algo 36 | elif name == 'rs': 37 | from . 
import rs as algo 38 | else: 39 | raise NotImplementedError() 40 | return algo 41 | 42 | @cli.command() 43 | @click.option('--algo') 44 | @click.option('--exp_str') 45 | @click.option('--exp_file') 46 | @click.option('--master_socket_path', required=True) 47 | @click.option('--log_dir') 48 | def master(algo, exp_str, exp_file, master_socket_path, log_dir): 49 | # Start the master 50 | assert (exp_str is None) != (exp_file is None), 'Must provide exp_str xor exp_file to the master' 51 | if exp_str: 52 | exp = json.loads(exp_str) 53 | elif exp_file: 54 | with open(exp_file, 'r') as f: 55 | exp = json.loads(f.read()) 56 | else: 57 | assert False 58 | log_dir = os.path.expanduser(log_dir) if log_dir else '/tmp/es_master_{}'.format(os.getpid()) 59 | mkdir_p(log_dir) 60 | algo = import_algo(algo) 61 | algo.run_master({'unix_socket_path': master_socket_path}, log_dir, exp) 62 | 63 | 64 | @cli.command() 65 | @click.option('--algo') 66 | @click.option('--master_host', required=True) 67 | @click.option('--master_port', default=6379, type=int) 68 | @click.option('--relay_socket_path', required=True) 69 | @click.option('--num_workers', type=int, default=0) 70 | def workers(algo, master_host, master_port, relay_socket_path, num_workers): 71 | # Start the relay 72 | master_redis_cfg = {'host': master_host, 'port': master_port} 73 | relay_redis_cfg = {'unix_socket_path': relay_socket_path} 74 | if os.fork() == 0: 75 | RelayClient(master_redis_cfg, relay_redis_cfg).run() 76 | return 77 | # Start the workers 78 | algo = import_algo(algo) 79 | noise = algo.SharedNoiseTable() # Workers share the same noise 80 | num_workers = num_workers if num_workers else os.cpu_count() 81 | logging.info('Spawning {} workers'.format(num_workers)) 82 | for _ in range(num_workers): 83 | if os.fork() == 0: 84 | algo.run_worker(master_redis_cfg, relay_redis_cfg, noise=noise) 85 | return 86 | os.wait() 87 | 88 | 89 | if __name__ == '__main__': 90 | cli() 91 | -------------------------------------------------------------------------------- /deepneuroevolution/es_distributed/optimizers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Optimizer(object): 5 | def __init__(self, theta): 6 | self.theta = theta 7 | self.dim = len(self.theta) 8 | self.t = 0 9 | 10 | def update(self, globalg): 11 | self.t += 1 12 | step = self._compute_step(globalg) 13 | theta = self.theta 14 | ratio = np.linalg.norm(step) / np.linalg.norm(theta) 15 | new_theta = self.theta + step 16 | self.theta = new_theta 17 | return ratio, new_theta 18 | 19 | def _compute_step(self, globalg): 20 | raise NotImplementedError 21 | 22 | 23 | class SGD(Optimizer): 24 | def __init__(self, theta, stepsize, momentum=0.9): 25 | Optimizer.__init__(self, theta) 26 | self.v = np.zeros(self.dim, dtype=np.float32) 27 | self.stepsize, self.momentum = stepsize, momentum 28 | 29 | def _compute_step(self, globalg): 30 | self.v = self.momentum * self.v + (1. 
- self.momentum) * globalg 31 | step = -self.stepsize * self.v 32 | return step 33 | 34 | 35 | class Adam(Optimizer): 36 | def __init__(self, theta, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08): 37 | Optimizer.__init__(self, theta) 38 | self.stepsize = stepsize 39 | self.beta1 = beta1 40 | self.beta2 = beta2 41 | self.epsilon = epsilon 42 | self.m = np.zeros(self.dim, dtype=np.float32) 43 | self.v = np.zeros(self.dim, dtype=np.float32) 44 | 45 | def _compute_step(self, globalg): 46 | a = self.stepsize * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t) 47 | self.m = self.beta1 * self.m + (1 - self.beta1) * globalg 48 | self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg) 49 | step = -a * self.m / (np.sqrt(self.v) + self.epsilon) 50 | return step 51 | 52 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/README.md: -------------------------------------------------------------------------------- 1 | ## AI Labs - GPU Neuroevolution 2 | This folder contains preliminary work done to implement GPU-based deep neuroevolution. 3 | For problems like Atari, where policy evaluation takes a considerable amount of time, it is advantageous to make use of GPUs to evaluate the neural networks. This code shows how it is possible to run Atari simulations in parallel using the GPU in a way where we can evaluate neural networks in batches and have both CPU and GPU operating at the same time. 4 | 5 | This folder has code in prototype stage and still requires a lot of changes to optimize performance, maintainability, and testing. We welcome pull requests to this repo and have plans to improve it in the future. Although it can run CPU-only, it is slower than our original implementation due to overhead. Once this implementation has matured we plan on distributing it as a package for easy installation. We included an implementation of the HardMaze, but the GA-NS implementation will be added later on. 6 | 7 | ## Installation 8 | 9 | clone repo 10 | 11 | ``` 12 | git clone https://github.com/uber-common/deep-neuroevolution.git 13 | ``` 14 | 15 | create python3 virtual env 16 | 17 | ``` 18 | python3 -m venv env 19 | . env/bin/activate 20 | ``` 21 | 22 | install tensorflow or tensorflow-gpu > 1.2. 23 | ``` 24 | pip install tensorflow-gpu 25 | ``` 26 | Follow instructions under ./gym_tensorflow/README on how to compile the optimized interfaces. 27 | 28 | To train GA on Atari just run: 29 | ``` 30 | python ga.py ga_atari_config.json 31 | ``` 32 | Random search (a special case of GA where 0 individuals become parents): 33 | ``` 34 | python ga.py rs_atari_config.json 35 | ``` 36 | 37 | Evolution Strategies: 38 | ``` 39 | python es.py es_atari_config.json 40 | ``` 41 | 42 | Visualizing policies is possible if you install gym with `pip install gym` and run: 43 | ``` 44 | python -m neuroevolution.display 45 | ``` 46 | We currently have one example policy but more will be added in the future. 47 | 48 | ## Breakdown 49 | 50 | * gym_tensorflow - Folder containing TensorFlow custom ops for Reinforcement Learning (Atari, Hard Maze); see the usage sketch below. 51 | * moving away from python-based environments yields significant speed-ups in a multithreaded environment. 52 | * neuroevolution - folder containing source code to evaluate many policies simultaneously. 53 | * concurrent_worker.py - Improved implementation where each thread can evaluate a dynamically sized batch of policies at a time. Needs custom Tensorflow ops. 
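As a rough sketch of how the batched API is meant to be used (modeled on `get_ref_batch` in `gym_tensorflow/__init__.py`; an illustration, not a tested example), every environment interaction is a TensorFlow op, so a single `session.run` advances the entire batch:

```
import tensorflow as tf
import gym_tensorflow

batch_size = 64
env = gym_tensorflow.make('frostbite', batch_size=batch_size)
# One random discrete action per environment in the batch.
actions = tf.random_uniform((batch_size,), minval=0, maxval=env.action_space, dtype=tf.int32)
reset_op = env.reset()
rew_op, done_op = env.step(actions)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(reset_op)
    for _ in range(100):
        rew, done = sess.run([rew_op, done_op])  # steps all 64 simulations at once
```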
54 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/configurations/es_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "ModelVirtualBN", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 5000, 7 | "timesteps": 250e6, 8 | "episode_cutoff_mode": 5000, 9 | "return_proc_mode": "centered_rank", 10 | "l2coeff": 0.005, 11 | "mutation_power": 0.02, 12 | "optimizer": { 13 | "args": { 14 | "stepsize": 0.01 15 | }, 16 | "type": "adam" 17 | } 18 | } -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/configurations/ga_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "LargeModel", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 1000, 7 | "episode_cutoff_mode": 5000, 8 | "timesteps": 1.5e9, 9 | "validation_threshold": 10, 10 | "mutation_power": 0.002, 11 | "selection_threshold": 20 12 | } -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/configurations/rs_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "Model", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 1000, 7 | "episode_cutoff_mode": 5000, 8 | "timesteps": 1.5e9, 9 | "validation_threshold": 10, 10 | "mutation_power": 0.002, 11 | "selection_threshold": 0 12 | } -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/Makefile: -------------------------------------------------------------------------------- 1 | USE_SDL := 0 2 | USE_ALE := 0 3 | USE_GPU := 1 4 | 5 | DIR := ./ 6 | 7 | TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 8 | TF_LIB := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 9 | FLAGS := -std=c++11 -shared -fPIC -I$(TF_INC) -I$(TF_INC)/external/nsync/public -L$(TF_LIB) -D_GLIBCXX_USE_CXX11_ABI=0 -O2 10 | CXX := g++ 11 | LDFLAGS := -ltensorflow_framework 12 | 13 | SOURCES := $(DIR)/*.cpp $(DIR)/ops/*.cpp 14 | 15 | ifeq ($(USE_GPU), 1) 16 | FLAGS += -DGOOGLE_CUDA=1 17 | endif 18 | 19 | # This will likely need to be changed to suit your installation. 
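# The ALE path below assumes the atari-py checkout described in
# gym_tensorflow/atari/README.md (cloned into this folder); adjust it if your
# checkout lives elsewhere.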
20 | ifeq ($(USE_ALE), 1) 21 | ALE := $(shell pwd)/atari-py/atari_py/ale_interface 22 | FLAGS += -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE)/build 23 | LDFLAGS += -lale 24 | SOURCES += $(DIR)/atari/*.cpp 25 | endif 26 | 27 | UNAME_S := $(shell uname -s) 28 | ifeq ($(UNAME_S),Linux) 29 | FLAGS += -Wl,-rpath=$(ALE)/build 30 | endif 31 | ifeq ($(UNAME_S),Darwin) 32 | FLAGS += -framework Cocoa 33 | endif 34 | 35 | ifeq ($(strip $(USE_SDL)), 1) 36 | DEFINES += -D__USE_SDL -DSOUND_SUPPORT 37 | FLAGS += $(shell sdl-config --cflags) 38 | LDFLAGS += $(shell sdl-config --libs) 39 | endif 40 | 41 | 42 | all: gym_tensorflow.so 43 | 44 | gym_tensorflow.so: 45 | $(CXX) $(FLAGS) $(SOURCES) $(LDFLAGS) -o gym_tensorflow.so 46 | 47 | clean: 48 | rm -rf gym_tensorflow.so 49 | 50 | remake: clean all 51 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/README.md: -------------------------------------------------------------------------------- 1 | Instructions 2 | ----------------- 3 | 4 | This module provides C++/TensorFlow interfaces that operate similarly to OpenAI's gym library. Since it was built to remove Python from the critical portion of the code (the simulations), it provides a significant speed-up in multithreaded environments. 5 | We currently provide two environments that use this interface: Atari and Hard Maze. The Atari environment is supported but optional. Our Atari support is licensed under GPLv2, and instructions on how to use it can be found in the `./atari` folder. 6 | 7 | To compile this module, open the `Makefile` and adjust the settings (USE_GPU, USE_ALE, etc.); once configured, run `make` to build from source. 8 | 9 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .tf_env import GymEnv 4 | from . import atari, maze 5 | from .wrappers import StackFramesWrapper 6 | 7 | def make(game, batch_size, *args, **kwargs): 8 | if game == 'maze': 9 | return maze.MazeEnv(batch_size) 10 | if game in atari.games: 11 | return StackFramesWrapper(atari.AtariEnv(game, batch_size, *args, **kwargs)) 12 | if game.startswith('gym.'): 13 | return GymEnv(game[4:], batch_size, *args, **kwargs) 14 | raise NotImplementedError(game) 15 | 16 | 17 | def get_ref_batch(make_env_f, sess, batch_size): 18 | env = make_env_f(1) 19 | assert env.discrete_action 20 | actions = tf.random_uniform((1,), minval=0, maxval=env.action_space, dtype=tf.int32) 21 | 22 | reset_op = env.reset() 23 | obs_op = env.observation() 24 | rew_op, done_op = env.step(actions) 25 | 26 | sess.run(tf.global_variables_initializer()) 27 | 28 | sess.run(reset_op) 29 | 30 | ref_batch = [] 31 | while len(ref_batch) < batch_size: 32 | obs, done = sess.run([obs_op, done_op]) 33 | ref_batch.append(obs) 34 | if done.any(): 35 | sess.run(reset_op) 36 | 37 | return np.concatenate(ref_batch) 38 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/atari/README.md: -------------------------------------------------------------------------------- 1 | Notice 2 | ----------------- 3 | The ALE/atari-py is not part of deep-neuroevolution. 
4 | This folder provides instructions and sample code for running the ALE. 5 | It depends on atari-py. atari-py is licensed under GPLv2. 6 | 7 | Instructions 8 | ----------------- 9 | 10 | The first thing to do is clone the atari-py repository into the `gym_tensorflow` folder using 11 | ``` 12 | git clone https://github.com/fps7806/atari-py.git 13 | ``` 14 | The relative path is important but can be changed inside the `Makefile` as necessary. 15 | 16 | We will be using slightly different settings for the build, so you need to open the ./atari-py/atari_py/ale_interface/CMakeLists.txt file and change the first lines to: 17 | 18 | ``` 19 | cmake_minimum_required (VERSION 2.6) 20 | project(ale) 21 | set(ALEVERSION "0.5") 22 | 23 | 24 | option(USE_SDL "Use SDL" OFF) 25 | option(USE_RLGLUE "Use RL-Glue" OFF) 26 | option(BUILD_EXAMPLES "Build Example Agents" OFF) 27 | option(BUILD_CPP_LIB "Build C++ Shared Library" ON) 28 | option(BUILD_CLI "Build ALE Command Line Interface" OFF) 29 | option(BUILD_C_LIB "Build ALE C Library (needed for Python interface)" OFF) 30 | 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused -fPIC -O3 -fomit-frame-pointer -D__STDC_CONSTANT_MACROS -D_GLIBCXX_USE_CXX11_ABI=0") 32 | ``` 33 | 34 | This ensures that the C++ lib is compiled and adds `-D_GLIBCXX_USE_CXX11_ABI=0`, which is required for compatibility with TensorFlow. 35 | Once modified, you can build the library with `cd ./atari-py && make`. 36 | 37 | Once built successfully, the `USE_ALE := 1` flag can be set in the ./gym_tensorflow/Makefile so that the necessary files are compiled. 38 | 39 | Running `cd ./gym_tensorflow && make` should then give you access to the Atari games as a set of TensorFlow ops. -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from .. import tf_env 2 | 3 | from .tf_atari import * 4 | 5 | if not hasattr(tf_env.gym_tensorflow_module, 'atari_make'): 6 | class AtariEnv(TensorFlowEnv): 7 | def __init__(self, *args, **kwargs): 8 | raise NotImplementedError("gym_tensorflow was not compiled with ALE support.") 9 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/atari/tf_atari.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tensorflow/core/framework/op_kernel.h" 5 | #include "tensorflow/core/framework/op.h" 6 | #include "tensorflow/core/framework/shape_inference.h" 7 | #include "tensorflow/core/framework/resource_mgr.h" 8 | #include "tensorflow/core/framework/resource_op_kernel.h" 9 | #include "tensorflow/core/lib/core/blocking_counter.h" 10 | #include "tensorflow/core/lib/core/threadpool.h" 11 | #include "tensorflow/core/platform/mutex.h" 12 | #include "../tf_env.h" 13 | 14 | #ifdef __USE_SDL 15 | #include 16 | #endif 17 | 18 | using namespace tensorflow; 19 | using namespace std; 20 | using namespace ale; 21 | 22 | #define RAM_SIZE (128) 23 | 24 | class AtariEnvironment : public Environment<uint8>, public StepInterface<int> 25 | { 26 | public: 27 | AtariEnvironment(int batch_size) 28 | { 29 | m_numNoops.resize(batch_size, 0); 30 | m_maxFrames.resize(batch_size, 100000); 31 | m_pInterfaces = new ALEInterface[batch_size]; 32 | } 33 | void load_rom(string game, int i) 34 | { 35 | assert(m_numNoops[i] == 0); 
m_numNoops[i] = 1; 37 | m_pInterfaces[i].setFloat("repeat_action_probability", 0.0f); 38 | m_pInterfaces[i].setInt("random_seed", 0); 39 | m_pInterfaces[i].loadROM(game); 40 | } 41 | virtual ~AtariEnvironment() { 42 | delete[] m_pInterfaces; 43 | } 44 | 45 | TensorShape get_action_shape() override 46 | { 47 | return TensorShape(); 48 | } 49 | 50 | TensorShape get_observation_shape() override 51 | { 52 | return TensorShape({2, 53 | static_cast<int>(m_pInterfaces[0].getScreen().height()), 54 | static_cast<int>(m_pInterfaces[0].getScreen().width())}); 55 | } 56 | 57 | void get_observation(uint8 *data, int idx) override 58 | { 59 | const auto ssize = m_pInterfaces[idx].getScreen().height() * m_pInterfaces[idx].getScreen().width(); 60 | memcpy(data, m_pInterfaces[idx].theOSystem->console().mediaSource().previousFrameBuffer(), ssize); 61 | memcpy(data + ssize, m_pInterfaces[idx].theOSystem->console().mediaSource().currentFrameBuffer(), ssize); 62 | } 63 | 64 | float step(int idx, const int* action) override 65 | { 66 | int rewards = 0; 67 | for (int i = 0; i < m_repeat; ++i) 68 | { 69 | assert(m_pInterfaces[idx].getMinimalActionSet().size() > (*action)); 70 | rewards += m_pInterfaces[idx].act(m_pInterfaces[idx].getMinimalActionSet()[*action]); 71 | if (is_done(idx)) 72 | break; 73 | } 74 | return rewards; 75 | } 76 | 77 | bool is_done(int idx) override 78 | { 79 | return m_pInterfaces[idx].game_over() || 80 | m_pInterfaces[idx].getEpisodeFrameNumber() - m_numNoops[idx] >= m_maxFrames[idx]; 81 | } 82 | 83 | void reset(int i, int numNoops=0, int maxFrames=100000) override 84 | { 85 | m_pInterfaces[i].reset_game(); 86 | if(numNoops > 0) 87 | { 88 | assert(m_pInterfaces[i].getMinimalActionSet()[0] == Action::PLAYER_A_NOOP); 89 | for (int s = 0; s < numNoops;++s) 90 | { 91 | m_pInterfaces[i].act(Action::PLAYER_A_NOOP); 92 | if (m_pInterfaces[i].game_over()) 93 | m_pInterfaces[i].reset_game(); 94 | } 95 | } 96 | // Check if FIRE is part of the minimal action set 97 | if (m_pInterfaces[i].getMinimalActionSet()[1] == Action::PLAYER_A_FIRE) 98 | { 99 | assert(m_pInterfaces[i].getMinimalActionSet().size() >= 3); 100 | int action = 1; 101 | step(i, &action); 102 | if (m_pInterfaces[i].game_over()) 103 | m_pInterfaces[i].reset_game(); 104 | 105 | action = 2; 106 | step(i, &action); 107 | if (m_pInterfaces[i].game_over()) 108 | m_pInterfaces[i].reset_game(); 109 | } 110 | m_numNoops[i] = m_pInterfaces[i].getEpisodeFrameNumber(); 111 | m_maxFrames[i] = maxFrames; 112 | } 113 | 114 | void get_final_state(float *data, int idx) 115 | { 116 | auto ram = m_pInterfaces[idx].getRAM(); 117 | for (auto i = 0; i < RAM_SIZE; ++i) 118 | data[i] = ram.get(i); 119 | } 120 | 121 | string DebugString() override { return "AtariEnvironment"; } 122 | private: 123 | ALEInterface* m_pInterfaces; 124 | bool m_initialized; 125 | int m_repeat = 4; 126 | std::vector<int> m_numNoops; 127 | std::vector<int> m_maxFrames; 128 | }; 129 | 130 | class AtariMakeOp : public EnvironmentMakeOp { 131 | public: 132 | explicit AtariMakeOp(OpKernelConstruction* context) : EnvironmentMakeOp(context) { 133 | OP_REQUIRES_OK(context, context->GetAttr("game", &m_game)); 134 | ale::Logger::setMode(ale::Logger::mode(2)); 135 | } 136 | 137 | private: 138 | virtual Status CreateResource(OpKernelContext* context, BaseEnvironment** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { 139 | AtariEnvironment* env = new AtariEnvironment(batch_size); 140 | if (env == nullptr) 141 | return errors::ResourceExhausted("Failed to allocate"); 142 | *ret = env; 143 | 144 | const auto thread_pool = 
context->device()->tensorflow_cpu_worker_threads(); 145 | const int num_threads = std::min(thread_pool->num_threads, batch_size); 146 | auto f = [&](int thread_id) { 147 | for(int b =thread_id; b < batch_size;b+=num_threads) 148 | { 149 | env->load_rom(m_game, b); 150 | } 151 | }; 152 | 153 | BlockingCounter counter(num_threads-1); 154 | for (int i = 1; i < num_threads; ++i) { 155 | thread_pool->workers->Schedule([&, i]() { 156 | f(i); 157 | counter.DecrementCount(); 158 | }); 159 | } 160 | f(0); 161 | counter.Wait(); 162 | return Status::OK(); 163 | } 164 | std::string m_game; 165 | }; 166 | 167 | REGISTER_OP("AtariMake") 168 | .Attr("batch_size: int") 169 | .Attr("game: string") 170 | .Attr("container: string = ''") 171 | .Attr("shared_name: string = ''") 172 | .Output("handle: resource") 173 | .SetIsStateful() 174 | .SetShapeFn(shape_inference::ScalarShape); 175 | 176 | REGISTER_KERNEL_BUILDER(Name("AtariMake").Device(DEVICE_CPU), AtariMakeOp); 177 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/__init__.py: -------------------------------------------------------------------------------- 1 | from .tf_maze import * -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/hard_maze.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 400 3 | 13 4 | 36 184 5 | 0 6 | 31 20 7 | 31 20 8 | 41 5 3 8 9 | 3 8 4 49 10 | 4 49 57 53 11 | 4 49 7 202 12 | 7 202 195 198 13 | 195 198 186 8 14 | 186 8 39 5 15 | 56 54 56 157 16 | 57 106 158 162 17 | 77 201 108 164 18 | 6 80 33 121 19 | 192 146 87 91 20 | 56 55 133 30 21 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/hard_maze.txt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/gpu_implementation/gym_tensorflow/maze/hard_maze.txt.npy -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/maze/tf_maze.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | import tensorflow as tf 20 | from gym_tensorflow.tf_env import TensorFlowEnv, gym_tensorflow_module 21 | 22 | 23 | class MazeEnv(TensorFlowEnv): 24 | def __init__(self, batch_size, name=None): 25 | self.batch_size = batch_size 26 | self.obs_variable = None 27 | with tf.variable_scope(name, default_name='MazeInstance'): 28 | self.instances = gym_tensorflow_module.maze_make(batch_size=batch_size, filename='hard_maze.txt') 29 | 30 | @property 31 | def env_default_timestep_cutoff(self): 32 | return 400 33 | 34 | @property 35 | def action_space(self): 36 | return 2 37 | 38 | @property 39 | def discrete_action(self): 40 | return False 41 | 42 | def step(self, action, indices=None, name=None): 43 | with tf.variable_scope(name, default_name='MazeStep'): 44 | #action = tf.Print(action, [action], 'action=') 45 | return gym_tensorflow_module.environment_step(self.instances, indices=indices, action=action) 46 | 47 | def reset(self, indices=None, max_frames=None, name=None): 48 | '''Resets maze instances with a random no-op start (1-30) and sets the maximum number of frames for the episode (defaults to env_default_timestep_cutoff) 49 | ''' 50 | with tf.variable_scope(name, default_name='MazeReset'): 51 | noops = tf.random_uniform(tf.shape(indices), minval=1, maxval=31, dtype=tf.int32) 52 | if max_frames is None: 53 | max_frames = self.env_default_timestep_cutoff 54 | return gym_tensorflow_module.environment_reset(self.instances, indices, noops=noops, max_frames=max_frames) 55 | 56 | def observation(self, indices=None, name=None): 57 | with tf.variable_scope(name, default_name='MazeObservation'): 58 | with tf.device('/cpu:0'): 59 | obs = gym_tensorflow_module.environment_observation(self.instances, indices, T=tf.float32) 60 | obs.set_shape((None,) + (11,)) 61 | #obs = tf.Print(obs, [obs], "obs=") 62 | return tf.expand_dims(obs, axis=1) 63 | 64 | def final_state(self, indices, name=None): 65 | with tf.variable_scope(name, default_name='MazeFinalState'): 66 | return gym_tensorflow_module.maze_final_state(self.instances, indices) 67 | 68 | def close(self): 69 | pass -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/ops/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..tf_env import gym_tensorflow_module 3 | 4 | try: 5 | indexed_matmul = gym_tensorflow_module.indexed_batch_mat_mul 6 | except AttributeError: 7 | import time 8 | print('Indexed MatMul implementation not available. This significantly affects performance.') 9 | time.sleep(5) 10 | def indexed_matmul(a, b, idx): 11 | return tf.matmul(a, tf.gather(b, idx)) -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/tf_env.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | */ 19 | 20 | #ifndef TF_ENV_H_ 21 | #define TF_ENV_H_ 22 | #include 23 | #include "tensorflow/core/framework/resource_mgr.h" 24 | #include "tensorflow/core/framework/op_kernel.h" 25 | 26 | using namespace tensorflow; 27 | class BaseEnvironment : public ResourceBase 28 | { 29 | public: 30 | virtual bool is_done(int idx) = 0; 31 | virtual void reset(int i, int numNoops = 0, int maxFrames = 100000) = 0; 32 | }; 33 | 34 | template <class T> 35 | class StepInterface 36 | { 37 | public: 38 | virtual TensorShape get_action_shape() = 0; 39 | virtual float step(int idx, const T* action) = 0; 40 | }; 41 | 42 | template <class T> 43 | class Environment : public BaseEnvironment 44 | { 45 | public: 46 | virtual void get_observation(T* data, int idx) = 0; 47 | virtual TensorShape get_observation_shape() = 0; 48 | }; 49 | 50 | class EnvironmentMakeOp : public OpKernel { 51 | public: 52 | explicit EnvironmentMakeOp(OpKernelConstruction *context); 53 | 54 | // The resource is deleted from the resource manager only when it is private 55 | // to kernel. Ideally the resource should be deleted when it is no longer held 56 | // by anyone, but it would break backward compatibility. 57 | virtual ~EnvironmentMakeOp() override; 58 | 59 | void Compute(OpKernelContext *context) override LOCKS_EXCLUDED(mu_); 60 | 61 | protected: 62 | // Variables accessible from subclasses. 63 | tensorflow::mutex mu_; 64 | ContainerInfo cinfo_ GUARDED_BY(mu_); 65 | BaseEnvironment* resource_ GUARDED_BY(mu_) = nullptr; 66 | int batch_size; 67 | 68 | private: 69 | // During the first Compute(), resource is either created or looked up using 70 | // shared_name. In the latter case, the resource found should be verified if 71 | // it is compatible with this op's configuration. The verification may fail in 72 | // cases such as two graphs asking queues of the same shared name to have 73 | // inconsistent capacities. 
74 | virtual Status VerifyResource(BaseEnvironment *resource); 75 | 76 | PersistentTensor handle_ GUARDED_BY(mu_); 77 | 78 | virtual Status CreateResource(OpKernelContext *context, BaseEnvironment **ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) = 0; 79 | 80 | TF_DISALLOW_COPY_AND_ASSIGN(EnvironmentMakeOp); 81 | }; 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/tf_env.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | import numpy as np 20 | import os 21 | 22 | import tensorflow as tf 23 | 24 | gym_tensorflow_module = tf.load_op_library(os.path.join(os.path.dirname(__file__), 'gym_tensorflow.so')) 25 | 26 | 27 | class TensorFlowEnv(object): 28 | pass 29 | 30 | 31 | class PythonEnv(TensorFlowEnv): 32 | def step(self, action, indices=None, name=None): 33 | with tf.variable_scope(name, default_name='PythonStep'): 34 | reward, done = tf.py_func(self._step, [action, indices], [tf.float32, tf.bool]) 35 | reward.set_shape(indices.get_shape()) 36 | done.set_shape(indices.get_shape()) 37 | return reward, done 38 | 39 | def _reset(self, indices): 40 | raise NotImplementedError() 41 | 42 | def reset(self, indices=None, max_frames=None, name=None): 43 | with tf.variable_scope(name, default_name='PythonReset'): 44 | return tf.py_func(self._reset, [indices], tf.int64).op 45 | 46 | def _step(self, action, indices): 47 | raise NotImplementedError() 48 | 49 | def _obs(self, indices): 50 | raise NotImplementedError() 51 | 52 | def observation(self, indices=None, name=None): 53 | with tf.variable_scope(name, default_name='PythonObservation'): 54 | obs = tf.py_func(self._obs, [indices], tf.float32) 55 | obs.set_shape(tuple(indices.get_shape()) + self.observation_space) 56 | return tf.expand_dims(obs, axis=1) 57 | 58 | def final_state(self, indices, name=None): 59 | with tf.variable_scope(name, default_name='PythonFinalState'): 60 | return tf.zeros([tf.shape(indices)[0], 2], dtype=tf.float32) 61 | 62 | @property 63 | def unwrapped(self): 64 | return self 65 | 66 | def close(self): 67 | pass 68 | 69 | 70 | class GymEnv(PythonEnv): 71 | def __init__(self, name, batch_size): 72 | import gym 73 | self.env = [gym.make(name) for _ in range(batch_size)] 74 | self.obs = [None] * batch_size 75 | 76 | @property 77 | def action_space(self): 
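'''Flattened size of the underlying Gym action space (a continuous Box space is assumed here; see discrete_action below).'''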
78 | return np.prod(self.env[0].action_space.shape) 79 | 80 | @property 81 | def observation_space(self): 82 | return self.env[0].observation_space.shape 83 | 84 | @property 85 | def discrete_action(self): 86 | return False 87 | 88 | def _step(self, action, indices): 89 | assert not self.discrete_action 90 | results = map(lambda i: self.env[indices[i]].step(action[i]), range(len(indices))) 91 | obs, reward, done, _ = zip(*results) 92 | for i in range(len(indices)): 93 | self.obs[indices[i]] = obs[i].astype(np.float32) 94 | 95 | return np.array(reward, dtype=np.float32), np.array(done, dtype=np.bool) 96 | 97 | def _reset(self, indices): 98 | for i in indices: 99 | self.obs[i] = self.env[i].reset().astype(np.float32) 100 | return 0 101 | 102 | def _obs(self, indices): 103 | return np.array([self.obs[i] for i in indices]).astype(np.float32) 104 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .stack_frames import StackFramesWrapper -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/gym_tensorflow/wrappers/stack_frames.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | from gym_tensorflow.tf_env import TensorFlowEnv 6 | 7 | class StackFramesWrapper(TensorFlowEnv): 8 | def __init__(self, env, num_stacked_frames=4): 9 | self.env = env 10 | self.num_stacked_frames = num_stacked_frames 11 | self.obs_variable = tf.Variable(tf.zeros(shape=self.observation_space, dtype=tf.float32), trainable=False) 12 | 13 | @property 14 | def batch_size(self): 15 | return self.env.batch_size 16 | 17 | @property 18 | def env_default_timestep_cutoff(self): 19 | return self.env.env_default_timestep_cutoff 20 | 21 | @property 22 | def action_space(self): 23 | return self.env.action_space 24 | 25 | @property 26 | def observation_space(self): 27 | return self.env.observation_space[:-1] + (self.env.observation_space[-1] * self.num_stacked_frames, ) 28 | 29 | @property 30 | def discrete_action(self): 31 | return self.env.discrete_action 32 | 33 | def stack_observation(self, indices, reset=False): 34 | obs = self.env.observation(indices) 35 | 36 | if reset: 37 | obs_batch = tf.zeros((tf.shape(indices)[0],) + self.env.observation_space[1:-1] + (self.env.observation_space[-1] * self.num_stacked_frames-1, ), dtype=tf.float32) 38 | obs_batch = tf.concat([obs_batch, obs], axis=-1) 39 | else: 40 | obs_batch = tf.gather(self.obs_variable, indices) 41 | obs_batch = tf.slice(obs_batch, (0, 0, 0, 1), (-1, -1, -1, -1)) 42 | obs_batch = tf.concat([obs_batch, obs], axis=-1) 43 | return tf.scatter_update(self.obs_variable, indices, obs_batch) 44 | 45 | def step(self, action, indices=None, name=None): 46 | if indices is None: 47 | indices = np.arange(self.batch_size) 48 | rew, done = self.env.step(action=action, indices=indices, name=name) 49 | with tf.control_dependencies([rew, done]): 50 | with tf.control_dependencies([self.stack_observation(indices)]): 51 | return tf.identity(rew), tf.identity(done) 52 | 53 | def reset(self, indices=None, max_frames=None, name=None): 54 | '''Resets the wrapped environments with a random noop start (1-30) and sets the maximum number of frames for the episode (default 100,000 * frameskip) 55 | ''' 56 | if indices is None: 57 | indices =
np.arange(self.batch_size) 58 | reset_op = self.env.reset(indices=indices, max_frames=max_frames, name=name) 59 | with tf.control_dependencies([reset_op]): 60 | return self.stack_observation(indices, reset=True).op 61 | 62 | def observation(self, indices=None, name=None): 63 | '''Returns current observation after preprocessing (skip, grayscale, warp, stack).\nMust be called ONCE each time step is called if num_stacked_frames > 1 64 | ''' 65 | if indices is None: 66 | indices = np.arange(self.batch_size) 67 | return tf.gather(self.obs_variable, indices) 68 | 69 | def final_state(self, indices, name=None): 70 | return self.env.final_state(indices, name) 71 | 72 | @property 73 | def unwrapped(self): 74 | return self.env 75 | 76 | def close(self): 77 | return self.env.close() 78 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/gpu_implementation/neuroevolution/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/distributed_helpers.py: -------------------------------------------------------------------------------- 1 | 2 | import threading 3 | from queue import Queue 4 | from multiprocessing.pool import ApplyResult 5 | 6 | import tabular_logger as tlogger 7 | 8 | class AsyncWorker(object): 9 | @property 10 | def concurrent_tasks(self): 11 | raise NotImplementedError() 12 | 13 | def run_async(self, task_id, task, callback): 14 | raise NotImplementedError() 15 | 16 | 17 | class WorkerHub(object): 18 | def __init__(self, workers, input_queue, done_queue): 19 | self.done_buffer = Queue() 20 | self.workers = workers 21 | self.available_workers = Queue() 22 | self.done_queue = done_queue 23 | self._cache = {} 24 | self.input_queue = input_queue 25 | 26 | for w in workers: 27 | for t in w.concurrent_tasks: 28 | self.available_workers.put((w, t)) 29 | 30 | self.__initialize_handlers() 31 | 32 | def __initialize_handlers(self): 33 | self._input_handler = threading.Thread( 34 | target=WorkerHub._handle_input, 35 | args=(self,) 36 | ) 37 | self._input_handler._state = 0 38 | 39 | self._output_handler = threading.Thread( 40 | target=WorkerHub._handle_output, 41 | args=(self,) 42 | ) 43 | self._output_handler._state = 0 44 | 45 | def worker_callback(self, worker, subworker, result): 46 | worker_task = (worker, subworker) 47 | self.available_workers.put(worker_task) 48 | task_id = self._cache[worker_task] 49 | del self._cache[worker_task] 50 | self.done_buffer.put((task_id, result)) 51 | 52 | @staticmethod 53 | def _handle_input(self): 54 | try: 55 | while True: 56 | worker_task = self.available_workers.get() 57 | if worker_task is None: 58 | tlogger.info('WorkerHub._handle_input done') 59 | break 60 | worker, subworker = worker_task 61 | 62 | task = self.input_queue.get() 63 | if task is None: 64 | tlogger.info('WorkerHub._handle_input done') 65 | break 66 | task_id, task = task 67 | self._cache[worker_task] = task_id 68 | 69 | worker.run_async(subworker, task, self.worker_callback) 70 | except: 71 | tlogger.exception('WorkerHub._handle_input exception thrown') 72 | raise 73 | 74 | @staticmethod 75 | def _handle_output(self): 76 | try: 77 | while True: 78 | result = self.done_buffer.get() 79 | if result is None: 80 | 
tlogger.info('WorkerHub._handle_output done') 81 | break 82 | self.done_queue.put(result) 83 | except: 84 | tlogger.exception('WorkerHub._handle_output exception thrown') 85 | raise 86 | 87 | def initialize(self): 88 | self._input_handler.start() 89 | self._output_handler.start() 90 | 91 | def close(self): 92 | self.available_workers.put(None) 93 | self.input_queue.put(None) 94 | self.done_buffer.put(None) 95 | 96 | class AsyncTaskHub(object): 97 | def __init__(self, input_queue=None, results_queue=None): 98 | if input_queue is None: 99 | input_queue = Queue(64) 100 | self.input_queue = input_queue 101 | self._cache = {} 102 | self.results_queue = None 103 | if results_queue is not None: 104 | self.results_queue = results_queue 105 | 106 | self._output_handler = threading.Thread( 107 | target=AsyncTaskHub._handle_output, 108 | args=(self,) 109 | ) 110 | self._output_handler.daemon = True 111 | self._output_handler._state = 0 112 | self._output_handler.start() 113 | 114 | @staticmethod 115 | def _handle_output(self): 116 | try: 117 | while True: 118 | result = self.results_queue.get() 119 | if result is None: 120 | tlogger.info('AsyncTaskHub._handle_output done') 121 | break 122 | self.put(result) 123 | except: 124 | tlogger.exception('AsyncTaskHub._handle_output exception thrown') 125 | raise 126 | 127 | def run_async(self, task, callback=None, error_callback=None): 128 | result = ApplyResult(self._cache, callback, error_callback) 129 | self.input_queue.put((result._job, task)) 130 | return result 131 | 132 | def put(self, result): 133 | job, result=result 134 | self._cache[job]._set(0, (True, result)) 135 | 136 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/helper.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import numbers 21 | import threading 22 | from queue import Queue 23 | import numpy as np 24 | import math 25 | 26 | 27 | class SharedNoiseTable(object): 28 | def __init__(self): 29 | import ctypes, multiprocessing 30 | seed = 123 31 | count = 250000000 # 1 gigabyte of 32-bit numbers. Will actually sample 2 gigabytes below. 
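# The table is backed by shared memory so that every worker process on the
# machine reads the same noise without copying it; a perturbation can then be
# described by a (start index, dim) pair into this table (see get() and
# sample_index() below) rather than by a full parameter vector.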
32 | print('Sampling {} random numbers with seed {}'.format(count, seed)) 33 | self._shared_mem = multiprocessing.Array(ctypes.c_float, count) 34 | self.noise = np.ctypeslib.as_array(self._shared_mem.get_obj()) 35 | assert self.noise.dtype == np.float32 36 | self.noise[:] = np.random.RandomState(seed).randn(count) # 64-bit to 32-bit conversion here 37 | print('Sampled {} bytes'.format(self.noise.size * 4)) 38 | 39 | def get(self, i, dim): 40 | return self.noise[i:i + dim] 41 | 42 | def sample_index(self, stream, dim): 43 | return stream.randint(0, len(self.noise) - dim + 1) 44 | 45 | 46 | class ConstantSchedule(object): 47 | def __init__(self, value): 48 | self._value = value 49 | 50 | def value(self, **kwargs): 51 | return self._value 52 | 53 | 54 | class LinearSchedule(object): 55 | def __init__(self, schedule, final_p, initial_p, field): 56 | self.schedule = schedule 57 | self.field = field 58 | self.final_p = final_p 59 | self.initial_p = initial_p 60 | 61 | def value(self, **kwargs): 62 | assert self.field in kwargs, "Argument {} not provided to scheduler. Available: {}".format(self.field, kwargs) 63 | fraction = min(float(kwargs[self.field]) / self.schedule, 1.0) 64 | return self.initial_p + fraction * (self.final_p - self.initial_p) 65 | 66 | 67 | class ExponentialSchedule(object): 68 | def __init__(self, initial_p, final_p, schedule, field): 69 | self.initial_p = initial_p 70 | self.final_p = final_p 71 | self.schedule = schedule 72 | self.field = field 73 | 74 | self.linear = LinearSchedule( 75 | initial_p=math.log(self.initial_p), 76 | final_p=math.log(self.final_p), 77 | schedule=self.schedule, 78 | field=self.field) 79 | 80 | def value(self, **kwargs): 81 | return math.exp(self.linear.value(**kwargs)) 82 | 83 | 84 | def make_schedule(args): 85 | if isinstance(args, numbers.Number): 86 | return ConstantSchedule(args) 87 | else: 88 | return globals()[args['type']](**{key: value for key, value in args.items() if key != 'type'}) 89 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .dqn_xavier import SmallDQN, LargeDQN 2 | from .dqn import Model, LargeModel 3 | from .batchnorm import ModelBN, ModelVirtualBN 4 | from .simple import LinearClassifier, SimpleClassifier 5 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/batchnorm.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | from .dqn import Model 21 | import tensorflow as tf 22 | 23 | 24 | class ModelBN(Model): 25 | def __init__(self): 26 | super(ModelBN, self).__init__() 27 | self.nonlin = lambda x: tf.nn.relu(self.batchnorm(x)) 28 | def batchnorm(self, x): 29 | with tf.variable_scope(None, default_name='BatchNorm'): 30 | ret = tf.layers.batch_normalization(x, center=False, scale=False, training=True) 31 | 32 | if len(x.get_shape()) == 4: 33 | b = self.create_bias_variable('b', (1, 1, ret.get_shape()[-1].value)) 34 | else: 35 | b = self.create_bias_variable('b', (1, ret.get_shape()[-1].value)) 36 | if self.indices is not None: 37 | b = tf.gather(b, self.indices) 38 | 39 | ret = ret + b 40 | return ret 41 | 42 | def _make_net(self, x, num_actions): 43 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4, bias=False)) 44 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2, bias=False)) 45 | x = self.flattenallbut0(x) 46 | x = self.nonlin(self.dense(x, 256, 'fc', bias=False)) 47 | 48 | ret = self.dense(x, num_actions, 'out', std=0.1) 49 | return ret 50 | 51 | 52 | class ModelVirtualBN(Model): 53 | def __init__(self): 54 | super(ModelVirtualBN, self).__init__() 55 | self.is_ref_batch = False 56 | self.nonlin = lambda x: tf.nn.relu(self.batchnorm(x)) 57 | self.device = None 58 | 59 | @property 60 | def requires_ref_batch(self): 61 | return True 62 | 63 | # This got a little out of hand, but it maintains a set of mean/var variables that are updated on load and used during inference. 
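# "Virtual" batch norm: when is_ref_batch is True, the moments of a fixed
# reference batch are computed and stored in the vbn_mean/vbn_var variables;
# every later forward pass reuses those stored statistics instead of the
# current batch's, keeping each evolved policy's behavior deterministic.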
64 | def batchnorm(self, x): 65 | with tf.variable_scope('BatchNorm'): 66 | if len(x.get_shape()) == 5: 67 | vbn_mean = tf.get_variable('mean', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 68 | vbn_var = tf.get_variable('var', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 69 | else: 70 | vbn_mean = tf.get_variable('mean', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 71 | vbn_var = tf.get_variable('var', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False) 72 | 73 | if self.is_ref_batch: 74 | mean, var = tf.nn.moments(x, list(range(1, len(x.get_shape())-1))) 75 | var = 1 / tf.sqrt(var + 1e-3) 76 | mean, var = tf.scatter_update(vbn_mean, self.indices, mean), tf.scatter_update(vbn_var, self.indices, var) 77 | else: 78 | mean, var = vbn_mean, vbn_var 79 | while len(mean.get_shape()) < len(x.get_shape()): 80 | mean, var = tf.expand_dims(mean, 1), tf.expand_dims(var, 1) 81 | 82 | if self.indices is not None: 83 | mean, var = tf.gather(mean, self.indices), tf.gather(var, self.indices) 84 | 85 | ret = (x-mean) * var 86 | 87 | if len(x.get_shape()) == 5: 88 | b = self.create_bias_variable('b', (1, 1, 1, ret.get_shape()[-1].value)) 89 | else: 90 | b = self.create_bias_variable('b', (1, ret.get_shape()[-1].value)) 91 | if self.indices is not None: 92 | b = tf.gather(b, self.indices) 93 | return ret + b 94 | 95 | def _make_net(self, x, num_actions, ): 96 | with tf.variable_scope('layer1'): 97 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4, bias=False)) 98 | with tf.variable_scope('layer2'): 99 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2, bias=False)) 100 | x = self.flattenallbut0(x) 101 | with tf.variable_scope('layer3'): 102 | x = self.nonlin(self.dense(x, 256, 'fc', bias=False)) 103 | 104 | with tf.variable_scope('layer4'): 105 | return self.dense(x, num_actions, 'out') 106 | 107 | def make_weights(self): 108 | super(ModelVirtualBN, self).make_weights() 109 | self.ref_batch_idx = tf.placeholder(tf.int32, ()) 110 | tmp = self.indices 111 | self.indices = [self.ref_batch_idx] 112 | with tf.device(self.device): 113 | with tf.variable_scope(self.scope, reuse=True): 114 | ref_batch = tf.stack([self.ref_batch]) 115 | self.is_ref_batch = True 116 | self.ref_batch_assign = self._make_net(ref_batch, self.num_actions) 117 | self.is_ref_batch = False 118 | self.indices = tmp 119 | 120 | def load(self, sess, i, *args, **kwargs): 121 | ret = super(ModelVirtualBN, self).load(sess, i, *args, **kwargs) 122 | sess.run(self.ref_batch_assign, {self.ref_batch_idx: i}) 123 | return ret 124 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/dqn.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import numpy as np 21 | import tensorflow as tf 22 | from .base import BaseModel 23 | 24 | 25 | class Model(BaseModel): 26 | def create_weight_variable(self, name, shape, std): 27 | scale_by = std / np.sqrt(np.prod(shape[:-1])) 28 | return self.create_variable(name, shape, scale_by) 29 | 30 | def _make_net(self, x, num_actions): 31 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4)) 32 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2)) 33 | x = self.flattenallbut0(x) 34 | x = self.nonlin(self.dense(x, 256, 'fc')) 35 | 36 | return self.dense(x, num_actions, 'out', std=0.1) 37 | 38 | 39 | class LargeModel(Model): 40 | def _make_net(self, x, num_actions): 41 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=32, kernel_size=8, stride=4, std=1.0)) 42 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=64, kernel_size=4, stride=2, std=1.0)) 43 | x = self.nonlin(self.conv(x, name='conv3', num_outputs=64, kernel_size=3, stride=1, std=1.0)) 44 | x = self.flattenallbut0(x) 45 | x = self.nonlin(self.dense(x, 512, 'fc')) 46 | 47 | return self.dense(x, num_actions, 'out', std=0.1) 48 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/dqn_xavier.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import tensorflow as tf 21 | from .base import BaseModel 22 | 23 | 24 | class SmallDQN(BaseModel): 25 | def _make_net(self, x, num_actions): 26 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4)) 27 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2)) 28 | x = self.flattenallbut0(x) 29 | x = self.nonlin(self.dense(x, 256, 'fc')) 30 | 31 | return self.dense(x, num_actions, 'out', std=0.1) 32 | 33 | 34 | class LargeDQN(BaseModel): 35 | def _make_net(self, x, num_actions): 36 | x = self.nonlin(self.conv(x, name='conv1', num_outputs=32, kernel_size=8, stride=4, std=1.0)) 37 | x = self.nonlin(self.conv(x, name='conv2', num_outputs=64, kernel_size=4, stride=2, std=1.0)) 38 | x = self.nonlin(self.conv(x, name='conv3', num_outputs=64, kernel_size=3, stride=1, std=1.0)) 39 | x = self.flattenallbut0(x) 40 | x = self.nonlin(self.dense(x, 512, 'fc')) 41 | 42 | return self.dense(x, num_actions, 'out', std=0.1) 43 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/models/simple.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | from .dqn import Model 21 | 22 | 23 | class LinearClassifier(Model): 24 | def _make_net(self, x, num_actions): 25 | x = self.flattenallbut0(x) 26 | ret = self.dense(x, num_actions, 'out') 27 | return ret 28 | 29 | class SimpleClassifier(Model): 30 | def _make_net(self, x, num_actions): 31 | x = self.flattenallbut0(x) 32 | x = self.nonlin(self.dense(x, 16, 'fc1')) 33 | x = self.nonlin(self.dense(x, 16, 'fc2')) 34 | ret = self.dense(x, num_actions, 'out', std=0.1) 35 | return ret 36 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/optimizers.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import numpy as np 21 | 22 | 23 | class Optimizer(object): 24 | def __init__(self, theta): 25 | self.theta = theta 26 | self.dim = len(self.theta) 27 | self.t = 0 28 | 29 | def update(self, globalg): 30 | self.t += 1 31 | step = self._compute_step(globalg) 32 | theta = self.theta 33 | ratio = np.linalg.norm(step) / np.linalg.norm(theta) 34 | new_theta = self.theta + step 35 | self.theta = new_theta 36 | return ratio, new_theta 37 | 38 | def _compute_step(self, globalg): 39 | raise NotImplementedError 40 | 41 | 42 | class SGD(Optimizer): 43 | def __init__(self, theta, stepsize, momentum=0.9): 44 | Optimizer.__init__(self, theta) 45 | self.v = np.zeros(self.dim, dtype=np.float32) 46 | self.stepsize, self.momentum = stepsize, momentum 47 | 48 | def _compute_step(self, globalg): 49 | # NOTE: different from Open AI to match more common momentum implementations (e.g. Tensorflow) 50 | # original from OpenAI: self.v = self.momentum * self.v + (1 - self.momentum) * globalg 51 | self.v = self.momentum * self.v + globalg 52 | step = -self.stepsize * self.v 53 | return step 54 | 55 | 56 | class Adam(Optimizer): 57 | def __init__(self, theta, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08): 58 | Optimizer.__init__(self, theta) 59 | self.stepsize = stepsize 60 | self.beta1 = beta1 61 | self.beta2 = beta2 62 | self.epsilon = epsilon 63 | self.m = np.zeros(self.dim, dtype=np.float32) 64 | self.v = np.zeros(self.dim, dtype=np.float32) 65 | 66 | def _compute_step(self, globalg): 67 | a = self.stepsize * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t) 68 | self.m = self.beta1 * self.m + (1 - self.beta1) * globalg 69 | self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg) 70 | step = -a * self.m / (np.sqrt(self.v) + self.epsilon) 71 | return step 72 | 73 | -------------------------------------------------------------------------------- /deepneuroevolution/gpu_implementation/neuroevolution/tf_util.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | """ 19 | 20 | import tensorflow as tf 21 | import numpy as np 22 | 23 | import tabular_logger as tlogger 24 | 25 | def get_available_gpus(): 26 | from tensorflow.python.client import device_lib 27 | local_device_protos = device_lib.list_local_devices() 28 | return [x.name for x in local_device_protos if x.device_type == 'GPU'] 29 | 30 | 31 | class WorkerSession(object): 32 | def __init__(self, worker): 33 | self._worker = worker 34 | def __enter__(self, *args, **kwargs): 35 | self._sess = tf.Session(*args, **kwargs) 36 | self._sess.run(tf.global_variables_initializer()) 37 | self._worker.initialize(self._sess) 38 | 39 | tlogger.info(self._worker.model.description) 40 | 41 | self.coord = tf.train.Coordinator() 42 | self.threads = tf.train.start_queue_runners(self._sess, self.coord, start=True) 43 | 44 | return self._sess 45 | 46 | def __exit__(self, exception_type, exception_value, traceback): 47 | if exception_type in [tf.errors.OutOfRangeError, StopIteration]: 48 | exception_type = None 49 | try: 50 | self._worker.close() 51 | self.coord.request_stop() 52 | self.coord.join(self.threads) 53 | if self._sess is None: 54 | raise RuntimeError('Session is already closed.') 55 | self._sess.close() 56 | finally: 57 | self._sess = None 58 | return exception_type is None 59 | -------------------------------------------------------------------------------- /deepneuroevolution/requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | click==6.7 3 | gym==0.9.4 4 | h5py==2.7.0 5 | mujoco-py==0.5.7 6 | numpy==1.12.1 7 | packaging==16.8 8 | pyglet==1.2.4 9 | PyOpenGL==3.1.0 10 | pyparsing==2.2.0 11 | redis==2.10.5 12 | requests==2.14.2 13 | six==1.10.0 14 | tensorflow==1.15.2 15 | Werkzeug==0.15.3 16 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/dependency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # from ami-d8bdebb8 4 | 5 | set -x 6 | 7 | sudo apt-get update 8 | sudo apt-get install -y build-essential cmake git wget htop 9 | 10 | # Build and install a new version of redis 11 | # https://www.digitalocean.com/community/tutorials/how-to-install-and-configure-redis-on-ubuntu-16-04 12 | wget --quiet http://download.redis.io/releases/redis-3.2.7.tar.gz -O redis-3.2.7.tar.gz 13 | tar -xvzf redis-3.2.7.tar.gz 14 | cd redis-3.2.7 15 | make 16 | 
sudo make install 17 | sudo mkdir /etc/redis 18 | sudo cp redis.conf /etc/redis 19 | cd .. 20 | rm -rf redis-3.2.7 redis-3.2.7.tar.gz 21 | 22 | # Set up redis working directory 23 | sudo sed -ie 's/dir \.\//dir \/var\/lib\/redis/' /etc/redis/redis.conf 24 | sudo mkdir /var/lib/redis 25 | sudo chown ubuntu:ubuntu /var/lib/redis 26 | 27 | # rely on firewall for security 28 | sudo sed -ie "s/bind 127.0.0.1//" /etc/redis/redis.conf 29 | sudo sed -ie "s/protected-mode yes/protected-mode no/" /etc/redis/redis.conf 30 | 31 | # System settings for redis 32 | echo "vm.overcommit_memory=1" | sudo tee -a /etc/sysctl.conf 33 | sudo sysctl vm.overcommit_memory=1 34 | sudo apt-get install -y hugepages 35 | echo "sudo hugeadm --thp-never" | sudo tee /etc/profile.d/disable_thp.sh > /dev/null 36 | . /etc/profile.d/disable_thp.sh 37 | 38 | # Start redis with systemctl 39 | # sudo sed -ie "s/supervised no/supervised systemd/" /etc/redis/redis.conf 40 | # ^ doesn't seem to matter; if it's enabled, the logs show "systemd supervision requested, but NOTIFY_SOCKET not found" 41 | echo " 42 | [Unit] 43 | Description=Redis In-Memory Data Store 44 | After=network.target 45 | 46 | [Service] 47 | User=ubuntu 48 | Group=ubuntu 49 | ExecStart=/usr/local/bin/redis-server /etc/redis/redis.conf 50 | ExecStop=/usr/local/bin/redis-cli shutdown 51 | Restart=always 52 | 53 | [Install] 54 | WantedBy=multi-user.target 55 | " | sudo tee /etc/systemd/system/redis.service > /dev/null 56 | sudo systemctl start redis 57 | 58 | # anaconda 59 | sudo echo 'export PATH=/opt/conda/bin:$PATH' | sudo tee /etc/profile.d/conda.sh > /dev/null 60 | sudo wget --quiet https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh -O ~/anaconda.sh 61 | sudo /bin/bash ~/anaconda.sh -b -p /opt/conda 62 | sudo rm -f ~/anaconda.sh 63 | . /etc/profile.d/conda.sh 64 | sudo /opt/conda/bin/conda update -y --all 65 | 66 | # additional python dependencies 67 | sudo /opt/conda/bin/conda install -y numpy scipy opencv 68 | 69 | # Mujoco 70 | sudo mkdir -p /opt/mujoco 71 | 72 | ####################################################### 73 | # WRITE CODE HERE TO PLACE MUJOCO 1.31 in /opt/mujoco # 74 | # The key file should be in /opt/mujoco/mjkey.txt # 75 | # Mujoco should be installed in /opt/mujoco/mjpro131 # 76 | ####################################################### 77 | 78 | sudo echo 'export MUJOCO_PY_MJKEY_PATH=/opt/mujoco/mjkey.txt' | sudo tee /etc/profile.d/mujoco.sh > /dev/null 79 | sudo echo 'export MUJOCO_PY_MJPRO_PATH=/opt/mujoco/mjpro131' | sudo tee -a /etc/profile.d/mujoco.sh > /dev/null 80 | . /etc/profile.d/mujoco.sh 81 | 82 | # ALE 83 | sudo /opt/conda/bin/conda install -y libgcc # ALE needs this for some reason 84 | sudo apt-get install -y libsdl1.2-dev 85 | git clone https://github.com/mgbellemare/Arcade-Learning-Environment 86 | cd Arcade-Learning-Environment 87 | mkdir build 88 | cd build 89 | cmake .. -DUSE_SDL=on 90 | make 91 | cd .. 92 | sudo /opt/conda/bin/pip install . 93 | cd .. 
94 | rm -rf Arcade-Learning-Environment 95 | 96 | # Tensorflow 0.11.0 97 | sudo /opt/conda/bin/conda install -c conda-forge -y tensorflow=0.11.0 98 | 99 | # Gym 100 | sudo apt-get install -y zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev libboost-all-dev libsdl2-dev swig freeglut3 libgl1 libglu1 101 | sudo /opt/conda/bin/conda install -y pyopengl 102 | sudo /opt/conda/bin/pip install \ 103 | 'gym[atari,classic_control,mujoco]' \ 104 | PyYAML \ 105 | six==1.10.0 \ 106 | awscli 107 | 108 | # pip redis 109 | sudo /opt/conda/bin/pip install redis 110 | 111 | # cleanup 112 | sudo /opt/conda/bin/conda clean -y --all 113 | sudo apt-get clean 114 | sudo rm -rf /var/lib/apt/lists/* 115 | # cleanup pip cache? 116 | 117 | set +x 118 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/local_env_setup.sh: -------------------------------------------------------------------------------- 1 | # your environment setup for a new shell window 2 | echo Setting up local environment 3 | . env/bin/activate 4 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/local_run_exp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | NAME=exp_`date "+%m_%d_%H_%M_%S"` 3 | ALGO=$1 4 | EXP_FILE=$2 5 | tmux new -s $NAME -d 6 | tmux send-keys -t $NAME '. scripts/local_env_setup.sh' C-m 7 | tmux send-keys -t $NAME 'python -m es_distributed.main master --master_socket_path /tmp/es_redis_master.sock --algo '$ALGO' --exp_file '"$EXP_FILE" C-m 8 | tmux split-window -t $NAME 9 | tmux send-keys -t $NAME '. scripts/local_env_setup.sh' C-m 10 | tmux send-keys -t $NAME 'python -m es_distributed.main workers --master_host localhost --relay_socket_path /tmp/es_redis_relay.sock --algo '$ALGO' --num_workers 10' C-m 11 | tmux a -t $NAME 12 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/local_run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | tmux new -s redis -d 3 | tmux send-keys -t redis 'redis-server redis_config/redis_master.conf' C-m 4 | tmux split-window -t redis 5 | tmux send-keys -t redis 'redis-server redis_config/redis_local_mirror.conf' C-m 6 | tmux a -t redis 7 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/packer.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "aws_access_key": "", 4 | "aws_secret_key": "" 5 | }, 6 | "builders": [ 7 | { 8 | "type": "amazon-ebs", 9 | "access_key": "{{user `aws_access_key`}}", 10 | "secret_key": "{{user `aws_secret_key`}}", 11 | "region": "us-west-1", 12 | "source_ami": "ami-d8bdebb8", 13 | "instance_type": "t2.micro", 14 | "ssh_username": "ubuntu", 15 | "ami_name": "es-dist-{{isotime \"2006-01-02-03-04-05\"}}", 16 | "ami_block_device_mappings": [ 17 | { 18 | "device_name": "/dev/sda1", 19 | "volume_size": 40, 20 | "delete_on_termination": true 21 | } 22 | ], 23 | "launch_block_device_mappings": [ 24 | { 25 | "device_name": "/dev/sda1", 26 | "volume_size": 40, 27 | "delete_on_termination": true 28 | } 29 | ], 30 | "ami_regions": [ 31 | "us-west-1", 32 | "us-west-2", 33 | "us-east-1", 34 | "us-east-2", 35 | "eu-west-1", 36 | "eu-central-1", 37 | "ap-northeast-1", 38 | "ap-northeast-2", 39 | "ap-southeast-1", 40 | "ap-southeast-2", 41 | "ap-south-1", 42 | "sa-east-1" 43 | ] 44 | 
} 45 | ], 46 | "provisioners": [ 47 | { 48 | "type": "shell", 49 | "scripts": [ 50 | "dependency.sh" 51 | ] 52 | } 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /deepneuroevolution/scripts/viz.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | 4 | @click.command() 5 | @click.argument('env_id') 6 | @click.argument('policy_file') 7 | @click.option('--record', is_flag=True) 8 | @click.option('--stochastic', is_flag=True) 9 | @click.option('--extra_kwargs') 10 | def main(env_id, policy_file, record, stochastic, extra_kwargs): 11 | import gym 12 | from gym import wrappers 13 | import tensorflow as tf 14 | from es_distributed.policies import MujocoPolicy, ESAtariPolicy 15 | from es_distributed.atari_wrappers import ScaledFloatFrame, wrap_deepmind 16 | from es_distributed.es import get_ref_batch 17 | import numpy as np 18 | 19 | is_atari_policy = "NoFrameskip" in env_id 20 | 21 | env = gym.make(env_id) 22 | if is_atari_policy: 23 | env = wrap_deepmind(env) 24 | 25 | if record: 26 | import uuid 27 | env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True) 28 | 29 | if extra_kwargs: 30 | import json 31 | extra_kwargs = json.loads(extra_kwargs) 32 | 33 | with tf.Session(): 34 | if is_atari_policy: 35 | pi = ESAtariPolicy.Load(policy_file, extra_kwargs=extra_kwargs) 36 | pi.set_ref_batch(get_ref_batch(env, batch_size=128)) 37 | else: 38 | pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs) 39 | 40 | while True: 41 | if is_atari_policy: 42 | rews, t, novelty_vector = pi.rollout(env, render=True, random_stream=np.random if stochastic else None) 43 | print('return={:.4f} len={}'.format(rews.sum(), t)) 44 | 45 | if record: 46 | env.close() 47 | return 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /deepneuroevolution/train_large.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,GreenHillZone.Act3 5 | SonicTheHedgehog-Genesis,GreenHillZone.Act1 6 | SonicTheHedgehog-Genesis,StarLightZone.Act2 7 | SonicTheHedgehog-Genesis,StarLightZone.Act1 8 | SonicTheHedgehog-Genesis,MarbleZone.Act2 9 | SonicTheHedgehog-Genesis,MarbleZone.Act1 10 | SonicTheHedgehog-Genesis,MarbleZone.Act3 11 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 12 | SonicTheHedgehog-Genesis,LabyrinthZone.Act2 13 | SonicTheHedgehog-Genesis,LabyrinthZone.Act1 14 | SonicTheHedgehog-Genesis,LabyrinthZone.Act3 15 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act1 16 | SonicTheHedgehog2-Genesis,EmeraldHillZone.Act2 17 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 18 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act1 19 | SonicTheHedgehog2-Genesis,MetropolisZone.Act1 20 | SonicTheHedgehog2-Genesis,MetropolisZone.Act2 21 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 22 | SonicTheHedgehog2-Genesis,OilOceanZone.Act2 23 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act2 24 | SonicTheHedgehog2-Genesis,MysticCaveZone.Act1 25 | SonicTheHedgehog2-Genesis,HillTopZone.Act1 26 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 27 | SonicTheHedgehog2-Genesis,WingFortressZone 28 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 29 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act1 30 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 31 | 
SonicAndKnuckles3-Genesis,CarnivalNightZone.Act2 32 | SonicAndKnuckles3-Genesis,CarnivalNightZone.Act1 33 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act1 34 | SonicAndKnuckles3-Genesis,MarbleGardenZone.Act2 35 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 36 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act1 37 | SonicAndKnuckles3-Genesis,DeathEggZone.Act1 38 | SonicAndKnuckles3-Genesis,DeathEggZone.Act2 39 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 40 | SonicAndKnuckles3-Genesis,SandopolisZone.Act1 41 | SonicAndKnuckles3-Genesis,SandopolisZone.Act2 42 | SonicAndKnuckles3-Genesis,HiddenPalaceZone 43 | SonicAndKnuckles3-Genesis,HydrocityZone.Act2 44 | SonicAndKnuckles3-Genesis,IcecapZone.Act1 45 | SonicAndKnuckles3-Genesis,IcecapZone.Act2 46 | SonicAndKnuckles3-Genesis,AngelIslandZone.Act1 47 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act2 48 | SonicAndKnuckles3-Genesis,LaunchBaseZone.Act1 -------------------------------------------------------------------------------- /deepneuroevolution/train_small.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,StarLightZone.Act2 4 | SonicTheHedgehog-Genesis,ScrapBrainZone.Act2 5 | SonicTheHedgehog2-Genesis,ChemicalPlantZone.Act2 6 | SonicTheHedgehog2-Genesis,OilOceanZone.Act1 7 | SonicTheHedgehog2-Genesis,CasinoNightZone.Act1 8 | SonicTheHedgehog2-Genesis,AquaticRuinZone.Act2 9 | SonicAndKnuckles3-Genesis,LavaReefZone.Act2 10 | SonicAndKnuckles3-Genesis,MushroomHillZone.Act2 11 | SonicAndKnuckles3-Genesis,FlyingBatteryZone.Act1 -------------------------------------------------------------------------------- /deepneuroevolution/train_spring_yard.csv: -------------------------------------------------------------------------------- 1 | game,state 2 | SonicTheHedgehog-Genesis,SpringYardZone.Act3 3 | SonicTheHedgehog-Genesis,SpringYardZone.Act2 4 | SonicTheHedgehog-Genesis,SpringYardZone.Act1 -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/README.md: -------------------------------------------------------------------------------- 1 | ## Visual Inspector for NeuroEvolution (VINE) 2 | 3 | This repo contains implementations of VINE, i.e., Visual Inspector for NeuroEvolution, an interactive data visualization tool for neuroevolution. An article describing this visualization tool can be found [here](https://eng.uber.com/vine/). 4 | 5 | ### Dependencies that need to be downloaded by end-user from a third party 6 | 7 | In addition to requirements in `../requirements.txt`: 8 | 9 | * [Matplotlib](https://matplotlib.org/) -- version 2.0.2 10 | * [Sklearn](http://scikit-learn.org/stable/) -- version 0.19.1 11 | * [Pandas](https://pandas.pydata.org/) -- version 0.22.0 12 | * [Colour](https://github.com/vaab/colour) -- version 0.1.5 13 | 14 | ### Visualize the pseudo-offspring clouds 15 | 16 | __Example 1__: visualize the sample Mujoco Humanoid 2D BC (i.e., final x-y location) data for Generations 90 to 99 17 | ``` 18 | python -m main_mujoco 90 99 sample_data/mujoco/final_xy_bc/ 19 | ``` 20 | This will bring up the GUI which consists of two interrelated plots: a pseudo-offspring cloud plot, and a fitness plot, similar to Figure 2 of the [article](https://eng.uber.com/vine/), which is described in detail there. 
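The BC data is expected to be laid out one directory per generation under the data path, e.g. `sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0097/` containing `snapshot_parent_0097.dat` and `snapshot_offspring_0097.dat`; see `dimen_red/assemble.py` below for the exact naming scheme.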
21 | 22 | __Example 2__: click the `Movie` button on the GUI to generate a visualization of the moving cloud similar to Figure 3 of the [article](https://eng.uber.com/vine/), which can be saved as a movie clip by checking the `save movie` checkbox. 23 | 24 | __Example 3__: right click any point of the pseudo-offspring cloud to view videos of the corresponding agent’s deterministic and stochastic behaviors (only available for Generation 97 in `sample_data`). Follow the steps (all "clicks" are right clicks) illustrated in Figure 5 of the [article](https://eng.uber.com/vine/). 25 | 26 | 27 | To see HELP for the complete description of all available options (e.g., multiple BCs and high-dimensional BCs): 28 | ``` 29 | python -m main_mujoco --help 30 | ``` 31 | 32 | 33 | ### Using dimensionality reduction to process high-dimensional BCs 34 | 35 | Assume you would like to reduce 2000D BCs to 2D for Generations 0 to 99 using PCA: 36 | ``` 37 | python -m process_bc 0 99 2000 --method pca 38 | ``` 39 | The reduced BC data is stored at `<path>/reduced_pca`. 40 | 41 | To see HELP for the complete description of all available options: 42 | ``` 43 | python -m process_bc --help 44 | ``` 45 | 46 | ### Create and visualize your own data 47 | 48 | 1. Choose proper behavior characterizations (BCs) (refer to the [article](https://eng.uber.com/vine/) for examples). 49 | 2. Make moderate modifications to your GA or ES code so that it dumps out the BCs during neuroevolution. 50 | Examples of BC choices and modified versions of GA and ES, namely `es_modified.py` and `ga_modified.py`, are provided in `../es_distributed` for your reference. 51 | 3. If applicable, use dimensionality reduction (see above) to reduce high-dimensional BCs to 2D. 52 | 4. Create (if necessary) and run a `main_*.py` file to launch the GUI. 53 | `main_mujoco.py` or `main_atari.py` can be used directly or as a template for most of your use cases. 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/dimen_red/assemble.py: -------------------------------------------------------------------------------- 1 | """Assemble hi-D BCs from all generations""" 2 | import numpy as np 3 | import pandas as pd 4 | 5 | def assemble(start_iter, end_iter, path, *, bc_dim, ds_ratio): 6 | """Assemble hi-D BCs from all generations""" 7 | print("Assembling {}-D BCs... 
with ds_ratio={}".format(bc_dim, ds_ratio)) 8 | 9 | X, parent_options, child_options, labels = [], [], [], [] 10 | for gen in range(start_iter, end_iter+1): 11 | print('processing iter {}...'.format(gen)) 12 | parent_file = '{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.dat'.format(path, gen, gen) 13 | pdata = np.loadtxt(parent_file) 14 | 15 | p_bc = pdata[:bc_dim] 16 | X.append(p_bc) 17 | parent_options.append(pdata[bc_dim:]) 18 | labels.append(pdata[bc_dim:bc_dim+1]) 19 | 20 | offspring_file = '{}/snapshots/snapshot_gen_{:04d}/snapshot_offspring_{:04d}.dat'.format(path, gen, gen) 21 | odata = pd.read_csv(offspring_file, sep=' ', header=None).values 22 | 23 | num_rows = odata.shape[0] 24 | selected = list(range(num_rows)) 25 | if num_rows >= 10 and ds_ratio < 1.0: 26 | rndperm = np.random.permutation(num_rows) 27 | n_ds = max(10, int(num_rows*ds_ratio)) 28 | selected = rndperm[:n_ds] 29 | 30 | o_bc = odata[selected, :bc_dim] 31 | num_os = o_bc.shape[0] 32 | X.append(o_bc) 33 | child_options.append(odata[selected, bc_dim:]) 34 | labels.append(odata[selected, bc_dim:bc_dim+1]) 35 | 36 | return np.vstack(X), parent_options, child_options, num_os, np.vstack(labels) 37 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/dimen_red/disassemble.py: -------------------------------------------------------------------------------- 1 | """disassemble into files by generation""" 2 | import os 3 | import os.path as osp 4 | import fnmatch 5 | from shutil import copyfile 6 | import numpy as np 7 | 8 | 9 | def disassemble(X, parent_options, child_options, method, *, 10 | start_iter, end_iter, path, chunk, copy_file_patterns): 11 | """Disassemble reduced BCs into each generation""" 12 | print("Disassembling and writing ...") 13 | 14 | assert len(parent_options) == len(child_options) == end_iter - start_iter + 1 15 | num_gens = len(parent_options) 16 | dir_name = "reduced_{}".format(method) 17 | 18 | for i in range(num_gens): 19 | gen = i + start_iter 20 | print('processing iter {}...'.format(gen)) 21 | 22 | dir_name_gen = '{}/{}/snapshots/snapshot_gen_{:04d}'.format(path, dir_name, gen) 23 | if not osp.exists(dir_name_gen): 24 | os.makedirs(dir_name_gen) 25 | 26 | pfile_name = '{}/snapshot_parent_{:04d}.dat'.format(dir_name_gen, gen) 27 | X_pdata = np.hstack((X[i*chunk, :], parent_options[i])) 28 | len_pdata = len(X_pdata) 29 | np.savetxt(pfile_name, X_pdata.reshape(1, len_pdata)) 30 | 31 | ofile_name = '{}/{}/snapshots/snapshot_gen_{:04d}/snapshot_offspring_{:04d}.dat'.format(path, dir_name, gen, gen) 32 | X_osdata = np.hstack((X[i*chunk+1:(i+1)*chunk, :], child_options[i])) 33 | np.savetxt(ofile_name, X_osdata) 34 | 35 | if copy_file_patterns is not None: 36 | src_dir = '{}/snapshots/snapshot_gen_{:04d}'.format(path, gen) 37 | for pattern in copy_file_patterns: 38 | for file in os.listdir(src_dir): 39 | if fnmatch.fnmatch(file, pattern): 40 | copyfile('{}/{}'.format(src_dir, file), 41 | '{}/{}'.format(dir_name_gen, file)) 42 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/dimen_red/reduce.py: -------------------------------------------------------------------------------- 1 | """dimensionality reduction""" 2 | from sklearn import decomposition, manifold, discriminant_analysis 3 | import numpy as np 4 | 5 | 6 | def reduce_dim(X, *, labels, method='pca'): 7 | """dimensionality reduction""" 8 | print("Reducing ...") 9 | 10 | if method == 'downsampling': 11 | 
X_r = X 12 | elif method == 'lda': 13 | X2 = X.copy() 14 | X2.flat[::X.shape[1] + 1] += 0.01 15 | X_r = discriminant_analysis.LinearDiscriminantAnalysis(n_components=2).fit_transform(X2, labels) 16 | elif method == 'tsne': 17 | X_pca = decomposition.PCA(n_components=50).fit_transform(X) 18 | X_r = manifold.TSNE(n_components=2, perplexity=30, 19 | verbose=2, random_state=0, n_iter=1000).fit_transform(X_pca) 20 | elif method == 'pca': 21 | X_r = decomposition.PCA(n_components=2).fit_transform(X) 22 | elif method == 'two_end': 23 | nrow, ncol = X.shape 24 | idx_last_x, idx_last_y = int(ncol / 2 - 1), -1 25 | X_r = np.hstack((X[:, idx_last_x].reshape(nrow, 1), X[:, idx_last_y].reshape(nrow, 1))) 26 | else: 27 | raise NotImplementedError 28 | 29 | print('Reduction Completed! X.shape={} X_r.shape={}'.format(X.shape, X_r.shape)) 30 | return X_r 31 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/visual_inspector/figure_base/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/buttons.py: -------------------------------------------------------------------------------- 1 | """buttons""" 2 | from matplotlib.widgets import Button, CheckButtons, RadioButtons 3 | from figure_base.figure_control import FigureControl 4 | 5 | 6 | class _CheckButtons(CheckButtons): 7 | def enforce(self, bval, index): 8 | print(bval) 9 | if not 0 <= index < len(self.labels): 10 | raise ValueError("Invalid CheckButton index: %d" % index) 11 | 12 | l1, l2 = self.lines[index] 13 | l1.set_visible(bval) 14 | l2.set_visible(bval) 15 | 16 | if self.drawon: 17 | self.ax.figure.canvas.draw() 18 | 19 | class _RadioButtons(RadioButtons): 20 | def __init__(self, *args, **kwargs): 21 | self.val2index = kwargs.pop('val2index') 22 | RadioButtons.__init__(self, *args, **kwargs) 23 | 24 | def enforce(self, val): 25 | index = self.val2index[val] 26 | 27 | if not 0 <= index < len(self.labels): 28 | raise ValueError("Invalid RadioButton index: %d" % index) 29 | 30 | self.value_selected = self.labels[index].get_text() 31 | 32 | for i, p in enumerate(self.circles): 33 | if i == index: 34 | color = self.activecolor 35 | else: 36 | color = self.ax.get_facecolor() 37 | p.set_facecolor(color) 38 | 39 | if self.drawon: 40 | self.ax.figure.canvas.draw() 41 | 42 | class ButtonArea(): 43 | def __init__(self, fig, visible_range): 44 | 45 | self.axhome = fig.add_axes([0.46, 0.01, 0.08, 0.05]) 46 | self.axreset = fig.add_axes([0.55, 0.01, 0.08, 0.05]) 47 | self.axmovie = fig.add_axes([0.64, 0.01, 0.08, 0.05]) 48 | self.axprev = fig.add_axes([0.73, 0.01, 0.08, 0.05]) 49 | self.axnext = fig.add_axes([0.82, 0.01, 0.08, 0.05]) 50 | self.bhome = Button(self.axhome, 'Home') 51 | self.bhome.on_clicked(self.home) 52 | self.breset = Button(self.axreset, 'Reset') 53 | self.breset.on_clicked(self.reset) 54 | self.bmovie = Button(self.axmovie, 'Movie') 55 | self.bmovie.on_clicked(self.movie) 56 | self.bnext = Button(self.axnext, 'Next') 57 | self.bnext.on_clicked(self.next) 58 | self.bprev = Button(self.axprev, 'Prev') 59 | self.bprev.on_clicked(self.prev) 60 | 61 | 62 | self.checkb_ax = fig.add_axes([0., 0.0, 0.1, 0.09]) 63 | self.checkb_ax.axis('off') 64 | 65 | self.checkb_ax_pace =
fig.add_axes([0.1, 0.0, 0.1, 0.09]) 66 | self.checkb_ax_pace.axis('off') 67 | 68 | self.checkb_ax_savem = fig.add_axes([0.2, 0.0, 0.1, 0.09]) 69 | self.checkb_ax_savem.axis('off') 70 | 71 | curr_stoc = FigureControl.offspring_stochastic 72 | self.check = _CheckButtons(self.checkb_ax, ['Random\nSeed'], [curr_stoc]) 73 | self.check.on_clicked(FigureControl.stochastic) 74 | 75 | self.check_pace = _CheckButtons(self.checkb_ax_pace, ['Fast\nPace'], 76 | [FigureControl.step > 1]) 77 | self.check_pace.on_clicked(FigureControl.fastMove) 78 | 79 | self.check_savem = _CheckButtons(self.checkb_ax_savem, 80 | ['Save\nMovie'], [FigureControl.save_movie]) 81 | self.check_savem.on_clicked(FigureControl.saveMovie) 82 | 83 | if not visible_range: 84 | self.rb_ax = fig.add_axes([0, 0.8, 0.15, 0.15]) 85 | self.rb_ax.axis('off') 86 | self.radio = _RadioButtons(self.rb_ax, ('1', '2', '3'), val2index={1:0, 2:1, 3:2}) 87 | self.radio.on_clicked(FigureControl.pickVR) 88 | 89 | self.rb_ax_cloud = fig.add_axes([0, 0.6, 0.15, 0.15]) 90 | self.rb_ax_cloud.axis('off') 91 | self.radio_cloud = _RadioButtons(self.rb_ax_cloud, ('All', 'Top', 'None'), 92 | val2index={'AllCloud':0, 'TopOnly':1, 'NoCloud':2}) 93 | self.radio_cloud.on_clicked(FigureControl.pickCloud) 94 | 95 | def eligibleClick(self, buttonClicked): 96 | if buttonClicked == "next": 97 | return (not FigureControl.isVisible(FigureControl.maxPossibleGenNumber), 98 | "max gen already displayed") 99 | elif buttonClicked == "prev": 100 | return (not FigureControl.isVisible(FigureControl.minPossibleGenNumber), 101 | "min gen already displayed") 102 | elif buttonClicked == "movie": 103 | return True, "" 104 | else: 105 | return False, "bad button" 106 | 107 | def next(self, event=None): 108 | ok, err = self.eligibleClick("next") 109 | if not ok: 110 | FigureControl.print_error(err) 111 | else: 112 | print("showing nextGen") 113 | nextGenNum = FigureControl.minPossibleGenNumber 114 | if FigureControl.numVisibleGenNumber() > 0: 115 | nextGenNum = min(FigureControl.maxVisibleGenNumber() + FigureControl.step, 116 | FigureControl.maxPossibleGenNumber) 117 | FigureControl.makeGenVisible(nextGenNum, True, "next") 118 | 119 | def prev(self, event=None): 120 | ok, err = self.eligibleClick("prev") 121 | if not ok: 122 | FigureControl.print_error(err) 123 | else: 124 | print("showing prevGen") 125 | nextGenNum = FigureControl.maxPossibleGenNumber 126 | if FigureControl.numVisibleGenNumber() > 0: 127 | nextGenNum = max(FigureControl.minVisibleGenNumber() - FigureControl.step, 128 | FigureControl.minPossibleGenNumber) 129 | FigureControl.makeGenVisible(nextGenNum, True, "prev") 130 | 131 | def movie(self, event): 132 | FigureControl.movie(event) 133 | 134 | def reset(self, event=None): 135 | #t1 = time.time() 136 | 137 | if FigureControl.numVisibleGenNumber() != 0: 138 | while FigureControl.numVisibleGenNumber() != 0: 139 | genNumber = FigureControl.maxVisibleGenNumber() 140 | print("cleaning ...", genNumber) 141 | FigureControl.hideOffSprings(genNumber) 142 | 143 | FigureControl.clear_labels() 144 | self.home() 145 | 146 | def home(self, event=None): 147 | FigureControl.set_home() 148 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/figure_control.py: -------------------------------------------------------------------------------- 1 | """Main figure components""" 2 | import numpy as np 3 | import figure_base.settings as gs 4 | import matplotlib.pyplot as p 5 | 6 | 7 | class FigureControl(): 8 | 
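    # All state and methods below are class-level: FigureControl acts as a
    # shared, singleton-style controller for every open figure and is never
    # instantiated.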
"""Central control for all figures""" 9 | @classmethod 10 | def init(cls, start_iter, end_iter, visible_range): 11 | cls.minPossibleGenNumber = start_iter 12 | cls.maxPossibleGenNumber = end_iter 13 | cls.setOfVisibleGenNumber = set() 14 | cls.cloudMode = 'AllCloud' 15 | cls.offspring_stochastic = False 16 | cls.save_movie = False 17 | 18 | cls.step = 1 19 | if cls.maxPossibleGenNumber - cls.minPossibleGenNumber >= 100: 20 | cls.step = int((cls.maxPossibleGenNumber - cls.minPossibleGenNumber)/10) 21 | 22 | cls.maxVisibleRangeSize = 1 23 | if visible_range: 24 | visible_range = int(visible_range) 25 | cls.maxVisibleRangeSize = max(1, visible_range) 26 | 27 | @classmethod 28 | def numVisibleGenNumber(cls): 29 | return len(cls.setOfVisibleGenNumber) 30 | 31 | @classmethod 32 | def minVisibleGenNumber(cls): 33 | return min(cls.setOfVisibleGenNumber) 34 | 35 | @classmethod 36 | def maxVisibleGenNumber(cls): 37 | return max(cls.setOfVisibleGenNumber) 38 | 39 | @classmethod 40 | def isVisible(cls, thisGenNumber): 41 | return thisGenNumber in cls.setOfVisibleGenNumber 42 | 43 | @classmethod 44 | def plotOffSprings(cls, thisGenNumber): 45 | cls.setOfVisibleGenNumber.add(thisGenNumber) 46 | for cplot in gs.cloud_plots: 47 | cplot.plotOffSprings(thisGenNumber) 48 | gs.fitness_plot.markVisible(thisGenNumber, True) 49 | 50 | @classmethod 51 | def hideOffSprings(cls, thisGenNumber): 52 | cls.setOfVisibleGenNumber.remove(thisGenNumber) 53 | for cplot in gs.cloud_plots: 54 | cplot.hideOffSprings(thisGenNumber) 55 | gs.fitness_plot.markVisible(thisGenNumber, False) 56 | 57 | @classmethod 58 | def applyVisibleRange(cls, mode, newGen): 59 | print("calling applyVisibleRange") 60 | while cls.numVisibleGenNumber() >= cls.maxVisibleRangeSize: 61 | minVG, maxVG = cls.minVisibleGenNumber(), cls.maxVisibleGenNumber() 62 | if mode == "next": 63 | drop_gen = minVG 64 | elif mode == "prev": 65 | drop_gen = maxVG 66 | elif mode == "dist": 67 | dist_minVG, dist_maxVG = np.abs(newGen - minVG), np.abs(newGen - maxVG) 68 | drop_gen = minVG if dist_minVG >= dist_maxVG else maxVG 69 | print("hiding Gen {}", drop_gen) 70 | cls.hideOffSprings(drop_gen) 71 | 72 | @classmethod 73 | def pickVR(cls, label): 74 | hzdict = {'1': 1, '2': 2, '3': 3} 75 | cls.maxVisibleRangeSize = hzdict[label] 76 | print("you select {}".format(cls.maxVisibleRangeSize)) 77 | for cplot in gs.cloud_plots: 78 | cplot.button_area.radio.enforce(cls.maxVisibleRangeSize) 79 | 80 | @classmethod 81 | def pickCloud(cls, label): 82 | hzdict = {'All': 'AllCloud', 'Top': 'TopOnly', 'None': 'NoCloud'} 83 | selectedMode = hzdict[label] 84 | oldMode = cls.cloudMode 85 | cls.cloudMode = selectedMode 86 | print("you select {} vs old {}".format(selectedMode, oldMode)) 87 | for cplot in gs.cloud_plots: 88 | cplot.button_area.radio_cloud.enforce(cls.cloudMode) 89 | if oldMode != cls.cloudMode and cls.numVisibleGenNumber() > 0: 90 | for gen in cls.setOfVisibleGenNumber: 91 | cls.hideOffSprings(gen) 92 | cls.plotOffSprings(gen) 93 | cls.draw_all_cloud_plots() 94 | 95 | @classmethod 96 | def stochastic(cls, label): 97 | oldstoc = cls.offspring_stochastic 98 | cls.offspring_stochastic = not oldstoc 99 | print("offspring_stochastic_seed: ", cls.offspring_stochastic) 100 | for cplot in gs.cloud_plots: 101 | cplot.button_area.check.enforce(cls.offspring_stochastic, 0) 102 | 103 | @classmethod 104 | def saveMovie(cls, label): 105 | oldstoc = cls.save_movie 106 | cls.save_movie = not oldstoc 107 | print("save movie: ", cls.save_movie) 108 | for cplot in gs.cloud_plots: 109 | 
cplot.button_area.check_savem.enforce(cls.save_movie, 0) 110 | 111 | @classmethod 112 | def fastMove(cls, label): 113 | if cls.step > 1: 114 | cls.step = 1 115 | else: 116 | cls.step = int((cls.maxPossibleGenNumber - cls.minPossibleGenNumber)/10) 117 | cls.step = max(cls.step, 1) 118 | print("current step size: ", cls.step) 119 | for cplot in gs.cloud_plots: 120 | cplot.button_area.check_pace.enforce(cls.step > 1, 0) 121 | 122 | @classmethod 123 | def draw_all_cloud_plots(cls): 124 | '''draw all cloud plots''' 125 | for cplot in gs.cloud_plots: 126 | cplot.fig.canvas.draw() 127 | 128 | @classmethod 129 | def makeGenVisible(cls, gen, visNow, mode, *, skip_fitness_plot=False): 130 | if visNow: 131 | for cplot in gs.cloud_plots: 132 | cplot.show_new_labels_gen(gen) 133 | if cls.numVisibleGenNumber() > 0: 134 | cls.applyVisibleRange(mode, gen) 135 | cls.plotOffSprings(gen) 136 | if not skip_fitness_plot: 137 | gs.fitness_plot.setVal(gen) 138 | else: 139 | cls.hideOffSprings(gen) 140 | if not skip_fitness_plot: 141 | gs.fitness_plot.fig.canvas.draw() 142 | 143 | cls.draw_all_cloud_plots() 144 | 145 | @classmethod 146 | def print_error(cls, err): 147 | for cplot in gs.cloud_plots: 148 | cplot.text_area.show(err) 149 | cls.draw_all_cloud_plots() 150 | 151 | @classmethod 152 | def clear_labels(cls): 153 | for cplot in gs.cloud_plots: 154 | cplot.clear_labels() 155 | gs.fitness_plot.reset() 156 | 157 | @classmethod 158 | def set_home(cls): 159 | for cplot in gs.cloud_plots: 160 | cplot.reset_xy_lim() 161 | cls.draw_all_cloud_plots() 162 | 163 | @classmethod 164 | def movie(cls, event): 165 | print("you clicked movie. will be showing movie in another figure") 166 | movie_start = cls.minPossibleGenNumber 167 | if cls.numVisibleGenNumber() > 0: 168 | movie_start = cls.minVisibleGenNumber() 169 | movie_end = cls.maxPossibleGenNumber 170 | print(movie_start, movie_end) 171 | cplot = gs.canvas2cloud_plot[event.canvas] 172 | cplot.play_movie(movie_start, movie_end) 173 | 174 | @classmethod 175 | def handle_close(cls, event): 176 | print("figure closed") 177 | if event.canvas == gs.fitness_plot.fig.canvas: 178 | print("close fitness plot") 179 | p.close('all') 180 | else: 181 | cplot = gs.canvas2cloud_plot[event.canvas] 182 | print(cplot.title) 183 | gs.cloud_plots.remove(cplot) 184 | gs.canvas2cloud_plot.pop(event.canvas) 185 | if len(gs.cloud_plots) == 0: 186 | p.close('all') 187 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/fitness_figures.py: -------------------------------------------------------------------------------- 1 | """generation# v.s. fitness plot""" 2 | import matplotlib.pyplot as plt 3 | from matplotlib.widgets import Slider 4 | import numpy as np 5 | from figure_base.figure_control import FigureControl 6 | import figure_base.settings as gs 7 | from figure_base.mouse_event import FitnessPlotClick, MouseMove 8 | from figure_base.load_data import loadParentData 9 | 10 | class FitnessPlot(): 11 | """generation# v.s. 
fitness plot""" 12 | def __init__(self, title, start_iter, end_iter, snapshots_path): 13 | x, y = [], [] 14 | for iteration in range(start_iter, end_iter+1): 15 | parent, _, _ = loadParentData(snapshots_path, iteration) 16 | x.append(iteration) 17 | y.append(parent[0].fitness) 18 | 19 | self.inc = 1 20 | self.fig = plt.figure(title) 21 | self.ax = self.fig.add_subplot(111) 22 | 23 | self.sliderax = self.fig.add_axes([0.125, 0.02, 0.775, 0.03], 24 | facecolor='yellow') 25 | 26 | self.slider = DiscreteSlider(self.sliderax, 'Gen', x[0], x[-1], 27 | increment=self.inc, valinit=-x[-1], valfmt='%0.0f') 28 | self.slider.on_changed(self.update) 29 | self.x = x 30 | self.y = y 31 | self.curve, = self.ax.plot(self.x, self.y, '--', picker=3) 32 | 33 | self.floating_annot = self.ax.annotate("", xy=(0, 0), xytext=(0, -40), 34 | textcoords="offset points", 35 | arrowprops=dict(arrowstyle="->")) 36 | self.floating_annot.set_visible(False) 37 | self.floating_annot.set_fontsize(18) 38 | self.floating_annot.set_color('b') 39 | 40 | self.ax.set_xlim(x[0], x[-1]) 41 | maxy, miny = max(y), min(y) 42 | self.ax.set_ylim(miny - 0.05*abs(miny), maxy + 0.05 * abs(maxy)) 43 | self.ax.set_ylabel("Fitness") 44 | self.ax.grid(True) 45 | self.dot, = self.ax.plot(-x[-1], -1, 'o', markersize=15, markerfacecolor="None", 46 | markeredgecolor='red', markeredgewidth=3) 47 | self.mapOfGenToArtist = {} 48 | self.fig.canvas.mpl_connect('pick_event', FitnessPlotClick.onpick) 49 | self.fig.canvas.mpl_connect("motion_notify_event", MouseMove.hover) 50 | self.fig.canvas.mpl_connect('close_event', FigureControl.handle_close) 51 | 52 | def update(self, value): 53 | """update the fitness plot""" 54 | if value < 0: 55 | self.dot.set_data([[value], [-1]]) 56 | self.ax.set_title("") 57 | else: 58 | self.dot.set_data([[value], [self.y[value-self.x[0]]]]) 59 | self.ax.set_title("Gen {} Fitness {:.8f} ".format(value, self.y[value-self.x[0]])) 60 | 61 | vis_now = FigureControl.isVisible(value) 62 | if not vis_now: 63 | FigureControl.makeGenVisible(value, True, "dist", 64 | skip_fitness_plot=True) 65 | 66 | self.fig.canvas.draw() 67 | 68 | def setVal(self, val): 69 | self.slider.set_val(val) 70 | 71 | def reset(self): 72 | """reset the slider""" 73 | self.slider.reset() 74 | 75 | def markVisible(self, gen, visible): 76 | """mark a generation visible""" 77 | if not gen in self.mapOfGenToArtist: 78 | this_marker = gs.MARKERS[gen%gs.numMarkers] 79 | this_color = gs.COLOR_HEX_LISTS[gen%gs.numColors] 80 | pt, = self.ax.plot(gen, self.y[gen-self.x[0]], 81 | this_marker, 82 | color=this_color[-1], 83 | markersize=10) 84 | 85 | numdigits = int(np.log10(gen)) + 1 if gen > 0 else 1 86 | annot = self.ax.annotate(gen, xy=(gen, self.y[gen-self.x[0]]), 87 | xytext=(-5.5*numdigits, 40), textcoords="offset points", 88 | arrowprops=dict(arrowstyle="->"), fontsize=18) 89 | 90 | self.mapOfGenToArtist[gen] = (pt, annot) 91 | 92 | self.mapOfGenToArtist[gen][0].set_visible(visible) 93 | self.mapOfGenToArtist[gen][1].set_visible(visible) 94 | 95 | class DiscreteSlider(Slider): 96 | """This class is slightly adapted from the following Subscriber Content from the Stack Exchange Network 97 | https://stackoverflow.com/questions/13656387 98 | 99 | The question was asked by J Knight (https://stackoverflow.com/users/1547090/j-knight). 
100 | The answer used here was written by Joe Kington (https://stackoverflow.com/users/325565/joe-kington) 101 | and edited by Ian Campbell (https://stackoverflow.com/users/1008353/ian-campbell) 102 | 103 | Stack Exchange Network Terms of Service can be found at 104 | https://stackexchange.com/legal/terms-of-service 105 | """ 106 | """A matplotlib slider widget with discrete steps.""" 107 | def __init__(self, *args, **kwargs): 108 | """Identical to Slider.__init__, except for the "increment" kwarg. 109 | "increment" specifies the step size that the slider will be discretized 110 | to.""" 111 | self.inc = kwargs.pop('increment', 0.5) 112 | Slider.__init__(self, *args, **kwargs) 113 | self.valtext.set_text('') 114 | 115 | def set_val(self, val): 116 | discrete_val = int(val / self.inc) * self.inc 117 | # We can't just call Slider.set_val(self, discrete_val), because this 118 | # will prevent the slider from updating properly (it will get stuck at 119 | # the first step and not "slide"). Instead, we'll keep track of 120 | # the continuous value as self.val and pass in the discrete value to 121 | # everything else. 122 | xy = self.poly.xy 123 | xy[2] = discrete_val, 1 124 | xy[3] = discrete_val, 0 125 | self.poly.xy = xy 126 | if discrete_val >= 0: 127 | self.valtext.set_text(self.valfmt % discrete_val) 128 | else: 129 | self.valtext.set_text('') 130 | if self.drawon: 131 | self.ax.figure.canvas.draw() 132 | self.val = val 133 | if not self.eventson: 134 | return 135 | for _, func in self.observers.items(): 136 | func(discrete_val) 137 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/load_data.py: -------------------------------------------------------------------------------- 1 | """load data from file""" 2 | import numpy as np 3 | import figure_base.settings as gs 4 | import pandas as pd 5 | import os.path as osp 6 | 7 | def color_index(fitness, minfit, maxfit): 8 | cind = (fitness - minfit)/(maxfit - minfit) * gs.numBins 9 | cind = int(cind) 10 | if cind >= gs.numBins: 11 | cind = gs.numBins-1 12 | elif cind < 0: 13 | cind = 0 14 | 15 | return cind 16 | 17 | 18 | class GenStat: 19 | def __init__(self, artist, table, filename, op_data=None): 20 | self.parentArtist = artist 21 | self.osDataTable = table 22 | self.filename = filename 23 | self.parent_op_data = op_data 24 | self.annotation = None # annotation that indicates the selected generation 25 | 26 | class DataPoint: 27 | def __init__(self, x, y, fitness, gen, parentOrNot, message, op_data=None): 28 | self.x = x 29 | self.y = y 30 | self.fitness = fitness 31 | self.gen = gen 32 | self.parentOrNot = parentOrNot 33 | self.message = message 34 | self.child_op_data = op_data 35 | 36 | def generateMessage(thisGenNumber, parentOrNot, x, y, fitness): 37 | title_message = 'Gen {} '.format(thisGenNumber) 38 | 39 | if parentOrNot: 40 | title_message = title_message + 'Parent ' 41 | else: 42 | title_message = title_message + 'Offspring ' 43 | 44 | title_message = title_message + 'x = {:.6f} y = {:.6f} fitness (on record) = {:.8f} '.format( 45 | x, y, fitness 46 | ) 47 | 48 | return title_message 49 | 50 | def loadParentData(path, gen, bc_dim=2): 51 | filename = '{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.dat'.format(path, gen, gen) 52 | newf = np.loadtxt(filename) 53 | 54 | x_pt = newf[0: bc_dim//2] 55 | y_pt = newf[bc_dim//2 : bc_dim] 56 | area_pt = newf[bc_dim] 57 | op_data = newf[bc_dim+1:] 58 | f_pt = 
'{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.h5'.format(path, gen, gen) 59 | if not osp.exists(f_pt): 60 | f_pt = None 61 | message = generateMessage(gen, True, x_pt[-1], y_pt[-1], area_pt) 62 | return [DataPoint(x_pt, y_pt, area_pt, gen, True, message)], op_data, f_pt 63 | 64 | def loadOffspringData(path, gen, pfit, bc_dim=2): 65 | filename = '{}/snapshots/snapshot_gen_{:04d}/snapshot_offspring_{:04d}.dat'.format(path, gen, gen) 66 | newf = pd.read_csv(filename, sep=' ', header=None).values 67 | 68 | if gen not in gs.gen2sorted_indices: 69 | gs.gen2sorted_indices[gen] = newf[:, bc_dim].argsort() 70 | 71 | newf = newf[gs.gen2sorted_indices[gen]] 72 | area = newf[:, bc_dim] 73 | 74 | maxfit = max(pfit, area[-1]) 75 | minfit = min(pfit, area[0]) 76 | 77 | v = np.linspace(minfit, maxfit, num=gs.numBins+1) 78 | ind = (np.searchsorted(area, v[1:gs.numBins], side='right')) 79 | assert len(ind) == gs.numBins - 1 80 | 81 | ind_bins = [] 82 | ind_bins.append(range(0, ind[0])) 83 | for i in range(0, len(ind)-1): 84 | ind_bins.append(range(ind[i], ind[i+1])) 85 | 86 | left, right = ind[-1], len(area) 87 | 88 | if right - left <= 10: 89 | ind_bins.append(range(left, right)) 90 | assert len(ind_bins) == gs.numBins 91 | else: 92 | ind_bins.append(range(left, right-10)) 93 | ind_bins.append(range(right-10, right)) 94 | assert len(ind_bins) == gs.numBins+1 95 | 96 | return newf, ind_bins, maxfit, minfit 97 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/mouse_event.py: -------------------------------------------------------------------------------- 1 | """mouse event""" 2 | import time 3 | from figure_base.figure_control import FigureControl 4 | import figure_base.settings as gs 5 | 6 | 7 | class FitnessPlotClick(): 8 | """mouse pick event on fitness plot""" 9 | @classmethod 10 | def onpick(cls, event): 11 | """mouse pick event on fitness plot""" 12 | event_len = len(event.ind) 13 | if not event_len: 14 | return True 15 | value = event.ind[-1] + FigureControl.minPossibleGenNumber 16 | vis_now = FigureControl.isVisible(value) 17 | FigureControl.makeGenVisible(value, not vis_now, "dist") 18 | 19 | class PointClick(): 20 | """mouse pick event on cloud plot""" 21 | last_click_time = None 22 | 23 | @classmethod 24 | def rate_limiting(cls): 25 | """limit the rate of clicking""" 26 | this_click_time = time.time() 27 | time_to_last_click = None 28 | if cls.last_click_time: 29 | time_to_last_click = this_click_time - cls.last_click_time 30 | cls.last_click_time = this_click_time 31 | return time_to_last_click and time_to_last_click < 0.7 32 | 33 | @classmethod 34 | def button_1(cls, cloud_plot, artist, ind): 35 | """click with button 1, i.e., left button""" 36 | is_parent = cloud_plot.is_parent_artist(artist, ind) 37 | gen = cloud_plot.artist2gen[artist] 38 | if is_parent: 39 | vis_now = FigureControl.isVisible(gen) 40 | FigureControl.makeGenVisible(gen, not vis_now, "dist") 41 | else: 42 | row_idx = cloud_plot.artist2data[artist][ind] 43 | for cpl in gs.cloud_plots: 44 | this_data = cpl.fetch_child_data_point(gen, row_idx) 45 | cpl.show_new_labels_dp(this_data) 46 | FigureControl.draw_all_cloud_plots() 47 | cloud_plot.button_1(artist, ind) 48 | 49 | @classmethod 50 | def button_3(cls, cloud_plot, artist, ind): 51 | """click with button 3, i.e., right button""" 52 | is_parent = cloud_plot.is_parent_artist(artist, ind) 53 | gen = cloud_plot.artist2gen[artist] 54 | 55 | for cpl in gs.cloud_plots: 56 | if is_parent: 57 | 
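                # A right-click on a parent point refreshes that generation's
                # labels on every open cloud plot, so all views stay consistent.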
cpl.show_new_labels_gen(gen) 58 | else: 59 | row_idx = cloud_plot.artist2data[artist][ind] 60 | this_data = cpl.fetch_child_data_point(gen, row_idx) 61 | cpl.show_new_labels_dp(this_data) 62 | FigureControl.draw_all_cloud_plots() 63 | cloud_plot.button_3(artist, ind) 64 | 65 | @classmethod 66 | def onpick(cls, event): 67 | """mouse pick event on cloud plot""" 68 | if cls.rate_limiting(): 69 | return True 70 | 71 | if len(event.ind) != 1: 72 | print("Two or more points are too close! Please zoom in.") 73 | print("Showing the one with higher fitness score") 74 | 75 | cloud_plot = gs.canvas2cloud_plot[event.canvas] 76 | artist = event.artist 77 | ind = event.ind[-1] 78 | button = event.mouseevent.button 79 | 80 | if button == 1: 81 | cls.button_1(cloud_plot, artist, ind) 82 | elif button == 3: 83 | cls.button_3(cloud_plot, artist, ind) 84 | 85 | class MouseMove(): 86 | """mouse move event on plots""" 87 | @classmethod 88 | def update_annot(cls, ind): 89 | """update the parent floating annotations""" 90 | gen = ind + FigureControl.minPossibleGenNumber 91 | for cplot in gs.cloud_plots: 92 | fitness = cplot.update_annot(gen) 93 | 94 | text = "{}".format(gen) 95 | gs.fitness_plot.floating_annot.xy = (gen, fitness) 96 | gs.fitness_plot.floating_annot.set_text(text) 97 | 98 | @classmethod 99 | def update_plot(cls, vis): 100 | """update the plots""" 101 | for cplot in gs.cloud_plots: 102 | cplot.annot.set_visible(vis) 103 | gs.fitness_plot.floating_annot.set_visible(vis) 104 | FigureControl.draw_all_cloud_plots() 105 | gs.fitness_plot.fig.canvas.draw_idle() 106 | 107 | @classmethod 108 | def update(cls, event, curve, preferred_idx): 109 | """update the plots and/or annotations""" 110 | cont, ind = curve.contains(event) 111 | if cont: 112 | idx = ind['ind'][preferred_idx] 113 | cls.update_annot(idx) 114 | cls.update_plot(True) 115 | elif gs.fitness_plot.floating_annot.get_visible(): 116 | cls.update_plot(False) 117 | 118 | @classmethod 119 | def hover(cls, event): 120 | """mouse move event on plots""" 121 | if event.canvas == gs.fitness_plot.fig.canvas: 122 | if event.inaxes == gs.fitness_plot.ax: 123 | cls.update(event, gs.fitness_plot.curve, -1) 124 | else: 125 | cplot = gs.canvas2cloud_plot[event.canvas] 126 | if event.inaxes == cplot.main_ax: 127 | cls.update(event, cplot.main_curve, 0) 128 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/rollout_base.py: -------------------------------------------------------------------------------- 1 | """Rollout Base""" 2 | import tensorflow as tf 3 | import figure_base.settings as gs 4 | import numpy as np 5 | import sys 6 | sys.path.append("..") 7 | from es_distributed.es import SharedNoiseTable 8 | from gym import wrappers 9 | 10 | 11 | class RolloutBase(): 12 | @classmethod 13 | def make_env(cls, *args, **kwargs): 14 | raise NotImplementedError 15 | 16 | @classmethod 17 | def setup_policy(cls, *args, **kwargs): 18 | raise NotImplementedError 19 | 20 | @classmethod 21 | def print_info(cls, *args, **kwargs): 22 | pass 23 | 24 | @classmethod 25 | def post_process(cls, *args, **kwargs): 26 | return True 27 | 28 | @classmethod 29 | def setup_and_rollout_policy(cls, policy_file, thisData, *, 30 | noise_stdev=0, num_rollouts=1, fixed_seed=None, 31 | render=False, path=None, record=None, bc_choice=None): 32 | if gs.noise is None: 33 | gs.noise = SharedNoiseTable() 34 | 35 | env = cls.make_env() 36 | env.reset() 37 | tf.reset_default_graph() 38 | if path and record: 39 | env = 
wrappers.Monitor(env, path + record, force=True) 40 | 41 | 42 | result = [] 43 | 44 | with tf.Session(): 45 | pi = cls.setup_policy(policy_file, thisData, noise_stdev, path) 46 | for _ in range(0, num_rollouts): 47 | if fixed_seed: 48 | seed = fixed_seed 49 | else: 50 | seed = np.random.randint(2**31-1) 51 | 52 | if bc_choice: 53 | rews, t, novelty_vec = pi.rollout(env, render=render, 54 | random_stream=np.random.RandomState(), policy_seed=seed, bc_choice=bc_choice) 55 | else: 56 | rews, t, novelty_vec = pi.rollout(env, render=render, 57 | random_stream=np.random.RandomState(), policy_seed=seed) 58 | cls.print_info(seed, rews, t, novelty_vec) 59 | result.append((seed, rews, t, novelty_vec)) 60 | env.close() 61 | return cls.post_process(env, result) 62 | 63 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_base/settings.py: -------------------------------------------------------------------------------- 1 | """global data structure""" 2 | from colour import Color 3 | 4 | 5 | gen2sorted_indices = {} # Global sorted index for sorting BCs 6 | fitness_plot = None 7 | cloud_plots = set() 8 | canvas2cloud_plot = {} # Figure canvas to figure object 9 | 10 | noise = None # Global Noise Table 11 | numBins = 5 # Number of Color Bins for ColorBar 12 | assert numBins > 1 13 | 14 | COLORS = [ 15 | (Color('#f9d9d9'), Color('#d61515')), # red 16 | (Color('#d9ddfb'), Color('#0b1667')), # blue 17 | (Color('#9aecb8'), Color('#045c24')), # green 18 | (Color('#ffbef9'), Color('#ce00bb')), # pink 19 | (Color('#d0d0d0'), Color('#000000')), # black 20 | (Color('#f2d6b9'), Color('#996633')), # brown 21 | (Color('#d5b2ec'), Color('#9900FF')), # purple 22 | (Color('#baffff'), Color('#009999')), # teal 23 | (Color('#ffb27e'), Color('#fb6500')), # orange 24 | (Color('#beffcf'), Color('#33FF66')), # lime green 25 | ] 26 | 27 | COLOR_HEX_LISTS = [] 28 | for color in COLORS: 29 | color_gradient = color[0].range_to(color[1], numBins) 30 | hex_list = [c.get_hex_l() for c in color_gradient] 31 | COLOR_HEX_LISTS.append(hex_list) 32 | 33 | numColors = len(COLOR_HEX_LISTS) 34 | 35 | MARKERS = [ 36 | 'D', 'o', 'v', 's', '^', '<', 37 | '>', '*', 'h', 'H', 'd', 'X' 38 | ] 39 | numMarkers = len(MARKERS) 40 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgvbrt/retro_contest/589508e3c8779acdabf93fed055dbca566df54c5/deepneuroevolution/visual_inspector/figure_custom/__init__.py -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/cloud_figures_custom.py: -------------------------------------------------------------------------------- 1 | """Customized Cloud Figures""" 2 | from figure_base.cloud_figures import CloudPlot 3 | 4 | 5 | class CloudPlotHDBC(CloudPlot): 6 | """Cloud plot to show trajectory as Hi Dim BCs""" 7 | def __init__(self, *args, **kwargs): 8 | CloudPlot.__init__(self, *args, **kwargs) 9 | self.hd_bc, = self.main_ax.plot([], [], color='k', linewidth=3) 10 | 11 | def show_new_labels_dp(self, thisData): 12 | CloudPlot.show_new_labels_dp(self, thisData) 13 | self.hd_bc.set_data(thisData.x, thisData.y) 14 | 15 | def clear_labels(self): 16 | CloudPlot.clear_labels(self) 17 | self.hd_bc.set_data([], []) 18 | 19 | class CloudPlotRollout(CloudPlot): 20 | 
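    # Right-click hook: button_3 launches an interactive MuJoCo rollout window
    # (rolloutMaker from figure_custom.rollout_trajectory) for the picked point.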
"""Cloud plot with policy rollout""" 21 | def __init__(self, *args, **kwargs): 22 | CloudPlot.__init__(self, *args, **kwargs) 23 | self.traj_plots = [] 24 | 25 | def button_3(self, artist, ind): 26 | from figure_custom.rollout_trajectory import rolloutMaker 27 | print("rolling out!!") 28 | gen = self.artist2gen[artist] 29 | this_data = self.fetch_data_point(artist, ind) 30 | if self.get_policy_file(gen) != None: 31 | self.traj_plots.append(rolloutMaker(gen, this_data, self)) 32 | 33 | class CloudPlotRolloutAtari(CloudPlot): 34 | """Cloud plot with policy rollout""" 35 | 36 | def button_3(self, artist, ind): 37 | from figure_custom.rollout_custom import RolloutAtari 38 | print("rolling out!!") 39 | gen = self.artist2gen[artist] 40 | print(gen) 41 | this_data = self.fetch_data_point(artist, ind) 42 | policy_file = self.get_policy_file(gen) 43 | if policy_file is None: 44 | return 45 | noise_stdev = self.get_parent_op_data(gen)[-1] 46 | 47 | if this_data.parentOrNot: 48 | seed = int(self.get_parent_op_data(gen)[-2]) 49 | print(self.get_parent_op_data(gen)) 50 | else: 51 | seed = int(this_data.child_op_data[-2]) 52 | print(this_data.child_op_data) 53 | 54 | x, y, f = this_data.x[-1], this_data.y[-1], this_data.fitness 55 | record = "snapshots/snapshot_gen_{:04}/clips/x_{:.2f}_y_{:.2f}_f{:.2f}".format( 56 | this_data.gen, x, y, f) 57 | RolloutAtari.setup_and_rollout_policy(policy_file, this_data, 58 | noise_stdev=noise_stdev, fixed_seed=seed, 59 | render=True, path=self.path, record=record) 60 | 61 | 62 | import subprocess 63 | subprocess.call(["open {}/*.mp4".format(self.path+record)], shell=True) 64 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/rollout_custom.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import figure_base.settings as gs 3 | from figure_base.rollout_base import RolloutBase 4 | 5 | 6 | class RolloutAtari(RolloutBase): 7 | @classmethod 8 | def make_env(self): 9 | from es_distributed.atari_wrappers import wrap_deepmind 10 | env_id = "FrostbiteNoFrameskip-v4" 11 | env = gym.make(env_id) 12 | env = wrap_deepmind(env) 13 | return env 14 | 15 | @classmethod 16 | def setup_policy(cls, policy_file, thisData, noise_stdev, path): 17 | from es_distributed.policies import ESAtariPolicy 18 | import pickle 19 | 20 | pi = ESAtariPolicy.Load(policy_file, extra_kwargs=None) 21 | 22 | iteration = thisData.gen 23 | rb_file = path+"/snapshots/snapshot_gen_{:04}/snapshot_parent_{:04d}_rb.p".format(iteration, iteration) 24 | rb_saved = pickle.load( open( rb_file, "rb" ) ) 25 | pi.set_ref_batch(rb_saved) 26 | 27 | if not thisData.parentOrNot: 28 | noiseIdx, seed, noiseSign = thisData.child_op_data[-3:].astype(int) 29 | print(noiseIdx, seed, noiseSign) 30 | theta = pi.get_trainable_flat() + noiseSign * noise_stdev * gs.noise.get(noiseIdx, pi.num_params) 31 | pi.set_trainable_flat(theta) 32 | 33 | return pi 34 | 35 | @classmethod 36 | def print_info(cls, seed, rews, t, novelty_vec): 37 | print('return={:.4f} len={}'.format(rews.sum(), t)) 38 | 39 | @classmethod 40 | def post_process(cls, env, result): 41 | ram = env.unwrapped._get_ram() 42 | print(ram) 43 | return True 44 | 45 | class RolloutMujoco(RolloutBase): 46 | @classmethod 47 | def make_env(cls): 48 | env = gym.make('Humanoid-v1') 49 | return env 50 | 51 | @classmethod 52 | def setup_policy(cls, policy_file, thisData, noise_stdev, path): 53 | from es_distributed.policies import MujocoPolicy 54 | pi = 
MujocoPolicy.Load(policy_file, extra_kwargs=None) 55 | if not thisData.parentOrNot: 56 | noiseIdx, noiseSign = thisData.child_op_data[1:3].astype(int) 57 | theta = pi.get_trainable_flat() + noiseSign * noise_stdev * gs.noise.get(noiseIdx, pi.num_params) 58 | pi.set_trainable_flat(theta) 59 | return pi 60 | 61 | @classmethod 62 | def get_x_y_death_from_humanoid_bc(cls, bc): 63 | idx_last_x, idx_last_y = int(len(bc) / 2 - 1), -1 64 | x_coord, y_coord = bc[idx_last_x], bc[idx_last_y] 65 | return x_coord, y_coord 66 | 67 | @classmethod 68 | def print_info(cls, seed, rews, t, novelty_vec): 69 | x_coord, y_coord = cls.get_x_y_death_from_humanoid_bc(novelty_vec) 70 | print('seed={} x = {:.6f} y = {:.6f} reward={:.8f} len={}'.format( 71 | seed, x_coord, y_coord, rews.sum(), t) 72 | ) 73 | 74 | 75 | @classmethod 76 | def post_process(cls, env, result): 77 | xs, ys, ts, scores, seeds = [], [], [], [], [] 78 | for r in result: 79 | seed, rews, _, novelty_vec = r 80 | x_coord, y_coord = cls.get_x_y_death_from_humanoid_bc(novelty_vec) 81 | xs.append(x_coord) 82 | ys.append(y_coord) 83 | ts.append(novelty_vec) 84 | scores.append(rews.sum()) 85 | seeds.append(seed) 86 | return xs, ys, ts, scores, seeds 87 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/figure_custom/rollout_trajectory.py: -------------------------------------------------------------------------------- 1 | """rollout and obtain the trajectories""" 2 | import time 3 | import matplotlib.pyplot as p 4 | from figure_base.figure_control import FigureControl 5 | import figure_base.settings as gs 6 | import numpy as np 7 | from figure_custom.rollout_custom import RolloutMujoco 8 | 9 | def extract_traj(traj): 10 | """extract the trajectory""" 11 | length = int(len(traj) / 2) 12 | tx, ty = traj[0:length], traj[length:] 13 | tx = np.insert(tx, 0, 0.) 14 | ty = np.insert(ty, 0, 0.) 
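    # The trajectory BC is laid out as [x_1..x_T, y_1..y_T]; a (0, 0) origin is
    # prepended to both halves so plotted trajectories start at the origin.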
15 | return tx, ty 16 | 17 | 18 | class rolloutMaker(): 19 | def __init__(self, gen, thisData, cloud_plot): 20 | self.fig = p.figure() 21 | self.thisData = thisData 22 | self.ax_list, self.artist_list = [], [] 23 | self.artist_fixed = None 24 | self.selected_rollout = None 25 | self.policy_file = cloud_plot.get_policy_file(gen) 26 | self.noise_stdev, = cloud_plot.get_parent_op_data(gen) 27 | print(self.noise_stdev) 28 | 29 | self.fixed_seed = None 30 | if not thisData.parentOrNot and len(thisData.child_op_data) > 0: 31 | self.fixed_seed = thisData.child_op_data[0] 32 | 33 | self.fxs, self.fys, self.fts, self.fscores, self.fseeds = None, None, None, None, None 34 | if self.fixed_seed: 35 | self.fxs, self.fys, self.fts, self.fscores, self.fseeds = RolloutMujoco.setup_and_rollout_policy(self.policy_file, thisData, noise_stdev=self.noise_stdev, 36 | fixed_seed=int(self.fixed_seed), bc_choice="traj") 37 | 38 | self.xs, self.ys, self.ts, self.scores, self.seeds = None, None, None, None, None 39 | if self.fixed_seed is None or FigureControl.offspring_stochastic: 40 | self.xs, self.ys, self.ts, self.scores, self.seeds = RolloutMujoco.setup_and_rollout_policy(self.policy_file, thisData, noise_stdev=self.noise_stdev, 41 | num_rollouts=9, bc_choice="traj") 42 | 43 | self.ax1 = p.subplot2grid((3, 6), (0, 0), rowspan=3, colspan=3) 44 | self.ax1.plot(0, 0, 'ro', markersize=12, label="Origin") 45 | #self.ax1.plot(thisData.x[-1], thisData.y[-1], 'bo', markersize=12, label="Final (Fixed Seed)") 46 | self.ax1.grid(True) 47 | 48 | if self.fxs: 49 | self.artist_fixed, = self.ax1.plot(self.fxs, self.fys, 'bo', markersize=12, picker=5, label="Final (Fixed Seed)") 50 | traj = self.fts[0] 51 | tx, ty = extract_traj(traj) 52 | self.ax1.plot(tx, ty, 'b--') 53 | 54 | if self.xs: 55 | for idx, traj in enumerate(self.ts): 56 | tx, ty = extract_traj(traj) 57 | label_words = "Final (Random Seed)" if idx == 0 else None 58 | pt, = self.ax1.plot(self.xs[idx], self.ys[idx], 'C1X', markersize=12, picker=5, label=label_words) 59 | self.artist_list.append(pt) 60 | annot=self.ax1.annotate(idx+1, xy=(self.xs[idx], self.ys[idx]), xytext=(5,5),textcoords="offset points") 61 | annot.set_fontsize(16) 62 | annot.set_color('r') 63 | self.ax1.plot(tx, ty, 'C{}'.format(idx%10)) 64 | 65 | ax2 = p.subplot2grid((3, 6), (int(idx/3), idx%3+3)) 66 | ax2.plot(0, 0, 'ro', markersize=10) 67 | ax2.plot(self.fxs, self.fys, 'bo', markersize=10) 68 | ax2.plot(tx[-1], ty[-1], 'C1X', markersize=10) 69 | ax2.plot(tx, ty, 'C{}'.format(idx%10)) 70 | 71 | left, right = ax2.get_xlim() 72 | bottom, top = ax2.get_ylim() 73 | ax2.text(0.5*(left+right), 0.5*(bottom+top), '{}'.format(idx+1), 74 | horizontalalignment='center', 75 | verticalalignment='center', 76 | fontsize=32, color='red', alpha=0.5) 77 | 78 | ax2.grid(True) 79 | self.ax_list.append(ax2) 80 | self.ax1.legend() 81 | #self.ax1.set_xlim(cloud_plot.xlim) 82 | #self.ax1.set_ylim(cloud_plot.ylim) 83 | self.fig.canvas.mpl_connect('button_press_event', self.on_press) 84 | self.fig.canvas.mpl_connect('pick_event', self.on_pick) 85 | 86 | self.fig.show() 87 | 88 | def on_pick(self, event): 89 | thisevent = event.mouseevent 90 | thisArtist = event.artist 91 | self.reset() 92 | if self.artist_fixed and thisArtist == self.artist_fixed: 93 | print("you pick the fixed seed") 94 | self.fig.suptitle("x:{:.6f} y:{:.6f} fitness:{:.8f}".format(self.fxs[0], 95 | self.fys[0], self.fscores[0])) 96 | self.artist_fixed.set_markersize(18) 97 | else: 98 | for i, art_sub in enumerate(self.artist_list): 99 | if thisArtist 
== art_sub: 100 | self.select(i) 101 | break 102 | self.fig.canvas.draw() 103 | if thisevent.button == 3: 104 | if self.selected_rollout != None: 105 | RolloutMujoco.setup_and_rollout_policy(self.policy_file, self.thisData, 106 | noise_stdev=self.noise_stdev, 107 | fixed_seed=self.seeds[self.selected_rollout], render=True) 108 | else: 109 | RolloutMujoco.setup_and_rollout_policy(self.policy_file, self.thisData, 110 | noise_stdev=self.noise_stdev, 111 | fixed_seed=int(self.fixed_seed), render=True) 112 | 113 | def reset(self): 114 | if self.artist_fixed: 115 | self.artist_fixed.set_markersize(12) 116 | self.fig.suptitle("") 117 | if self.selected_rollout != None: 118 | rIdx = self.selected_rollout 119 | self.artist_list[rIdx].set_markersize(12) 120 | self.ax_list[rIdx].set_facecolor('1') 121 | self.selected_rollout = None 122 | 123 | def select(self, rIdx): 124 | self.fig.suptitle("#{} x:{:.6f} y:{:.6f} fitness:{:.8f}".format(rIdx+1, self.xs[rIdx], self.ys[rIdx], self.scores[rIdx])) 125 | self.artist_list[rIdx].set_markersize(18) 126 | self.ax_list[rIdx].set_facecolor('0.9') 127 | self.selected_rollout = rIdx 128 | 129 | def on_press(self, event): 130 | print('you pressed', event.button, event.xdata, event.ydata) 131 | ax_on_press = event.inaxes 132 | if ax_on_press == self.ax1: 133 | return 134 | 135 | self.reset() 136 | 137 | if ax_on_press: 138 | for i, ax_sub in enumerate(self.ax_list): 139 | if ax_on_press == ax_sub: 140 | self.select(i) 141 | break 142 | self.fig.canvas.draw() 143 | 144 | #print(event.button, self.selected_rollout) 145 | if event.button == 3 and self.selected_rollout != None: 146 | RolloutMujoco.setup_and_rollout_policy(self.policy_file, self.thisData, 147 | noise_stdev=self.noise_stdev, 148 | fixed_seed=self.seeds[self.selected_rollout], render=True) 149 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/main_atari.py: -------------------------------------------------------------------------------- 1 | """interactive analysis""" 2 | import click 3 | import matplotlib.pyplot as p 4 | import figure_base.settings as gs 5 | from figure_base.figure_control import FigureControl 6 | from figure_base.fitness_figures import FitnessPlot 7 | from figure_custom.cloud_figures_custom import CloudPlotHDBC, CloudPlotRolloutAtari 8 | 9 | 10 | @click.command() 11 | @click.argument('start_iter', nargs=1) 12 | @click.argument('end_iter', nargs=1) 13 | @click.argument('snapshots_path', nargs=-1) 14 | @click.option('--visible_range', help='Up to how many generations visible on one plot.') 15 | @click.option('--hi_dim_bc', type=(str, int), default=(None, None), 16 | help='Path to high-dimensional (> 2-D) BC and its dimension') 17 | def main(start_iter, end_iter, snapshots_path, visible_range, hi_dim_bc): 18 | """ 19 | START_ITER: Plot data that begins at this iteration (generation)\n 20 | END_ITER: Plot data that ends at this iteration (generation)\n 21 | SNAPSHOTS_PATH: Path(s) to One or multiple 2-D BCs 22 | """ 23 | start_iter = int(start_iter) 24 | end_iter = int(end_iter) 25 | 26 | FigureControl.init(start_iter, end_iter, visible_range) 27 | 28 | for idx, path in enumerate(snapshots_path): 29 | print("Generating Cloud Plot {} from {}".format(idx, path)) 30 | cplot = CloudPlotRolloutAtari("Cloud Plot {} ({})".format(idx, path), 31 | start_iter, end_iter, path, visible_range) 32 | gs.cloud_plots.add(cplot) 33 | gs.canvas2cloud_plot[cplot.fig.canvas] = cplot 34 | 35 | gs.fitness_plot = FitnessPlot("Fitness Plot", 
start_iter, end_iter, snapshots_path[0]) 36 | 37 | 38 | hbc_path, hbc_dim = hi_dim_bc 39 | if hbc_path != None and hbc_dim != None: 40 | print("Generating Cloud Plot H-D from {}".format(hbc_path)) 41 | hbcplot = CloudPlotHDBC("Cloud Plot {}-D BC ({})".format(hbc_dim, hbc_path), 42 | start_iter, end_iter, hbc_path, visible_range, hbc_dim) 43 | gs.cloud_plots.add(hbcplot) 44 | gs.canvas2cloud_plot[hbcplot.fig.canvas] = hbcplot 45 | 46 | p.show() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/main_mujoco.py: -------------------------------------------------------------------------------- 1 | """interactive analysis""" 2 | import click 3 | import matplotlib.pyplot as p 4 | import figure_base.settings as gs 5 | from figure_base.figure_control import FigureControl 6 | from figure_base.fitness_figures import FitnessPlot 7 | from figure_custom.cloud_figures_custom import CloudPlotHDBC, CloudPlotRollout 8 | 9 | 10 | @click.command() 11 | @click.argument('start_iter', nargs=1) 12 | @click.argument('end_iter', nargs=1) 13 | @click.argument('snapshots_path', nargs=-1) 14 | @click.option('--visible_range', help='Up to how many generations visible on one plot.') 15 | @click.option('--hi_dim_bc', type=(str, int), default=(None, None), 16 | help='Path to high-dimensional (> 2-D) BC and its dimension') 17 | def main(start_iter, end_iter, snapshots_path, visible_range, hi_dim_bc): 18 | """ 19 | START_ITER: Plot data that begins at this iteration (generation)\n 20 | END_ITER: Plot data that ends at this iteration (generation)\n 21 | SNAPSHOTS_PATH: Path(s) to One or multiple 2-D BCs 22 | """ 23 | start_iter = int(start_iter) 24 | end_iter = int(end_iter) 25 | 26 | FigureControl.init(start_iter, end_iter, visible_range) 27 | 28 | for idx, path in enumerate(snapshots_path): 29 | print("Generating Cloud Plot {} from {}".format(idx, path)) 30 | cplot = CloudPlotRollout("Cloud Plot {} ({})".format(idx, path), 31 | start_iter, end_iter, path, visible_range) 32 | gs.cloud_plots.add(cplot) 33 | gs.canvas2cloud_plot[cplot.fig.canvas] = cplot 34 | 35 | gs.fitness_plot = FitnessPlot("Fitness Plot", start_iter, end_iter, snapshots_path[0]) 36 | 37 | 38 | hbc_path, hbc_dim = hi_dim_bc 39 | if hbc_path != None and hbc_dim != None: 40 | print("Generating Cloud Plot H-D from {}".format(hbc_path)) 41 | hbcplot = CloudPlotHDBC("Cloud Plot {}-D BC ({})".format(hbc_dim, hbc_path), 42 | start_iter, end_iter, hbc_path, visible_range, hbc_dim) 43 | gs.cloud_plots.add(hbcplot) 44 | gs.canvas2cloud_plot[hbcplot.fig.canvas] = hbcplot 45 | 46 | p.show() 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/process_bc.py: -------------------------------------------------------------------------------- 1 | """interactive analysis""" 2 | import click 3 | from dimen_red.assemble import assemble 4 | from dimen_red.reduce import reduce_dim 5 | from dimen_red.disassemble import disassemble 6 | import numpy as np 7 | 8 | @click.command() 9 | @click.argument('start_iter', nargs=1) 10 | @click.argument('end_iter', nargs=1) 11 | @click.argument('snapshots_path', nargs=1) 12 | @click.argument('bc_dim', nargs=1) 13 | @click.option('--method', default='pca', 14 | help='Methods of dimensionality reduction or downsampling.') 15 | @click.option('--downsampling_ratio', default=1.0, 16 | help='Downsampling ratio 
(<1) when method=downsampling.') 17 | @click.option('--copy_files', 18 | help='Files to copy over. Support Unix-style wildcards, separated in spaces') 19 | def main(start_iter, end_iter, snapshots_path, bc_dim, method, downsampling_ratio, copy_files): 20 | """ 21 | Apply dimensionality reduction or downsampling to hi-dimensional data. 22 | 23 | START_ITER: Process data that begins at this iteration (generation)\n 24 | END_ITER: Process data that ends at this iteration (generation)\n 25 | SNAPSHOTS_PATH: Path to hi-dimensional BC 26 | """ 27 | start_iter, end_iter, bc_dim = int(start_iter), int(end_iter), int(bc_dim) 28 | 29 | if method != 'downsampling': 30 | downsampling_ratio = 1.0 31 | 32 | #step 1: Assemble hi-D BCs from all generations 33 | X, p_opt, ch_opt, num_os_per_gen, labels = assemble(start_iter, end_iter, snapshots_path, 34 | bc_dim=bc_dim, ds_ratio=downsampling_ratio) 35 | print('Assembling Completed! X.shape={} #OS_per_gen={}'.format(X.shape, num_os_per_gen)) 36 | c_labels = np.round(labels/100) 37 | 38 | print(c_labels, c_labels.shape) 39 | X = X / 255.0 40 | #step 2: Hi-D BCs to 2-D BCs if method != downsampling 41 | X_r = reduce_dim(X, labels=np.ravel(c_labels), method=method) 42 | 43 | #step 3: Disassemble reduced BCs into each generation 44 | search_patterns = None 45 | if copy_files is not None: 46 | search_patterns = copy_files.split(' ') 47 | 48 | disassemble(X_r, p_opt, ch_opt, method, 49 | start_iter=start_iter, end_iter=end_iter, path=snapshots_path, 50 | chunk=1+num_os_per_gen, copy_file_patterns=search_patterns) 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0090/snapshot_parent_0090.dat: -------------------------------------------------------------------------------- 1 | 22.6325171264 -1.74513390999 6507.56591797 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0091/snapshot_parent_0091.dat: -------------------------------------------------------------------------------- 1 | 23.738028366 -2.08385136894 6592.18505859 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0092/snapshot_parent_0092.dat: -------------------------------------------------------------------------------- 1 | 23.9506591621 -2.33097182707 6609.68261719 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0093/snapshot_parent_0093.dat: -------------------------------------------------------------------------------- 1 | 24.5922858277 -2.54194614284 6658.01367188 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0094/snapshot_parent_0094.dat: -------------------------------------------------------------------------------- 1 | 25.7124246286 -2.49096325737 6749.34375 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0095/snapshot_parent_0095.dat: 
-------------------------------------------------------------------------------- 1 | 26.314792498 -2.17572508629 6798.12207031 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0096/snapshot_parent_0096.dat: -------------------------------------------------------------------------------- 1 | 26.5435120513 -1.91470031378 6814.58154297 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0097/snapshot_parent_0097.dat: -------------------------------------------------------------------------------- 1 | 27.1223119141 -1.91108718848 6860.91601562 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0098/snapshot_parent_0098.dat: -------------------------------------------------------------------------------- 1 | 27.8368530397 -1.66141369742 6920.83349609 0.02 2 | -------------------------------------------------------------------------------- /deepneuroevolution/visual_inspector/sample_data/mujoco/final_xy_bc/snapshots/snapshot_gen_0099/snapshot_parent_0099.dat: -------------------------------------------------------------------------------- 1 | 28.2299172297 -1.29749504234 6955.35058594 0.02 2 | --------------------------------------------------------------------------------
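A minimal sketch (not part of the repo) of parsing one of the parent snapshot rows above, assuming the layout that loadParentData uses with bc_dim=2: x, y, fitness, then operator data such as the mutation noise_stdev:

import numpy as np

def read_parent_snapshot(path, gen, bc_dim=2):
    # Hypothetical helper mirroring loadParentData's slicing of
    # snapshot_parent_XXXX.dat: bc_dim BC values, then fitness, then op data.
    fname = '{}/snapshots/snapshot_gen_{:04d}/snapshot_parent_{:04d}.dat'.format(path, gen, gen)
    row = np.loadtxt(fname)
    return row[:bc_dim], row[bc_dim], row[bc_dim + 1:]

# For snapshot_gen_0090 above: bc ~ [22.63, -1.75], fitness ~ 6507.57, op_data = [0.02].
bc, fitness, op_data = read_parent_snapshot('sample_data/mujoco/final_xy_bc', 90)
print(bc, fitness, op_data)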