├── .circleci └── config.yml ├── .codecov.yml ├── .codespell.skip ├── .coveragerc ├── .dockerignore ├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── ci ├── build_venv.sh ├── code_checks.sh ├── local_tests.sh └── run_tests.sh ├── experiments ├── __init__.py ├── benchmark_vec_env.sh ├── build_and_run.sh ├── common.sh ├── modelfree │ ├── attack_transfer.sh │ ├── baselines.sh │ ├── common.sh │ ├── dec2018replication.sh │ ├── defenses.sh │ ├── density.sh │ ├── highest_win_rate.py │ ├── noisy_actions_and_obs.sh │ └── plot.sh ├── planning │ ├── __init__.py │ ├── common.py │ ├── mujoco_control-halfcheetah.ipynb │ ├── mujoco_control-hopper.ipynb │ ├── mujoco_control-ilqrpr.ipynb │ ├── mujoco_control.ipynb │ ├── mujoco_inverteddoublependulum.ipynb │ ├── mujoco_invertedpendulum.ipynb │ ├── mujoco_mc.ipynb │ ├── mujoco_reacher.ipynb │ └── mujoco_swimmer.ipynb ├── pull_public_s3.sh ├── pull_s3.sh ├── push_public_s3.sh ├── remote_build_and_run.sh └── run_docker.sh ├── pyproject.toml ├── requirements-build.txt ├── requirements-dev.txt ├── requirements.txt ├── scripts ├── aws │ ├── cloudwatch.sh │ └── termination.py ├── doubleblind.sh ├── grab_frame.py └── incomplete_experiments.py ├── setup.cfg ├── setup.py ├── src └── aprl │ ├── __init__.py │ ├── activations │ ├── __init__.py │ ├── density │ │ ├── __init__.py │ │ ├── fit_density.py │ │ ├── pipeline.py │ │ └── visualize.py │ ├── generate_activations.py │ └── tsne │ │ ├── __init__.py │ │ ├── fit_model.py │ │ ├── pipeline.py │ │ └── visualize.py │ ├── agents │ ├── __init__.py │ ├── monte_carlo.py │ └── mujoco_lqr.py │ ├── common │ ├── __init__.py │ ├── mujoco.py │ ├── multi_monitor.py │ └── utils.py │ ├── configs │ ├── .gitignore │ ├── __init__.py │ ├── multi │ │ ├── __init__.py │ │ ├── common.py │ │ ├── score.py │ │ └── train.py │ ├── noise │ │ ├── SumoHumans-cond.json │ │ ├── SumoHumans.json │ │ └── default.json │ ├── ray │ │ ├── aws.yaml │ │ └── baremetal.yaml │ └── rew │ │ ├── Humanoid.json │ │ ├── HumanoidStand.json │ │ ├── SumoHumans-cond.json │ │ └── default.json │ ├── envs │ ├── __init__.py │ ├── crowded_line.py │ ├── gym_compete.py │ ├── matrix_game.py │ ├── mujoco_costs.py │ ├── multi_agent.py │ ├── observation_masking.py │ ├── sumo_auto_contact.py │ └── wrappers.py │ ├── multi │ ├── __init__.py │ ├── common.py │ ├── common_worker.py │ ├── score.py │ ├── score_worker.py │ ├── train.py │ └── train_worker.py │ ├── policies │ ├── __init__.py │ ├── base.py │ ├── loader.py │ ├── transparent.py │ └── wrappers.py │ ├── score_agent.py │ ├── train.py │ ├── training │ ├── __init__.py │ ├── embedded_agents.py │ ├── gail_dataset.py │ ├── logger.py │ ├── lookback.py │ ├── scheduling.py │ └── shaping_wrappers.py │ └── visualize │ ├── __init__.py │ ├── annotated_gym_compete.py │ ├── benchmark_ffmpeg.sh │ ├── compress_videos.sh │ ├── generate_website_json.py │ ├── make_videos.py │ ├── noisy_observations.py │ ├── scores.py │ ├── styles.py │ ├── tb.py │ ├── training.py │ └── util.py ├── tests ├── SumoAnts_traj │ └── agent_0.npz ├── __init__.py ├── dummy_sumo_ants │ ├── old_ppo2 │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl │ ├── ppo1 │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl │ ├── ppo2 │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl │ └── sac │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl ├── policies │ ├── __init__.py │ └── test_wrappers.py ├── test_agents.py ├── test_common.py ├── test_envs.py └── test_experiments.py └── vendor ├── Xdummy └── Xdummy-entrypoint /.circleci/config.yml: 
-------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | codecov: codecov/codecov@1.0.5 5 | 6 | executors: 7 | my-executor: 8 | docker: 9 | - image: humancompatibleai/adversarial_policies:base 10 | auth: 11 | username: $DOCKERHUB_USERNAME 12 | password: $DOCKERHUB_PASSWORD 13 | working_directory: /adversarial-policies 14 | environment: 15 | # If you change these, also change scripts/code_checks.sh 16 | SRC_FILES: src/ tests/ setup.py 17 | TYPECHECK_FILES: src/ tests/ setup.py 18 | 19 | commands: 20 | dependencies: 21 | # You must still manually update the Docker image if any 22 | # binary (non-Python) dependencies change. 23 | description: "Check out and update Python dependencies." 24 | steps: 25 | - checkout 26 | # Download and cache dependencies 27 | - restore_cache: 28 | keys: 29 | - v2-dependencies-{{ checksum "requirements-build.txt" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 30 | 31 | - run: 32 | name: install dependencies 33 | # MUJOCO_KEY is defined in a CircleCI context 34 | # Do some sanity checks to make sure key works 35 | command: "[[ -d /venv ]] || USE_MPI=True ./ci/build_venv.sh /venv" 36 | 37 | - save_cache: 38 | paths: 39 | - /venv 40 | key: v2-dependencies-{{ checksum "requirements-build.txt" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 41 | 42 | - run: 43 | name: install adversarial-policies 44 | # Build a wheel then install to avoid copying whole directory (pip issue #2195) 45 | command: | 46 | python setup.py sdist bdist_wheel 47 | pip install --upgrade --force-reinstall dist/aprl-*.whl 48 | 49 | jobs: 50 | lintandtype: 51 | executor: my-executor 52 | 53 | steps: 54 | - dependencies 55 | - run: 56 | name: flake8 57 | command: flake8 ${SRC_FILES} 58 | 59 | - run: 60 | name: black 61 | command: black --check ${SRC_FILES} 62 | 63 | - run: 64 | name: codespell 65 | command: codespell -I .codespell.skip --skip='*.pyc,*.pkl,*.npz' ${SRC_FILES} 66 | 67 | - run: 68 | name: pytype 69 | command: pytype ${TYPECHECK_FILES} 70 | 71 | unit-test: 72 | executor: my-executor 73 | parallelism: 3 74 | steps: 75 | - dependencies 76 | 77 | - run: 78 | name: Memory Monitor 79 | command: | 80 | mkdir /tmp/resource-usage 81 | export FILE=/tmp/resource-usage/memory.txt 82 | while true; do 83 | ps -u root eo pid,%cpu,%mem,args,uname --sort=-%mem >> $FILE 84 | echo "----------" >> $FILE 85 | sleep 1 86 | done 87 | background: true 88 | 89 | - run: 90 | name: Headless X Server 91 | command: nohup Xdummy 92 | background: true 93 | 94 | - run: 95 | name: run tests 96 | command: | 97 | export DISPLAY=:0 98 | pytest --cov=/venv/lib/python3.7/site-packages/aprl --cov=tests \ 99 | --junitxml=/tmp/test-reports/junit.xml \ 100 | --shard-id=${CIRCLE_NODE_INDEX} --num-shards=${CIRCLE_NODE_TOTAL} \ 101 | -vv tests/ 102 | mv .coverage .coverage.aprl 103 | coverage combine # rewrite paths from virtualenv to src/ 104 | - codecov/upload 105 | 106 | - store_artifacts: 107 | path: /tmp/test-reports 108 | destination: test-reports 109 | - store_test_results: 110 | path: /tmp/test-reports 111 | unit-test: 112 | - store_artifacts: 113 | path: /tmp/resource-usage 114 | destination: resource-usage 115 | 116 | workflows: 117 | version: 2 118 | test: 119 | jobs: 120 | - lintandtype: 121 | context: 122 | - MuJoCo 123 | - docker-hub-creds 124 | - unit-test: 125 | context: 126 | - MuJoCo 127 | - docker-hub-creds 128 | -------------------------------------------------------------------------------- 
/.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: false 5 | main: 6 | paths: 7 | - "src/" 8 | - "!src/imitation/envs/examples/" 9 | - "!src/imitation/scripts/" 10 | auxiliary: 11 | target: 0% 12 | paths: 13 | - "src/aprl/configs/" 14 | tests: 15 | # Should not have dead code in our tests 16 | target: 100% 17 | paths: 18 | - "tests/" 19 | patch: 20 | default: false 21 | main: 22 | paths: 23 | - "src/" 24 | - "!src/imitation/envs/examples/" 25 | - "!src/imitation/scripts/" 26 | auxiliary: 27 | paths: 28 | - "src/aprl/beta/" 29 | - "src/aprl/configs/" 30 | tests: 31 | target: 100% 32 | paths: 33 | - "tests/" 34 | -------------------------------------------------------------------------------- /.codespell.skip: -------------------------------------------------------------------------------- 1 | ith 2 | iff 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include=src/* 3 | 4 | [paths] 5 | aprl_source = 6 | src/aprl 7 | *venv/lib/python*/site-packages/aprl 8 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .gitignore -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files 2 | data 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | **/*.pyc 7 | *.py[cod] 8 | *$py.class 9 | 10 | # Vim swap files 11 | *.swp 12 | *.swo 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # Jupyter Notebook 64 | .ipynb_checkpoints 65 | 66 | # IPython 67 | profile_default/ 68 | ipython_config.py 69 | 70 | # pyenv 71 | .python-version 72 | 73 | # Environments 74 | .env 75 | .venv 76 | env/ 77 | *venv/ 78 | ENV/ 79 | env.bak/ 80 | venv.bak/ 81 | 82 | # Spyder project settings 83 | .spyderproject 84 | .spyproject 85 | 86 | # Rope project settings 87 | .ropeproject 88 | 89 | # mkdocs documentation 90 | /site 91 | 92 | # mypy 93 | .mypy_cache/ 94 | .dmypy.json 95 | dmypy.json 96 | 97 | # Type checking 98 | .pyre/ 99 | .pytype/ 100 | 101 | # IntelliJ/PyCharm 102 | .idea/ 103 | 104 | # MuJoCo 105 | MUJOCO_LOG.TXT 106 | 107 | # Mac 108 | *.DS_Store 109 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Based on OpenAI's mujoco-py Dockerfile 2 | 3 | ARG USE_MPI=True 4 | 5 | # base stage contains just binary dependencies. 6 | # This is used in the CI build. 7 | FROM nvidia/cuda:10.0-runtime-ubuntu18.04 AS base 8 | ARG USE_MPI 9 | ARG DEBIAN_FRONTEND=noninteractive 10 | 11 | RUN echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \ 12 | && apt-get update -q \ 13 | && apt-get install -y --no-install-recommends \ 14 | build-essential \ 15 | curl \ 16 | ffmpeg \ 17 | git \ 18 | libgl1-mesa-dev \ 19 | libgl1-mesa-glx \ 20 | libglew-dev \ 21 | libosmesa6-dev \ 22 | net-tools \ 23 | parallel \ 24 | patchelf \ 25 | python3.7 \ 26 | python3.7-dev \ 27 | python3-pip \ 28 | rsync \ 29 | software-properties-common \ 30 | unzip \ 31 | vim \ 32 | virtualenv \ 33 | xpra \ 34 | xserver-xorg-dev \ 35 | ttf-mscorefonts-installer \ 36 | && apt-get clean \ 37 | && rm -rf /var/lib/apt/lists/* 38 | 39 | ENV LANG C.UTF-8 40 | 41 | RUN mkdir -p /root/.mujoco \ 42 | && curl -o mjpro150.zip https://www.roboti.us/download/mjpro150_linux.zip \ 43 | && unzip mjpro150.zip -d /root/.mujoco \ 44 | && rm mjpro150.zip \ 45 | && curl -o mujoco131.zip https://www.roboti.us/download/mjpro131_linux.zip \ 46 | && unzip mujoco131.zip -d /root/.mujoco \ 47 | && rm mujoco131.zip \ 48 | && curl -o /root/.mujoco/mjkey.txt https://www.roboti.us/file/mjkey.txt 49 | 50 | COPY vendor/Xdummy /usr/local/bin/Xdummy 51 | RUN chmod +x /usr/local/bin/Xdummy 52 | 53 | RUN if [ $USE_MPI = "True" ]; then \ 54 | add-apt-repository --yes ppa:marmistrz/openmpi \ 55 | && apt-get update -q \ 56 | && apt-get install -y libopenmpi3 libopenmpi-dev \ 57 | && apt-get clean \ 58 | && rm -rf /var/lib/apt/lists/*; \ 59 | fi 60 | 61 | # Set the PATH to the venv before we create the venv, so it's visible in base. 62 | # This is since we may create the venv outside of Docker, e.g. in CI 63 | # or by binding it in for local development. 64 | ENV PATH="/venv/bin:$PATH" 65 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH} 66 | 67 | # python-req stage contains Python venv, but not code. 68 | # It is useful for development purposes: you can mount 69 | # code from outside the Docker container. 
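# For example, a minimal sketch of building and using this stage for development
# (the image tag and bind-mount below are assumptions, not part of this repo's tooling):
#   docker build --target python-req -t adversarial_policies:python-req .
#   docker run -it -v "$(pwd)":/adversarial-policies adversarial_policies:python-req bash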
70 | FROM base as python-req 71 | ARG USE_MPI 72 | 73 | WORKDIR /adversarial-policies 74 | # Copy over just requirements.txt at first. That way, the Docker cache doesn't 75 | # expire until we actually change the requirements. 76 | COPY ./requirements-build.txt /adversarial-policies/ 77 | COPY ./requirements.txt /adversarial-policies/ 78 | COPY ./requirements-dev.txt /adversarial-policies/ 79 | COPY ./ci/build_venv.sh /adversarial-policies/ci/build_venv.sh 80 | RUN ci/build_venv.sh /venv && rm -rf $HOME/.cache/pip 81 | 82 | # full stage contains everything. 83 | # Can be used for deployment and local testing. 84 | FROM python-req as full 85 | 86 | # Delay copying (and installing) the code until the very end 87 | COPY . /adversarial-policies 88 | # Build a wheel then install to avoid copying whole directory (pip issue #2195) 89 | RUN python3 setup.py sdist bdist_wheel 90 | RUN pip install --upgrade dist/aprl-*.whl 91 | 92 | # Default entrypoints 93 | ENTRYPOINT ["/adversarial-policies/vendor/Xdummy-entrypoint"] 94 | CMD ["ci/run_tests.sh"] 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Adam Gleave 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CircleCI](https://circleci.com/gh/HumanCompatibleAI/adversarial-policies.svg?style=svg)](https://circleci.com/gh/HumanCompatibleAI/adversarial-policies) 2 | [![codecov](https://codecov.io/gh/HumanCompatibleAI/adversarial-policies/branch/master/graph/badge.svg)](https://codecov.io/gh/HumanCompatibleAI/adversarial-policies) 3 | 4 | Codebase to train, evaluate and analyze adversarial policies: policies attacking a fixed victim 5 | agent in a multi-agent system. See [paper](https://arxiv.org/abs/1905.10615) for more information. 6 | 7 | # Installation 8 | 9 | The easiest way to install the code is to build the Docker image in the `Dockerfile`. 10 | This will install all necessary binary and Python dependencies. Build the image by: 11 | 12 | ```bash 13 | $ docker build . 14 | ``` 15 | 16 | You can also pull a Docker image for the latest master commit from 17 | `humancompatibleai/adversarial_policies:latest`. 
Once you have built the image, run it with: 18 | 19 | ```bash 20 | docker run -it --env MUJOCO_KEY=URL_TO_YOUR_MUJOCO_KEY \ 21 | humancompatibleai/adversarial_policies:latest /bin/bash # change tag if built locally 22 | ``` 23 | 24 | If you want to run outside of Docker (for example, for ease of development), read on. 25 | 26 | This codebase uses Python 3.7. The main binary dependencies are MuJoCo (version 1.31 for 27 | `gym_compete` environments, and 2.0 for the others). You may also need to install some other 28 | libraries, such as OpenMPI. 29 | 30 | Create a virtual environment by running `ci/build_venv.sh`. Activate it 31 | with `. ./venv/bin/activate`. Finally, run `pip install -e .` to install 32 | an editable version of this package. 33 | 34 | # Reproducing Results 35 | 36 | Note we use [Sacred](https://github.com/IDSIA/sacred) for 37 | experiment configuration. 38 | 39 | ## Training adversarial policies 40 | 41 | `aprl.train` trains a single adversarial policy. By default it will train on `SumoAnts` for 42 | a brief period of time. You can override any of the config parameters, defined in `train_config`, at 43 | the command line. For example, to replicate one of the experiments in the paper, run: 44 | 45 | ```bash 46 | # Train on Sumo Humans for 20M timesteps 47 | python -m aprl.train with env_name=multicomp/SumoHumans-v0 paper 48 | ``` 49 | 50 | `aprl.multi.train` trains multiple adversarial policies, using Ray (see below) for 51 | parallelization. To replicate the results in the paper (there may be slight differences due to 52 | randomness not captured in the seeding), run `python -m aprl.multi.train with paper`. To run 53 | the hyperparameter sweep, run `python -m aprl.multi.train with hyper`. 54 | 55 | You can find results from our training run on s3://adversarial-policies-public/multi_train/paper. 56 | This includes TensorBoard logs, final model weights, checkpoints, and individual policy configs. 57 | Run `experiments/pull_public_s3.sh` to sync this and other data to `data/aws-public/`. 58 | 59 | ## Evaluating adversarial policies 60 | 61 | `aprl.score_agent` evaluates a pair of policies, for example an adversary and a victim. 62 | It outputs the win rate for each agent and the number of ties. It can also render to the screen 63 | or produce videos. 64 | 65 | We similarly use `aprl.multi.score` to evaluate multiple pairs of policies in parallel. 66 | To reproduce all the evaluations used in the paper, run the following bash scripts, which call 67 | `aprl.multi.score` internally: 68 | - `experiments/modelfree/baselines.sh`: fixed baselines (no adversarial policies). 69 | - `experiments/modelfree/attack_transfer.sh <logdir> [logdir ...]`. To use our 70 | pre-trained policies, use the path `data/aws-public/multi_train/paper/20190429_011349` 71 | after syncing against S3. 72 | 73 | ## Visualizing Results 74 | 75 | Most of the visualization code lives in the `aprl.visualize` package. To reproduce the figures 76 | in the paper, use `paper_config`; for those in the appendix, use `supplementary_config`. So: 77 | 78 | ```bash 79 | python -m aprl.visualize.scores with paper_config # heatmaps in the paper 80 | python -m aprl.visualize.training with supplementary_config # training curves in appendix 81 | ``` 82 | 83 | To re-generate all the videos, use `aprl.visualize.make_videos`. We recommend running 84 | in Docker, in which case it will render using `Xdummy`. This avoids rendering issues with many 85 | graphics drivers.
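For example, a minimal sketch using the repository's own wrapper `experiments/run_docker.sh` (it expects `MUJOCO_KEY` to be set and mounts `./data` into the container); you will typically also pass the config overrides described below:

```bash
export MUJOCO_KEY=URL_TO_YOUR_MUJOCO_KEY
experiments/run_docker.sh -c "python -m aprl.visualize.make_videos"
```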
86 | 87 | Note you will likely need to change the default paths in the config to point at your evaluation 88 | results from the previous section, and at your desired output directory. For example: 89 | 90 | ```bash 91 | python -m aprl.visualize.scores with tb_dir=<path-to-training-logs> \ 92 | transfer_path=<path-to-transfer-scores> 93 | python -m aprl.visualize.make_videos with adversary_path=<path-to-best-adversaries-json> 94 | ``` 95 | 96 | ## Additional Analysis 97 | 98 | The density modeling can be run with `experiments/modelfree/density.sh`, or with custom 99 | configurations via `aprl.density.pipeline`. 100 | 101 | The t-SNE visualizations can be replicated with `aprl.tsne.pipeline`. 102 | 103 | ## Using Ray 104 | 105 | Many of the experiments are computationally intensive. You can run them on a single machine, but it 106 | might take several weeks. We use [Ray](https://github.com/ray-project/ray) to run distributed 107 | experiments. We include example configs in `src/aprl/configs/ray/`. To use `aws.yaml` you 108 | will need to, at a minimum, edit the config to use your own AMI (anything with Docker should work) 109 | and private key. Then just run `ray up <path-to-config>` and it will start a cluster. SSH into the 110 | head node, start a shell in Docker, and then follow the above instructions. The script should 111 | automatically detect it is part of a Ray cluster and run on the existing Ray server, rather than 112 | starting a new one. 113 | 114 | # Contributions 115 | 116 | The codebase follows PEP8, with a 100-column maximum line width. Docstrings should be in reST. 117 | 118 | Please run `ci/code_checks.sh` before committing. This runs several linting steps. 119 | These are also run as a continuous integration check. 120 | 121 | I like to use Git commit hooks to prevent bad commits from happening in the first place: 122 | ```bash 123 | ln -s ../../ci/code_checks.sh .git/hooks/pre-commit 124 | ``` 125 | -------------------------------------------------------------------------------- /ci/build_venv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e # exit immediately on any error 4 | 5 | venv=$1 6 | if [[ ${venv} == "" ]]; then 7 | venv="venv" 8 | fi 9 | 10 | virtualenv -p python3.7 ${venv} 11 | source ${venv}/bin/activate 12 | pip install -r requirements-build.txt 13 | pip install -r requirements.txt 14 | pip install -r requirements-dev.txt 15 | 16 | if [[ $USE_MPI == "True" ]]; then 17 | pip install mpi4py 18 | fi 19 | -------------------------------------------------------------------------------- /ci/code_checks.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # If you change these, also change .circleci/config.yml.
4 | SRC_FILES="src/ tests/ setup.py" 5 | TYPECHECK_FILES="src/ tests/ setup.py" 6 | 7 | set -x # echo commands 8 | set -e # quit immediately on error 9 | 10 | flake8 ${SRC_FILES} 11 | black --check ${SRC_FILES} 12 | codespell -I .codespell.skip --skip='*.pyc,*.pkl,*.npz' ${SRC_FILES} 13 | 14 | if [ -x "`which circleci`" ]; then 15 | circleci config validate 16 | fi 17 | 18 | if [ "$skipexpensive" != "true" ]; then 19 | pytype ${TYPECHECK_FILES} 20 | fi 21 | -------------------------------------------------------------------------------- /ci/local_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ ${MUJOCO_KEY} == "" ]]; then 4 | echo "Set MUJOCO_KEY file to a URL with your key" 5 | exit 1 6 | fi 7 | 8 | # Run the same CI tests that Travis will run on local machine. 9 | docker build --cache-from humancompatibleai/adversarial_policies:local-test \ 10 | -t humancompatibleai/adversarial_policies:local-test . 11 | if [[ $? -ne 0 ]]; then 12 | echo "Docker build failed" 13 | exit 1 14 | fi 15 | 16 | docker run --rm --env MUJOCO_KEY=${MUJOCO_KEY} --env CODECOV_TOKEN=${CODECOV_TOKEN} \ 17 | humancompatibleai/adversarial_policies:local-test \ 18 | ci/run_tests.sh 19 | -------------------------------------------------------------------------------- /ci/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e # exit immediately on any error 4 | 5 | echo "Downloading MuJoCo Key" 6 | curl -o /root/.mujoco/mjkey.txt ${MUJOCO_KEY} 7 | 8 | set -o xtrace # print commands 9 | 10 | num_cpus=$2 11 | if [[ ${num_cpus} == "" ]]; then 12 | num_cpus=$(nproc --all) 13 | num_cpus=$((${num_cpus} / 2)) 14 | fi 15 | 16 | export LD_LIBRARY_PATH=/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH} 17 | COV_FLAGS="--cov=tests --cov=/venv/lib/python3.7/site-packages/aprl" 18 | pytest -vv -n ${num_cpus} ${COV_FLAGS} tests/ 19 | 20 | mv .coverage .coverage.tmp 21 | coverage combine # rewrite paths from virtualenv to src/ 22 | codecov 23 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/benchmark_vec_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NUM_ENVS="1 2 4 8 16" 4 | 5 | for num_env in $NUM_ENVS; do 6 | for rep in 1 2 3; do 7 | echo "BENCHMARK: ${num_env} environments test ${rep}" 8 | time python -m aprl.train with \ 9 | total_timesteps=50000 num_env=${num_env} \ 10 | exp_name="vec-env-benchmark-${num_env}-${rep}" 11 | done 12 | done -------------------------------------------------------------------------------- /experiments/build_and_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | CMD="bash" 7 | COPY="True" 8 | DETACH="False" 9 | WORK_DIR="$HOME/aprl" 10 | NAME="adversarial-policies" 11 | TREEISH="master" 12 | TB_PORT=6006 13 | RUN_DOCKER_ARGS="" 14 | 15 | while [[ $# -gt 0 ]] 16 | do 17 | key="$1" 18 | case $key in 19 | -c|--cmd) 20 | CMD="$2" 21 | shift 22 | shift 23 | ;; 24 | -d|--detach) 25 | DETACH="True" 26 | shift 27 | ;; 28 | -l|--listen) 29 | TB_PORT="$2" 30 | shift 31 | shift 32 | ;; 33 | -n|--name) 34 | NAME="$2" 35 | shift 36 | shift 37 | ;; 38 | --no-copy) 39 | COPY="False" 40 | shift 41 | ;; 42 | -r|--revision) 43 | TREEISH="$2" 44 | shift 45 | shift 46 | ;; 47 | -w|--work-dir) 48 | WORK_DIR="$2" 49 | shift 50 | shift 51 | ;; 52 | --run-docker-args) 53 | RUN_DOCKER_ARGS="$2" 54 | shift 55 | shift 56 | ;; 57 | *) 58 | echo "Unrecognized option '${key}'" 59 | exit 1 60 | esac 61 | done 62 | 63 | if [[ ${MUJOCO_KEY} == "" ]]; then 64 | echo "Set MUJOCO_KEY file to a URL with your key" 65 | exit 1 66 | fi 67 | 68 | set -e # exit immediately on any error 69 | 70 | if [[ ${COPY} == "True" ]]; then 71 | git clone ${GIT_REPO} ${WORK_DIR}/${NAME} 72 | fi 73 | 74 | cd ${WORK_DIR}/${NAME} 75 | git checkout ${TREEISH} 76 | docker build --cache-from ${DOCKER_REPO}:${NAME} \ 77 | --build-arg MUJOCO_KEY=${MUJOCO_KEY} \ 78 | -t ${DOCKER_REPO}:${NAME} . 79 | 80 | mkdir -p data 81 | tmux new-session -d -s ${NAME} \ 82 | "export MUJOCO_KEY=${MUJOCO_KEY} && 83 | ./experiments/run_docker.sh -t ${NAME} -l ${TB_PORT}:6006 \ 84 | -n ${NAME} -c \"${CMD}\" ${RUN_DOCKER_ARGS}; \ 85 | echo 'Finished; press Ctrl-D to exit'; cat /dev/stdin" 86 | ATTEMPTS=0 87 | while [[ `docker inspect -f {{.State.Running}} ${NAME}` != "true" ]]; do 88 | echo "Waiting for Docker container to start" 89 | sleep 2 90 | ATTEMPTS=$((ATTEMPTS + 1)) 91 | if [[ $ATTEMPTS -gt 5 ]]; then 92 | echo "Could not start Docker container. Dieing. Look in tmux session '${NAME}' for logs." 93 | exit 1 94 | fi 95 | done 96 | tmux new-window -t ${NAME} -d \ 97 | "docker exec ${NAME} bash -c \"env=aprl . ci/prepare_env.sh && tensorboard --port 6006 --logdir data/\"" 98 | 99 | if [[ ${DETACH} == "True" ]]; then 100 | echo "Experiment '${NAME}' running in eponymous tmux session, \ 101 | cwd '${WORK_DIR}/${NAME}' and TensorBoard running on port '${TB_PORT}'" 102 | else 103 | tmux attach-session -t ${NAME} 104 | fi -------------------------------------------------------------------------------- /experiments/common.sh: -------------------------------------------------------------------------------- 1 | DOCKER_REPO="humancompatibleai/adversarial_policies" 2 | GIT_REPO="https://github.com/HumanCompatibleAI/adversarial-policies.git" 3 | 4 | call_parallel() { 5 | PARALLEL_FLAGS=$1 6 | shift 7 | OUT_DIR=$1 8 | shift 9 | MODULE_NAME=$1 10 | shift 11 | EXTRA_ARGS=$* 12 | parallel ${PARALLEL_FLAGS} --header : --results ${OUT_DIR}/parallel \ 13 | python -m ${MODULE_NAME} ${EXTRA_ARGS} 14 | } 15 | -------------------------------------------------------------------------------- /experiments/modelfree/attack_transfer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | OUT_ROOT=data/aws/score_agents 7 | TIMESTAMP=`date --iso-8601=seconds` 8 | 9 | function multi_score { 10 | python -m aprl.multi.score with victims='["zoo"]' opponents='["adversary"]' "$@" high_accuracy 11 | } 12 | 13 | if [[ $# -eq 0 ]]; then 14 | echo "usage: $0 [logdir ...]" 15 | exit 1 16 | fi 17 | 18 | for dir in normal victim_masked_init victim_masked_zero adversary_masked_init; do 19 | mkdir -p ${OUT_ROOT}/${dir}/${TIMESTAMP} 20 | done 21 | 22 | ADVERSARY_PATHS=${OUT_ROOT}/normal/${TIMESTAMP}/best_adversaries.json 23 | python ${DIR}/highest_win_rate.py ${ADVERSARY_PATHS} --logdir $* 24 | 25 | export ADVERSARY_PATHS=${ADVERSARY_PATHS} 26 | 27 | multi_score save_path=${OUT_ROOT}/normal/${TIMESTAMP}/adversary_transfer.json& 28 | wait_proc 29 | 30 | multi_score mask_observations_of_victim \ 31 | save_path=${OUT_ROOT}/victim_masked_init/${TIMESTAMP}/adversary_transfer.json& 32 | wait_proc 33 | 34 | multi_score mask_observations_of_victim mask_observations_with_zeros \ 35 | save_path=${OUT_ROOT}/victim_masked_zero/${TIMESTAMP}/adversary_transfer.json& 36 | wait_proc 37 | 38 | multi_score mask_observations_of_adversary \ 39 | save_path=${OUT_ROOT}/adversary_masked_init/${TIMESTAMP}/adversary_transfer.json& 40 | wait_proc 41 | 42 | wait -------------------------------------------------------------------------------- /experiments/modelfree/baselines.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . ${DIR}/common.sh 5 | 6 | function multi_score { 7 | python -m aprl.multi.score with "$@" high_accuracy 8 | } 9 | 10 | OUT_DIR=data/aws/score_agents.tmp 11 | 12 | mkdir -p ${OUT_DIR} 13 | for kind in zoo fixed; do 14 | mkdir -p ${OUT_DIR}/normal 15 | multi_score victims='["zoo"]' opponents="[\"${kind}\"]" \ 16 | save_path=${OUT_DIR}/normal/${kind}_baseline.json& 17 | wait_proc 18 | 19 | mkdir -p ${OUT_DIR}/victim_masked_init 20 | multi_score victims='["zoo"]' opponents="[\"${kind}\"]" \ 21 | mask_observations_of_victim \ 22 | save_path=${OUT_DIR}/victim_masked_init/${kind}_baseline.json& 23 | wait_proc 24 | 25 | mkdir -p ${OUT_DIR}/victim_masked_zero 26 | multi_score victims='["zoo"]' opponents="[\"${kind}\"]" \ 27 | mask_observations_of_victim mask_observations_with_zeros \ 28 | save_path=${OUT_DIR}/victim_masked_zero/${kind}_baseline.json& 29 | wait_proc 30 | done 31 | 32 | wait 33 | -------------------------------------------------------------------------------- /experiments/modelfree/common.sh: -------------------------------------------------------------------------------- 1 | function wait_proc { 2 | if [[ -f ~/ray_bootstrap_config.yaml ]]; then 3 | # Running on a Ray cluster. We want to submit all the jobs in parallel. 4 | sleep 5 # stagger jobs a bit 5 | else 6 | # Running locally. Each job will start a Ray cluster. Submit sequentially. 7 | wait 8 | fi 9 | } 10 | -------------------------------------------------------------------------------- /experiments/modelfree/dec2018replication.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Reproduce results of Dec 2018 draft write-up 4 | 5 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | . 
${DIR}/../common.sh 7 | 8 | ENV_NAMES="multicomp/KickAndDefend-v0 multicomp/SumoAnts-v0" 9 | PRETRAINED="1 2 3" 10 | SEEDS="0 1 2" 11 | 12 | OUT_DIR=data/mf-dec2018rep 13 | 14 | # Train PPO against victims 15 | python -m aprl.multi.train with dec2018rep 16 | 17 | SCORE_AGENT="aprl.score_agent with episodes=1000 num_env=16 render=False" 18 | # Baseline: pretrained policy 19 | call_parallel "$*" ${OUT_DIR}/pretrained ${SCORE_AGENT} \ 20 | env_name={env_name} agent_a_path={agent_a_path} agent_b_path={agent_b_path} \ 21 | ::: env_name ${ENV_NAMES} ::: agent_a_path ${PRETRAINED} ::: agent_b_path ${PRETRAINED} 22 | 23 | # Baseline: random action and constant zero 24 | call_parallel "$*" ${OUT_DIR}/fixed ${SCORE_AGENT} \ 25 | env_name={env_name} agent_a_type={agent_a_type} agent_b_path={agent_b_path} \ 26 | ::: env_name ${ENV_NAMES} ::: agent_a_type random zero ::: agent_b_path ${PRETRAINED} -------------------------------------------------------------------------------- /experiments/modelfree/defenses.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . ${DIR}/common.sh 5 | 6 | OUT_ROOT=score_agents/defenses 7 | mkdir -p ${OUT_ROOT} 8 | 9 | function multi_score { 10 | python -m aprl.multi.score with "$@" defenses high_accuracy 11 | } 12 | 13 | multi_score save_path=${OUT_ROOT}/normal.json& 14 | wait_proc 15 | 16 | multi_score mask_observations_of_victim save_path=${OUT_ROOT}/victim_masked_init.json& 17 | wait_proc 18 | 19 | wait 20 | -------------------------------------------------------------------------------- /experiments/modelfree/density.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | 5 | . ${DIR}/common.sh 6 | 7 | TIMESTAMP=`date --iso-8601=seconds` 8 | ACTIVATION_DIR="data/density/${TIMESTAMP}/activations" 9 | 10 | # We fit our density model with 20,000 timesteps, and use 20,000 timesteps for evaluation. 11 | # So we need 40,000 timesteps for the training opponent. The others we only actually need 20,000 12 | # for so this is slightly wasteful. 
13 | python -m aprl.common.generate_activations with score_update.score.timesteps=40000 \ 14 | out_dir=${ACTIVATION_DIR} 15 | 16 | for components in 5 10 20 40 80; do 17 | for cov_type in full diag; do 18 | python -m aprl.density.fit_density with gmm \ 19 | model_kwargs.n_components=${components} \ 20 | model_kwargs.covariance_type=${cov_type} \ 21 | activation_glob="${ACTIVATION_DIR}/*" \ 22 | output_root=data/density/${TIMESTAMP}/fitted 23 | wait_proc 24 | done 25 | done 26 | 27 | wait 28 | -------------------------------------------------------------------------------- /experiments/modelfree/highest_win_rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Processes experimental output to find adversarial policies with maximal win rate.""" 4 | 5 | import argparse 6 | import collections 7 | import json 8 | import logging 9 | import os.path 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | logger = logging.getLogger('scripts.highest_win_rate') 15 | 16 | 17 | def event_files(path): 18 | for root, dirs, files in os.walk(path, followlinks=True): 19 | # checkpoint directories never contain TF events files, and will slow down search 20 | dirs[:] = list(filter(lambda x: x != 'checkpoint', dirs)) 21 | if root.split(os.path.sep)[-2:] == ['rl', 'tb']: 22 | for name in files: 23 | if 'tfevents' in name: 24 | yield os.path.join(root, name) 25 | 26 | 27 | def get_stats(event_path, episode_window): 28 | events = collections.defaultdict(list) 29 | last_step = 0 30 | for event in tf.train.summary_iterator(event_path): 31 | for value in event.summary.value: 32 | if value.tag in ['game_win0', 'game_win1', 'game_tie']: 33 | events[value.tag].append(value.simple_value) 34 | last_step = event.step 35 | 36 | logger.info(f"Read {len(events['game_win0'])} events up to {last_step} from '{event_path}'") 37 | means = {k: np.mean(v[-episode_window:]) for k, v in events.items()} 38 | 39 | return means 40 | 41 | 42 | def _strip_up_to(path, dirname): 43 | path_components = path.split(os.path.sep) 44 | if path_components[0] == '': 45 | path_components[0] = os.path.sep 46 | try: 47 | path_index = len(path_components) - 1 - path_components[::-1].index(dirname) 48 | except ValueError as e: 49 | raise ValueError(f"Error stripping '{dirname}' in '{path_components}': {e}") 50 | return os.path.join(*path_components[0:path_index]) 51 | 52 | 53 | def get_sacred_config(event_path): 54 | root = _strip_up_to(event_path, 'baselines') 55 | sacred_config_path = os.path.join(root, 'sacred', 'train', '1', 'config.json') 56 | with open(sacred_config_path, 'r') as f: 57 | return json.load(f) 58 | 59 | 60 | def get_final_model_path(event_path): 61 | root = _strip_up_to(event_path, 'rl') 62 | abs_path = os.path.join(root, 'final_model') 63 | components = abs_path.split(os.path.sep) 64 | try: 65 | multi_train_start = components.index('multi_train') 66 | components = components[multi_train_start:] 67 | except ValueError: 68 | pass 69 | return os.path.sep.join(components) 70 | 71 | 72 | def unstack(d): 73 | d = collections.OrderedDict(sorted(d.items())) 74 | res = collections.OrderedDict() 75 | for k, v in d.items(): 76 | env_name, opp_index, opp_path = k 77 | res.setdefault(env_name, {}).setdefault(opp_index, {})[opp_path] = v 78 | return res 79 | 80 | 81 | def find_best(logdirs, episode_window): 82 | # keys: (env_name, opp_index, opp_path) 83 | # value: path to policy evaluated on env_name against opponent opp_path playing opp_index 84 | best_policy 
= {} 85 | best_winrate = collections.defaultdict(float) 86 | 87 | for logdir in logdirs: 88 | for event_path in event_files(logdir): 89 | stats = get_stats(event_path=event_path, episode_window=episode_window) 90 | config = get_sacred_config(event_path) 91 | env_name = str(config['env_name']) 92 | opp_index = int(config['embed_index']) 93 | opp_type = str(config['embed_type']) 94 | # multi_score is not set up to handle multiple embedded agent types 95 | if opp_type != 'zoo' and config['load_policy']['type'] == 'zoo': 96 | # Assuming that this case corresponds to a situation where we're finetuning a 97 | # zoo policy, and that we still want the resulting dictionary indexed by the 98 | # integer zoo policy we finetuned, rather than the full path of its adversary 99 | zoo_path = str(config['load_policy']['path']) 100 | else: 101 | zoo_path = str(config['embed_path']) 102 | our_index = 1 - opp_index 103 | key = (env_name, opp_index, zoo_path) 104 | our_winrate = stats[f'game_win{our_index}'] 105 | 106 | if our_winrate > best_winrate[key]: 107 | best_policy[key] = get_final_model_path(event_path) 108 | best_winrate[key] = our_winrate 109 | 110 | result = { 111 | 'policies': unstack(best_policy), 112 | 'winrates': unstack(best_winrate), 113 | } 114 | 115 | return result 116 | 117 | 118 | def directory_type(path): 119 | if not os.path.isdir(path): 120 | raise ValueError(f"'{path}' does not exist") 121 | return path 122 | 123 | 124 | def get_args(): 125 | parser = argparse.ArgumentParser() 126 | parser.add_argument('logdir', nargs="+", type=directory_type) 127 | parser.add_argument('--episode-window', type=int, default=50) 128 | parser.add_argument('--output_path') 129 | return parser.parse_args() 130 | 131 | 132 | def main(): 133 | logging.basicConfig(level=logging.INFO) 134 | parsed_args = get_args() 135 | output_path = parsed_args.output_path 136 | # If no output path is given, default to saving it in the first logdir under a fixed name 137 | if output_path is None: 138 | if len(parsed_args.logdir) > 1: 139 | raise ValueError("Must specify --output_path when using multiple log directories.") 140 | output_path = os.path.join(parsed_args.logdir[0], 'highest_win_policies_and_rates.json') 141 | 142 | for logdir in parsed_args.logdir: 143 | if 'multi_train' not in logdir.split(os.path.sep): 144 | logger.warning(f"logdir '{logdir}' does not contain 'multi_train'." 145 | "Falling back to absolute paths, JSON may not be portable.") 146 | 147 | logger.info(f"Output path: {output_path}") 148 | logger.info(f"Log dir: {parsed_args.logdir}") 149 | with open(output_path, 'w') as f: # fail fast if output_path inaccessible 150 | result = find_best(parsed_args.logdir, parsed_args.episode_window) 151 | json.dump(result, f) 152 | 153 | 154 | if __name__ == '__main__': 155 | main() 156 | -------------------------------------------------------------------------------- /experiments/modelfree/noisy_actions_and_obs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | AWS_ROOT=data/aws 7 | OUT_ROOT=${AWS_ROOT}/score_agents 8 | TIMESTAMP=`date --iso-8601=seconds` 9 | 10 | # Format: multi_score <opponent_type> <noise_type> ["extra_config1 ..."] 11 | # opponent_type: one of zoo or adversary 12 | # noise_type: one of ${NOISE_TYPES} 13 | # extra_config: a string with a list of space-separated named configs for aprl.multi.score 14 | # Saves to ${noise_type}/${TIMESTAMP}/${opponent_type}.json 15 | function multi_score { 16 | opponent_type=$1 17 | noise_type=$2 18 | extra_configs=$3 19 | 20 | python -m aprl.multi.score with victims='["zoo"]' opponents="[\"${opponent_type}\"]" \ 21 | ${noise_type} ${extra_configs} medium_accuracy \ 22 | save_path=${OUT_ROOT}/${noise_type}/${TIMESTAMP}/${opponent_type}.json 23 | wait_proc 24 | } 25 | 26 | # Sanity check we have the data 27 | if [[ ! -d ${OUT_ROOT} || ! -d ${AWS_ROOT}/multi_train ]]; then 28 | echo "Could not find some required data directories." 29 | echo "Consider running these commands (if using Ray, add to {head,worker}_start_ray_commands):" 30 | echo "aws s3 sync s3://adversarial-policies/score_agents/ /adversarial-policies/data/aws/score_agents/ &&" 31 | echo "aws s3 sync --exclude='*/checkpoint/*' --exclude='*/datasets/*' \ 32 | s3://adversarial-policies/multi_train/paper/20190429_011349/ \ 33 | /adversarial-policies/data/aws/multi_train/paper/20190429_011349/" 34 | exit 1 35 | fi 36 | 37 | # Make a directory for each of the noise types we're using, to store results in 38 | NOISE_TYPES="noise_adversary_actions noise_victim_actions mask_observations_with_additive_noise \ 39 | mask_observations_with_smaller_additive_noise" 40 | for dir in ${NOISE_TYPES}; do 41 | mkdir -p ${OUT_ROOT}/${dir}/${TIMESTAMP} 42 | done 43 | 44 | export ADVERSARY_PATHS=${OUT_ROOT}/normal/2019-05-05T18:12:24+00:00/best_adversaries.json 45 | 46 | multi_score zoo noise_adversary_actions 47 | echo "Zoo baseline noisy actions completed" 48 | 49 | multi_score adversary noise_adversary_actions 50 | echo "Noisy adversary actions completed" 51 | 52 | multi_score adversary noise_victim_actions 53 | echo "Noisy victim actions completed" 54 | 55 | multi_score zoo mask_observations_with_additive_noise mask_observations_of_victim 56 | multi_score adversary mask_observations_with_additive_noise mask_observations_of_victim 57 | echo "Additive noise masking complete" 58 | 59 | multi_score zoo mask_observations_with_smaller_additive_noise mask_observations_of_victim 60 | multi_score adversary mask_observations_with_smaller_additive_noise mask_observations_of_victim 61 | echo "Smaller additive noise masking complete" 62 | 63 | wait 64 | echo "All noise experiments complete" 65 | -------------------------------------------------------------------------------- /experiments/modelfree/plot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://github.com/mrahtz/tbplot 4 | TBPLOT="$HOME/dev/tbplot/tbplot" 5 | ENV_NAMES="KickAndDefend-v0 SumoHumans-v0 SumoAnts-v0 \ 6 | SumoHumansAutoContact-v0 SumoAntsAutoContact-v0 \ 7 | RunToGoalHumans-v0 RunToGoalAnts-v0 \ 8 | YouShallNotPassHumans-v0" 9 | VICTIMS="1 2 3 4" 10 | 11 | if [[ $# -ne 2 ]]; then 12 | echo "usage: $0 <data_dir> <out_dir>" 13 | exit 1 14 | fi 15 | 16 | DATA_DIR="$1" 17 | OUT_DIR="$2" 18 | 19 | parallel -j 8 --header : \ 20 | ${TBPLOT} --step --smoothing 0.9 \ 21 | --out ${OUT_DIR}/{env_name}_{victim}.png \ 22 | "${DATA_DIR}/train_rl_*_env_name:victim_path=\[*{env_name}*,\ {victim}\]*/data/baselines/*/rl/tb" \ 23 | ::: env_name ${ENV_NAMES} \ 24 | 
::: victim ${VICTIMS} 25 | -------------------------------------------------------------------------------- /experiments/planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/experiments/planning/__init__.py -------------------------------------------------------------------------------- /experiments/planning/common.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | import gym 5 | from ilqr.controller import RecedingHorizonController 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def set_seeds(seed): 11 | random.seed(seed) 12 | np.random.seed(seed) 13 | 14 | 15 | def on_iteration(iteration_count, xs, us, J_opt, accepted, converged): 16 | info = "converged" if converged else ("accepted" if accepted else "failed") 17 | print("iteration", iteration_count, info, J_opt, xs[-1], us[-1]) 18 | 19 | 20 | def make_env(env_name, seed, horizon=None): 21 | env = gym.make(env_name) 22 | if horizon is None: 23 | horizon = env._max_episode_steps 24 | env = env.unwrapped 25 | env.frame_skip = 1 26 | env.seed(seed) 27 | env.reset() 28 | us_init = np.array([env.action_space.sample() for _ in range(horizon)]) 29 | 30 | return env, us_init 31 | 32 | 33 | def fit_ilqr(ilqrs, x0s, us_init, **kwargs): 34 | xs = {} 35 | us = {} 36 | print(ilqrs.keys()) 37 | for k, ilqr in ilqrs.items(): 38 | start = time.time() 39 | print('*** Fitting {} ***'.format(k)) 40 | x0 = x0s[k] 41 | xs[k], us[k] = ilqr.fit(x0, us_init, on_iteration=on_iteration, 42 | **kwargs) 43 | end = time.time() 44 | print('*** Fitted {} in {}s ***'.format(k, end - start)) 45 | return xs, us 46 | 47 | 48 | def receding(ilqr, x0, us_init, seed, step_size=1, horizon=None, **kwargs): 49 | if horizon is None: 50 | horizon = len(us_init) 51 | controller = RecedingHorizonController(x0, ilqr) 52 | controller.seed(seed) 53 | xs = np.zeros((horizon, ) + x0.shape) 54 | us = np.zeros((horizon, ) + us_init[0].shape) 55 | i = 0 56 | for x, u in controller.control(us_init, step_size=step_size, **kwargs): 57 | xs[i:i + step_size] = x[:-1] 58 | us[i:i + step_size] = u 59 | print('iteration {} x = {}, u = {}'.format(i, x, u)) 60 | i += step_size 61 | if i == horizon: 62 | break 63 | return xs, us 64 | 65 | 66 | def evaluate(env, dynamics, x0, us, render=False): 67 | dynamics.set_state(x0) 68 | if render: 69 | env.render() 70 | rew = [] 71 | actual_xs = [] 72 | for u in us: 73 | _obs, r, done, info = env.step(u) 74 | if done: 75 | print('warning: early termination! 
(assuming zero-reward from now)') 76 | break 77 | rew.append(r) 78 | actual_xs.append(dynamics.get_state()) 79 | if render: 80 | env.render() 81 | time.sleep(0.01) 82 | return rew, actual_xs 83 | 84 | 85 | def multi_evaluate(env, dynamics, x0s, us, **kwargs): 86 | rews = {} 87 | actual_xs = {} 88 | for k, solved_us in us.items(): 89 | print(k) 90 | rews[k], actual_xs[k] = evaluate(env.unwrapped, dynamics[k], x0s[k], 91 | solved_us, **kwargs) 92 | rewards = {k: sum(r) for k, r in rews.items()} 93 | lengths = {k: len(r) for k, r in rews.items()} 94 | return pd.DataFrame({'rewards': rewards, 'lengths': lengths}) 95 | -------------------------------------------------------------------------------- /experiments/pull_public_s3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | aws --no-sign-request s3 sync \ 4 | --exclude='*/checkpoint/*' --exclude='*/datasets/*' --exclude='videos/*' \ 5 | s3://adversarial-policies-public/ data/aws-public/ 6 | -------------------------------------------------------------------------------- /experiments/pull_s3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | S3_SYNC_CMD="aws s3 sync --exclude=*/checkpoint/* --exclude=*/datasets/*" 4 | 5 | ${S3_SYNC_CMD} s3://adversarial-policies/ data/aws/ 6 | ${S3_SYNC_CMD} s3://adversarial-policies-public/ data/aws-public/ 7 | -------------------------------------------------------------------------------- /experiments/push_public_s3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Local directories 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | PROJECT_DIR=$( dirname "${SCRIPT_DIR}" ) 6 | PUBLIC_AWS=${PROJECT_DIR}/data/aws-public 7 | 8 | # S3 Repos and commands 9 | PRIVATE_S3_REPO=s3://adversarial-policies 10 | PUBLIC_S3_REPO=s3://adversarial-policies-public 11 | S3_SYNC_CMD="aws s3 sync --exclude='*/checkpoint/*' --exclude='*/datasets/*' --acl public-read --delete" 12 | 13 | # Copy subset of data from private AWS to public view 14 | echo "Syncing from private bucket ${PRIVATE_S3_REPO} to public bucket ${PUBLIC_S3_REPO}" 15 | 16 | REMOTE_COPY="multi_train/paper/20190429_011349 score_agents" 17 | for path in ${REMOTE_COPY}; do 18 | echo "Syncing ${path}" 19 | ${S3_SYNC_CMD} ${PRIVATE_S3_REPO}/${path} ${PUBLIC_S3_REPO}/${path} 20 | done 21 | 22 | echo "Syncing from local machine ${PUBLIC_AWS} to public bucket ${PUBLIC_S3_REPO}" 23 | LOCAL_COPY="videos" 24 | for path in ${LOCAL_COPY}; do 25 | echo "Syncing ${path}" 26 | ${S3_SYNC_CMD} ${PUBLIC_AWS}/${path} ${PUBLIC_S3_REPO}/${path} 27 | done 28 | -------------------------------------------------------------------------------- /experiments/remote_build_and_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | REMOTE_HOST="" 7 | LOCAL_DATA="${DIR}/../data" 8 | REMOTE_WORK_DIR="/scratch/${USER}/aprl" 9 | TB_PORT=6006 10 | EXTRA_ARGS="" 11 | 12 | 13 | while [[ $# -gt 0 ]] 14 | do 15 | key="$1" 16 | case $key in 17 | -c|--cmd) 18 | CMD="$2" 19 | shift 20 | shift 21 | ;; 22 | -h|--host) 23 | REMOTE_HOST="$2" 24 | shift 25 | shift 26 | ;; 27 | -l|--listen) 28 | TB_PORT="$2" 29 | shift 30 | shift 31 | ;; 32 | -n|--name) 33 | NAME="$2" 34 | shift 35 | shift 36 | ;; 37 | -o|--output-dir) 38 | LOCAL_DATA="$2" 39 | shift 40 | shift 41 | ;; 42 | -w|--work-dir) 43 | REMOTE_WORK_DIR="$2" 44 | shift 45 | shift 46 | ;; 47 | *) 48 | EXTRA_ARGS="${EXTRA_ARGS} $1" 49 | shift 50 | ;; 51 | esac 52 | done 53 | 54 | if [[ ${MUJOCO_KEY} == "" ]]; then 55 | echo "Set MUJOCO_KEY file to a URL with your key" 56 | exit 1 57 | fi 58 | 59 | if [[ ${REMOTE_HOST} == "" ]]; then 60 | echo "Missing mandatory argument -h " 61 | exit 1 62 | fi 63 | 64 | set -o xtrace # print commands 65 | set -e # exit immediately on any error 66 | 67 | echo "Starting experiment" 68 | ssh -t -L ${TB_PORT}:localhost:${TB_PORT} ${REMOTE_HOST} \ 69 | "export MUJOCO_KEY='${MUJOCO_KEY}' && \ 70 | git clone ${GIT_REPO} ${REMOTE_WORK_DIR}/${NAME} || (cd ${REMOTE_WORK_DIR}/${NAME} && git fetch) && \ 71 | ${REMOTE_WORK_DIR}/${NAME}/experiments/build_and_run.sh \ 72 | --no-copy -w ${REMOTE_WORK_DIR} -n ${NAME} -l ${TB_PORT} -c \"${CMD}\" ${EXTRA_ARGS}" 73 | 74 | echo "Experiment completed, copying data" 75 | rsync -rlptv --exclude=sacred ${REMOTE_HOST}:${REMOTE_WORK_DIR}/${NAME}/data/ ${LOCAL_DATA}/ 76 | rsync -rlptv ${REMOTE_HOST}:${REMOTE_WORK_DIR}/${NAME}/data/sacred/ ${LOCAL_DATA}/sacred/${REMOTE_HOST} 77 | -------------------------------------------------------------------------------- /experiments/run_docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . ${DIR}/common.sh 5 | 6 | CMD="bash" 7 | NAME="adversarial-policies" 8 | TAG="latest" 9 | RM="--rm" 10 | FLAGS="" 11 | 12 | while [[ $# -gt 0 ]] 13 | do 14 | key="$1" 15 | 16 | case $key in 17 | -c|--cmd) 18 | CMD="$2" 19 | shift 20 | shift 21 | ;; 22 | -l|--listen) 23 | FLAGS="${FLAGS} -p $2" 24 | shift 25 | shift 26 | ;; 27 | -n|--name) 28 | NAME="$2" 29 | shift 30 | shift 31 | ;; 32 | -p|--persist) 33 | RM="" 34 | shift 35 | ;; 36 | -t|--tag) 37 | TAG="$2" 38 | shift 39 | shift 40 | ;; 41 | *) 42 | echo "Unrecognized option '${key}'" 43 | exit 1 44 | esac 45 | done 46 | 47 | if [[ ${MUJOCO_KEY} == "" ]]; then 48 | echo "Set MUJOCO_KEY file to a URL with your key" 49 | exit 1 50 | fi 51 | 52 | docker run \ 53 | ${FLAGS} \ 54 | ${RM} \ 55 | -it \ 56 | --env MUJOCO_KEY=${MUJOCO_KEY} \ 57 | --name ${NAME} \ 58 | --mount type=bind,source="$(pwd)"/data,target=/adversarial-policies/data \ 59 | ${DOCKER_REPO}:${TAG} \ 60 | bash -c ". ci/prepare_env.sh && ${CMD}" 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | target-version = ["py37"] 4 | -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- 1 | # Baselines unhelpfully does not list TensorFlow as a requirement, 2 | # but setup will break if it isn't installed. 
So force installing it first. 3 | tensorflow>=1.13.0,<1.14.0 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | codecov 3 | codespell 4 | flake8 5 | flake8-blind-except 6 | flake8-builtins 7 | flake8-debugger 8 | flake8-isort 9 | isort~=4.0 10 | pytype 11 | pytest 12 | pytest-cov 13 | pytest-shard 14 | pytest-xdist 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.15 2 | pandas>=0.24.1 3 | scikit-learn>=0.20.3 4 | Pillow>=6.0.0 5 | matplotlib>=3.0.3 6 | Theano>=1.0 7 | sacred>=0.8.1 8 | pymongo>=3.8.0 9 | GitPython>=2.1 10 | baselines @ git+https://github.com/HumanCompatibleAI/baselines.git@f70377 11 | stable-baselines @ git+https://github.com/hill-a/stable-baselines.git@6fbc9a9 12 | ray[debug,tune]>=1.0.0 13 | boto3>=1.9 14 | awscli>=1.16 15 | statsmodels>=0.9.0 16 | seaborn>=0.9.0 17 | ilqr @ git+https://github.com/anassinator/ilqr.git 18 | gym[mujoco]==0.15.4 19 | mujoco-py-131 @ git+https://github.com/AdamGleave/mujoco-py.git@mj131 20 | gym_compete @ git+https://github.com/HumanCompatibleAI/multiagent-competition.git@3a3f9dc 21 | -------------------------------------------------------------------------------- /scripts/aws/cloudwatch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | INSTANCE_ID=`ec2metadata --instance-id | cut -d' ' -f 2` 4 | EC2_REGION=`ec2metadata --availability-zone | sed 's/[a-z]$//' | cut -d' ' -f 2` 5 | CLUSTER_NAME=`aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" "Name=key,Values=ray-cluster-name" --region=${EC2_REGION} | grep Value | cut -f2 -d':' | cut -f2 -d'"'` 6 | ALARM_NAME="${CLUSTER_NAME}-idle" 7 | 8 | aws cloudwatch delete-alarms --region ${EC2_REGION} --alarm-name ${ALARM_NAME} 9 | aws cloudwatch put-metric-alarm --region ${EC2_REGION} --alarm-name ${ALARM_NAME} \ 10 | --namespace AWS/EC2 --metric-name CPUUtilization \ 11 | --threshold 20 --comparison-operator LessThanThreshold \ 12 | --statistic Average --period 3600 \ 13 | --datapoints-to-alarm 12 --evaluation-periods 24 \ 14 | --treat-missing-data notBreaching \ 15 | --alarm-actions arn:aws:sns:us-west-2:286342508718:default \ 16 | --dimensions "Name=InstanceId,Value=${INSTANCE_ID}" 17 | -------------------------------------------------------------------------------- /scripts/aws/termination.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | from urllib.request import urlopen 3 | import logging 4 | import subprocess 5 | import time 6 | 7 | TERMINATION_URL = 'http://169.254.169.254/latest/meta-data/spot/termination-time' 8 | POLL_INTERVAL = 5 9 | 10 | def run(): 11 | logging.info('Starting.') 12 | not_terminated = True 13 | while not_terminated: 14 | try: 15 | time.sleep(POLL_INTERVAL) 16 | req = urlopen(TERMINATION_URL) 17 | not_terminated = False 18 | except urllib.error.HTTPError as e: 19 | if e.getcode() != 404: 20 | logging.error('Unexpected response code %s', e) 21 | except urllib.error.URLError as e: 22 | logging.error('Unexpected error %s', e) 23 | logging.info('Received termination notice!') 24 | logging.info('Scheduled to terminate at %s', req.read()) 25 | logging.info('Shutting down Ray cleanly.') 26 | subprocess.check_call(['ray', 'stop']) 27 | 28 | if
__name__ == '__main__': 29 | logging.basicConfig(level=logging.INFO) 30 | run() 31 | -------------------------------------------------------------------------------- /scripts/doubleblind.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | ROOT_DIR="$( dirname "${SCRIPT_DIR}" )" 5 | 6 | OPTIONS="-v -z -r -lpt" 7 | EXCLUDES="LICENSE README.md setup.py scripts/doubleblind.sh ci/local_tests.sh .travis.yml experiments/common.sh experiments/planning 8 | src/aprl/configs/ray/ .git supplementary.zip *.pkl requirements*.txt" 9 | 10 | # Refuse to build the archive if we find any of these words in non-excluded sources 11 | BLACKLISTED="Adam Gleave Michael Dennis Cody Neel Kant Sergey Levine Stuart Russell berkeley humancompatibleai humancompatible" 12 | 13 | TMPDIR=`mktemp --tmpdir -d doubleblinded.XXXXXXXX` 14 | 15 | SYNC_CMD="rsync ${OPTIONS} --exclude-from=.gitignore" 16 | for exclude in ${EXCLUDES}; do 17 | SYNC_CMD="${SYNC_CMD} --exclude=${exclude}" 18 | done 19 | 20 | ${SYNC_CMD} ${ROOT_DIR} ${TMPDIR} 21 | pushd ${TMPDIR} 22 | 23 | GREP_TERMS="" 24 | for pattern in ${BLACKLISTED}; do 25 | GREP_TERMS="${GREP_TERMS} -e ${pattern}" 26 | done 27 | grep -r . -i -F ${GREP_TERMS} 28 | if [[ $? -ne 1 ]]; then 29 | echo "Found blacklisted word. Dying." 30 | exit 1 31 | fi 32 | 33 | cp $HOME/dev/adversarial-policies-paper/supplementary.pdf . 34 | 35 | rm ${ROOT_DIR}/supplementary.zip 36 | zip -r ${ROOT_DIR}/supplementary.zip . 37 | popd 38 | -------------------------------------------------------------------------------- /scripts/grab_frame.py: -------------------------------------------------------------------------------- 1 | """Extract a frame from the initial state of an environment for illustration purposes.
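Example invocation (environment name, seed and output path are illustrative): python scripts/grab_frame.py --env multicomp/SumoHumansAutoContact-v0 --seed 0 --out initial_frame.png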
2 | 3 | Lets user interactively move the camera, then takes a screenshot when ready.""" 4 | 5 | import argparse 6 | import select 7 | import sys 8 | import time 9 | 10 | import imageio 11 | import mujoco_py 12 | import numpy as np 13 | 14 | from aprl.envs.wrappers import make_env 15 | from aprl.visualize.annotated_gym_compete import AnnotatedGymCompete 16 | 17 | 18 | def get_img(env_name, seed): 19 | env = make_env(env_name, int(seed), 0, None) 20 | env = AnnotatedGymCompete(env, env_name, 'zoo', '1', 'zoo', '1', None, 21 | resolution=(640, 480), font='times', font_size=24, 22 | draw=False) 23 | env.reset() 24 | 25 | env_scene = env.unwrapped.env_scene 26 | env_scene.viewer = mujoco_py.MjViewer(init_width=1000, init_height=750) 27 | env_scene.viewer.start() 28 | env_scene.viewer.set_model(env_scene.model) 29 | env_scene.viewer_setup() 30 | 31 | print("Type save to save the image, step to take one timestep.") 32 | 33 | running = True 34 | while running: 35 | img = None 36 | while sys.stdin not in select.select([sys.stdin], [], [], 0)[0]: 37 | env.render() 38 | img = env.render(mode='rgb_array') 39 | 40 | input = sys.stdin.readline().strip() 41 | if input == 'save': 42 | running = False 43 | elif input == 'step': 44 | action = tuple(np.zeros(space.shape) for space in env.action_space.spaces) 45 | env.step(action) 46 | else: 47 | print(f"Unrecognized command '{input}'") 48 | 49 | return img 50 | 51 | 52 | def main(): 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument('--env', type=str, help="environment name") 55 | parser.add_argument('--seed', type=int, default=time.time()) 56 | parser.add_argument('--out', type=str, help="path to save figure") 57 | args = parser.parse_args() 58 | 59 | img = get_img(args.env, args.seed) 60 | imageio.imwrite(args.out, img) 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /scripts/incomplete_experiments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import pandas as pd 6 | 7 | 8 | logger = logging.getLogger('scripts.incomplete_experiments') 9 | 10 | 11 | def directory_type(path): 12 | if not os.path.isdir(path): 13 | raise ValueError(f"'{path}' does not exist") 14 | return path 15 | 16 | 17 | def get_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('data_dir', type=directory_type) 20 | return parser.parse_args() 21 | 22 | 23 | def get_stats(data_dir): 24 | started = {} 25 | completed = {} 26 | data_dir = os.path.abspath(data_dir) 27 | for root, dirs, files in os.walk(data_dir, followlinks=True): 28 | # checkpoint directories are irrelevant and will slow down search 29 | logger.debug(f"Searching '{root}'") 30 | dirs[:] = list(filter(lambda x: x not in ['checkpoint', 'mon', 'tb'], dirs)) 31 | components = root.split(os.path.sep) 32 | 33 | if 'final_model' in dirs: 34 | # root is of format .../exp_name/timestamp/run_id/data/baselines/run_id 35 | assert components[-2] == 'baselines' 36 | logger.debug(f"Found final_model in '{root}'") 37 | exp_name = os.path.relpath(os.path.join('/', *components[:-4]), data_dir) 38 | completed[exp_name] = completed.get(exp_name, 0) + 1 39 | dirs[:] = [] # no need to search further in data/baselines/* 40 | elif 'sacred' in dirs: 41 | # root is of format ../exp_name/timestamp/run_id/data/sacred 42 | assert components[-1] == 'data' 43 | logger.debug(f"Found sacred at '{root}'") 44 | exp_name = 
os.path.relpath(os.path.join('/', *components[:-2]), data_dir) 45 | started[exp_name] = started.get(exp_name, 0) + 1 46 | dirs.remove('sacred') # don't need to search inside it 47 | 48 | return started, completed 49 | 50 | 51 | def compute_incompletes(started, completed): 52 | incomplete = {k: num_started - completed.get(k, 0) for k, num_started in started.items()} 53 | percent_incomplete = {k: num_incomplete / started[k] 54 | for k, num_incomplete in incomplete.items()} 55 | percent_incomplete = pd.Series(percent_incomplete) 56 | percent_incomplete = percent_incomplete.sort_values(ascending=False) 57 | percent_incomplete.index.name = 'path' 58 | percent_incomplete.name = 'percent_incomplete' 59 | return percent_incomplete 60 | 61 | 62 | def main(): 63 | logging.basicConfig(level=logging.INFO) 64 | args = get_args() 65 | started, completed = get_stats(args.data_dir) 66 | percent_incomplete = compute_incompletes(started, completed) 67 | print(percent_incomplete.to_csv(header=True)) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [coverage:report] 2 | exclude_lines = 3 | pragma: no cover 4 | omit = 5 | setup.py 6 | 7 | [coverage:run] 8 | include= 9 | src/* 10 | tests/* 11 | 12 | [coverage:paths] 13 | source = 14 | src/aprl 15 | *venv/lib/python*/site-packages/aprl 16 | 17 | [flake8] 18 | max-line-length=100 19 | ignore = W503,E203 20 | 21 | [isort] 22 | known_first_party=aprl 23 | known_third_party=baselines,gym_compete,ray,stable_baselines 24 | force_sort_within_sections=True 25 | force_grid_wrap=0 26 | include_trailing_comma=True 27 | line_length=100 28 | multi_line_output=3 29 | use_parentheses=True 30 | 31 | [tool:pytest] 32 | filterwarnings = 33 | ignore:the imp module is deprecated in favour of importlib:DeprecationWarning:distutils 34 | ignore:Using or importing the ABCs from 'collections':DeprecationWarning:(google|pkg_resources|tensorflow|theano) 35 | ignore:inspect.getargspec:DeprecationWarning:tensorflow 36 | ignore:Passing.* as a synonym of type is deprecated:FutureWarning:(tensorflow|tensorboard) 37 | ignore:inspect.getargspec:DeprecationWarning:ray 38 | ignore:Importing from numpy.testing:DeprecationWarning:theano 39 | ignore:Parameters to load are deprecated:Warning:gym 40 | ignore:The binary mode of fromstring is deprecated:DeprecationWarning:gym 41 | ignore:.*TF Lite has moved from tf.contrib.lite to tf.lite:PendingDeprecationWarning 42 | ignore:It appears you are loading from a file with old format. Older cloudpickle format has been replaced with zip-archived models. Consider saving the model with new format.:DeprecationWarning:stable_baselines 43 | ignore:Loading model parameters from a list. This has been replaced with parameter dictionaries with variable names and parameters. If you are loading from a file, consider re-saving the file.:DeprecationWarning:stable_baselines 44 | ignore:Usage of `load_running_average` is deprecated. 
Please use `load` or pickle instead.:DeprecationWarning:stable_baselines 45 | 46 | [pytype] 47 | inputs = aprl 48 | python_version = 3.7 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import find_packages, setup 4 | 5 | setup( 6 | name="aprl", 7 | version="0.2", 8 | description="Adversarial Policies for Reinforcement Learning", 9 | author="Adam Gleave, Michael Dennis, et al", 10 | author_email="adam@gleave.me", 11 | python_requires=">=3.7.0", 12 | url="https://github.com/HumanCompatibleAI/adversarial-policies", 13 | packages=find_packages("src"), 14 | package_dir={"": "src"}, 15 | package_data={"aprl": ["configs/multi/*.json", "configs/noise/*.json", "configs/rew/*.json"]}, 16 | # We have some non-pip packages as requirements, 17 | # see requirements-build.txt and requirements.txt. 18 | install_requires=[], 19 | include_package_data=True, 20 | license="MIT", 21 | classifiers=[ 22 | # Trove classifiers 23 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 24 | "License :: OSI Approved :: MIT License", 25 | "Programming Language :: Python", 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 3.7", 28 | "Programming Language :: Python :: Implementation :: CPython", 29 | "Programming Language :: Python :: Implementation :: PyPy", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /src/aprl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/__init__.py -------------------------------------------------------------------------------- /src/aprl/activations/__init__.py: -------------------------------------------------------------------------------- 1 | """Generating and analysing activations of victim policy network.""" 2 | -------------------------------------------------------------------------------- /src/aprl/activations/density/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/activations/density/__init__.py -------------------------------------------------------------------------------- /src/aprl/activations/density/pipeline.py: -------------------------------------------------------------------------------- 1 | """Records activations from victim's policy network and then fits a density model.""" 2 | 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | import sacred 8 | from sacred.observers import FileStorageObserver 9 | 10 | from aprl.activations import generate_activations 11 | from aprl.activations.density.fit_density import fit_model, fit_model_ex 12 | from aprl.common import utils 13 | 14 | density_ex = sacred.Experiment( 15 | "density", ingredients=[generate_activations.generate_activations_ex, fit_model_ex] 16 | ) 17 | logger = logging.getLogger("aprl.density.pipeline") 18 | 19 | 20 | @density_ex.config 21 | def main_config(generate_activations, fit_density_model): 22 | generate_activations = dict(generate_activations) 23 | generate_activations["score_update"] = {"score": {"timesteps": 40000}} 24 | 25 | output_root = osp.join("data", "density") # where to produce 
output 26 | _ = locals() # quieten flake8 unused variable warning 27 | del _ 28 | 29 | 30 | @density_ex.named_config 31 | def debug_config(generate_activations, fit_density_model): 32 | # Is this the name of an ingredient? Is it being auto-added to config somehow? 33 | output_root = "/tmp/density-debug" 34 | generate_activations = dict(generate_activations) 35 | fit_density_model = dict(fit_density_model) 36 | 37 | generate_activations["score_configs"] = [("debug_two_agents",)] 38 | generate_activations["score_update"] = {"score": {"timesteps": 100}} 39 | fit_density_model["max_timesteps"] = 100 40 | fit_density_model["model_kwargs"] = {"n_components": 2} 41 | 42 | _ = locals() # quieten flake8 unused variable warning 43 | del _ 44 | 45 | 46 | @density_ex.main 47 | def pipeline(_run, output_root, fit_density_model): 48 | out_dir = osp.join(output_root, utils.make_timestamp()) 49 | os.makedirs(out_dir) 50 | 51 | activation_glob = fit_density_model["activation_glob"] 52 | if activation_glob is None: 53 | activation_dir = osp.join(out_dir, "activations") 54 | generate_activations.generate_activations(out_dir=activation_dir) 55 | activation_glob = osp.join(activation_dir, "*") 56 | 57 | # This is unsuitable for hyperparameter sweeps, as can only run one model fitting step. 58 | # See experiments/modelfree/density.sh for a bash script hyperparameter sweep, that 59 | # re-uses activations. 60 | # SOMEDAY: Add support for running multiple fitting configs? 61 | # (Does not neatly fit into Sacred model.) 62 | model_dir = osp.join(out_dir, "fitted") 63 | fit_model(activation_glob=activation_glob, output_root=model_dir) 64 | 65 | return out_dir 66 | 67 | 68 | def main(): 69 | observer = FileStorageObserver(osp.join("data", "sacred", "density")) 70 | density_ex.observers.append(observer) 71 | density_ex.run_commandline() 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /src/aprl/activations/generate_activations.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import os.path as osp 5 | 6 | import sacred 7 | from sacred.observers import FileStorageObserver 8 | 9 | from aprl.common import utils 10 | from aprl.multi.score import extract_data, run_external 11 | 12 | generate_activations_ex = sacred.Experiment("generate_activations") 13 | logger = logging.getLogger("aprl.activations.generate_activations") 14 | 15 | 16 | @generate_activations_ex.config 17 | def activation_storing_config(): 18 | adversary_path = osp.join( 19 | "data", 20 | "aws", 21 | "score_agents", 22 | "normal", 23 | "2019-05-05T18:12:24+00:00", 24 | "best_adversaries.json", 25 | ) 26 | ray_upload_dir = "data" # where Ray will upload multi.score outputs. 
'data' works on local 27 | out_dir = None 28 | 29 | # Configs for the multi-score experiments 30 | score_configs = [(x,) for x in ["zoo_baseline", "random_baseline", "adversary_trained"]] 31 | score_update = {} 32 | 33 | _ = locals() # quieten flake8 unused variable warning 34 | del _ 35 | 36 | 37 | def _activations_path_generator( 38 | trial_root, 39 | cfg, 40 | env_sanitized, 41 | victim_index, 42 | victim_type, 43 | victim_path, 44 | opponent_type, 45 | opponent_path, 46 | ): 47 | del cfg 48 | src_path = osp.join(trial_root, "data", "trajectories", f"agent_{victim_index}.npz") 49 | 50 | if opponent_path.startswith("/"): # is path name 51 | opponent_root = osp.sep.join(opponent_path.split(osp.sep)[:-3]) 52 | opponent_sacred = osp.join(opponent_root, "sacred", "train", "1", "config.json") 53 | 54 | with open(opponent_sacred, "r") as f: 55 | opponent_cfg = json.load(f) 56 | 57 | if "embed_path" in opponent_cfg: 58 | opponent_path = opponent_cfg["embed_path"] 59 | elif "victim_path" in opponent_cfg: 60 | # TODO(adam): remove backwards compatibility when all policies retrained 61 | opponent_path = opponent_cfg["victim_path"] 62 | else: 63 | raise KeyError("'embed_path' and 'victim_path' not present in 'opponent_cfg'") 64 | 65 | new_name = ( 66 | f"{env_sanitized}_victim_{victim_type}_{victim_path}" 67 | f"_opponent_{opponent_type}_{opponent_path}" 68 | ) 69 | return src_path, new_name, "npz" 70 | 71 | 72 | @generate_activations_ex.main 73 | def generate_activations( 74 | _run, out_dir, score_configs, score_update, adversary_path, ray_upload_dir 75 | ): 76 | """Uses multi.score to generate activations, then extracts them into a convenient 77 | directory structure.""" 78 | logger.info("Generating activations") 79 | activation_dirs = run_external( 80 | score_configs, 81 | post_named_configs=["save_activations"], 82 | config_updates=score_update, 83 | adversary_path=adversary_path, 84 | ) 85 | 86 | os.makedirs(out_dir) 87 | extract_data(_activations_path_generator, out_dir, activation_dirs, ray_upload_dir) 88 | logger.info("Activations saved") 89 | 90 | utils.add_artifacts(_run, out_dir) 91 | 92 | 93 | def main(): 94 | observer = FileStorageObserver(osp.join("data", "sacred", "generate_activations")) 95 | generate_activations_ex.observers.append(observer) 96 | generate_activations_ex.run_commandline() 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /src/aprl/activations/tsne/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/activations/tsne/__init__.py -------------------------------------------------------------------------------- /src/aprl/activations/tsne/fit_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import os.path as osp 4 | import pickle 5 | import re 6 | import tempfile 7 | from typing import Any, Dict 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import ray 12 | import sacred 13 | from sacred.observers import FileStorageObserver 14 | from sklearn.manifold import TSNE 15 | 16 | from aprl.common import utils 17 | 18 | fit_model_ex = sacred.Experiment("tsne_fit_model") 19 | logger = logging.getLogger("aprl.activations.tsne.fit_model") 20 | 21 | 22 | @fit_model_ex.config 23 | def base_config(): 24 | ray_server = None # by default will launch 
a server 25 | init_kwargs = {} # passed to ray.init() 26 | activation_dir = None 27 | output_root = None 28 | data_type = "ff_policy" 29 | num_components = 2 30 | num_observations = None 31 | seed = 0 32 | perplexity = 250 33 | _ = locals() # quieten flake8 unused variable warning 34 | del _ 35 | 36 | 37 | @fit_model_ex.named_config 38 | def debug_config(): 39 | num_observations = 1000 40 | _ = locals() # quieten flake8 unused variable warning 41 | del _ 42 | 43 | 44 | def _load_and_reshape_single_file(np_path, opponent_type, data_type): 45 | traj_data = np.load(np_path, allow_pickle=True) 46 | episode_list = traj_data[data_type].tolist() 47 | episode_lengths = [len(episode) for episode in episode_list] 48 | episode_id = [] 49 | observation_index = [] 50 | relative_observation_index = [] 51 | for i, episode_length in enumerate(episode_lengths): 52 | episode_id += [i] * episode_length 53 | episode_observation_ids = list(range(episode_length)) 54 | observation_index += episode_observation_ids 55 | relative_observation_index += [el / episode_length for el in episode_observation_ids] 56 | 57 | concatenated_data = np.concatenate(episode_list) 58 | opponent_type = [opponent_type] * len(concatenated_data) 59 | 60 | metadata_df = pd.DataFrame( 61 | { 62 | "episode_id": episode_id, 63 | "observation_index": observation_index, 64 | "relative_observation_index": relative_observation_index, 65 | "opponent_id": opponent_type, 66 | } 67 | ) 68 | return concatenated_data, metadata_df 69 | 70 | 71 | @ray.remote 72 | def fit_tsne_helper( 73 | activation_paths, output_dir, num_components, num_observations, perplexity, data_type 74 | ): 75 | logger.info(f"Starting T-SNE fitting, saving to {output_dir}") 76 | 77 | all_file_data = [] 78 | all_metadata = [] 79 | for opponent_type, path in activation_paths.items(): 80 | logger.debug(f"Loaded data for {opponent_type} from {path}") 81 | file_data, metadata = _load_and_reshape_single_file(path, opponent_type, data_type) 82 | all_file_data.append(file_data) 83 | all_metadata.append(metadata) 84 | 85 | merged_file_data = np.concatenate(all_file_data) 86 | merged_metadata = pd.concat(all_metadata) 87 | 88 | # Optionally, sub-sample 89 | if num_observations is None: 90 | num_observations = len(merged_metadata) 91 | sub_data = merged_file_data[0:num_observations].reshape(num_observations, 128) 92 | 93 | # Save metadata 94 | metadata_path = os.path.join(output_dir, "metadata.csv") 95 | merged_metadata[0:num_observations].to_csv(metadata_path) 96 | 97 | # Fit t-SNE 98 | tsne_obj = TSNE(n_components=num_components, verbose=1, perplexity=perplexity) 99 | tsne_ids = tsne_obj.fit_transform(sub_data) 100 | 101 | # Save weights 102 | tsne_weights_path = os.path.join(output_dir, "tsne_weights.pkl") 103 | with open(tsne_weights_path, "wb") as fp: 104 | pickle.dump(tsne_obj, fp) 105 | 106 | # Save cluster IDs 107 | cluster_ids_path = os.path.join(output_dir, "cluster_ids.npy") 108 | np.save(cluster_ids_path, tsne_ids) 109 | 110 | logger.info(f"Completed T-SNE fitting, saved to {output_dir}") 111 | 112 | 113 | @fit_model_ex.main 114 | def fit_model( 115 | _run, 116 | ray_server: str, 117 | init_kwargs: Dict[str, Any], 118 | activation_dir: str, 119 | output_root: str, 120 | num_components: int, 121 | num_observations: int, 122 | perplexity: int, 123 | data_type, 124 | ): 125 | try: 126 | ray.init(address=ray_server, **init_kwargs) 127 | 128 | # Find activation paths for each environment & victim-path tuple 129 | stem_pattern = re.compile(r"(.*)_opponent_.*\.npz") 130 | 
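# Hedged illustration of the filename convention the stem/opponent patterns here assume
# (values are made up; real names come from aprl.activations.generate_activations):
#     >>> import re
#     >>> fname = "SumoHumans_victim_zoo_1_opponent_zoo_2.npz"
#     >>> re.match(r"(.*)_opponent_.*\.npz", fname).group(1)
#     'SumoHumans_victim_zoo_1'
#     >>> re.match(r".*_opponent_([^\s]+)_[^\s]+\.npz", fname).group(1)
#     'zoo'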
opponent_pattern = re.compile(r".*_opponent_([^\s]+)_[^\s]+\.npz") 131 | activation_paths = {} 132 | for fname in os.listdir(activation_dir): 133 | stem_match = stem_pattern.match(fname) 134 | if stem_match is None: 135 | logger.debug(f"Skipping {fname}") 136 | continue 137 | stem = stem_match.groups()[0] 138 | 139 | opponent_match = opponent_pattern.match(fname) 140 | opponent_type = opponent_match.groups()[0] 141 | 142 | path = osp.join(activation_dir, fname) 143 | activation_paths.setdefault(stem, {})[opponent_type] = path 144 | 145 | # Create temporary output directory (if needed) 146 | tmp_dir = None 147 | if output_root is None: 148 | tmp_dir = tempfile.TemporaryDirectory() 149 | output_root = tmp_dir.name 150 | 151 | # Fit t-SNE and save model weights 152 | results = [] 153 | for stem, paths in activation_paths.items(): 154 | output_dir = osp.join(output_root, stem) 155 | os.makedirs(output_dir) 156 | future = fit_tsne_helper.remote( 157 | paths, output_dir, num_components, num_observations, perplexity, data_type 158 | ) 159 | results.append(future) 160 | 161 | ray.get(results) # block until all jobs have finished 162 | utils.add_artifacts(_run, output_root, ingredient=fit_model_ex) 163 | finally: 164 | # Clean up temporary directory (if needed) 165 | if tmp_dir is not None: 166 | tmp_dir.cleanup() 167 | ray.shutdown() 168 | 169 | 170 | def main(): 171 | observer = FileStorageObserver(osp.join("data", "sacred", "tsne_fit")) 172 | fit_model_ex.observers.append(observer) 173 | fit_model_ex.run_commandline() 174 | 175 | 176 | if __name__ == "__main__": 177 | main() 178 | -------------------------------------------------------------------------------- /src/aprl/activations/tsne/pipeline.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import os.path as osp 4 | 5 | import sacred 6 | from sacred.observers import FileStorageObserver 7 | 8 | from aprl.activations import generate_activations 9 | from aprl.activations.tsne import visualize 10 | from aprl.activations.tsne.fit_model import fit_model, fit_model_ex 11 | from aprl.common import utils 12 | 13 | tsne_ex = sacred.Experiment( 14 | "tsne", 15 | ingredients=[ 16 | generate_activations.generate_activations_ex, 17 | fit_model_ex, 18 | visualize.visualize_ex, 19 | ], 20 | ) 21 | logger = logging.getLogger("aprl.activations.tsne.pipeline") 22 | 23 | 24 | @tsne_ex.config 25 | def activation_storing_config(): 26 | output_root = "data/tsne" # where to produce output 27 | exp_name = "default" # experiment name 28 | 29 | _ = locals() # quieten flake8 unused variable warning 30 | del _ 31 | 32 | 33 | @tsne_ex.named_config 34 | def debug_config(generate_activations, tsne_visualize): 35 | generate_activations = dict(generate_activations) 36 | generate_activations["score_configs"] = [("debug_two_agents",)] 37 | generate_activations["score_update"] = {"score": {"timesteps": 100}} 38 | 39 | tsne_visualize = dict(tsne_visualize) 40 | tsne_visualize["ordering"] = ["Zoo", "Rand"] 41 | 42 | exp_name = "debug" 43 | 44 | _ = locals() # quieten flake8 unused variable warning 45 | del _ 46 | 47 | 48 | @tsne_ex.main 49 | def pipeline(_run, output_root, exp_name): 50 | out_dir = osp.join(output_root, exp_name, utils.make_timestamp()) 51 | os.makedirs(out_dir) 52 | 53 | activation_dst_dir = osp.join(out_dir, "activations") 54 | generate_activations.generate_activations(out_dir=activation_dst_dir) 55 | 56 | model_dir = osp.join(out_dir, "fitted") 57 | 
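# Rough output layout under out_dir once the full pipeline has run (paths illustrative;
# the timestamped directory comes from utils.make_timestamp()):
#   data/tsne/default/<timestamp>/activations/    raw .npz activation files
#   data/tsne/default/<timestamp>/fitted/<stem>/  tsne_weights.pkl, cluster_ids.npy, metadata.csv
#   data/tsne/default/<timestamp>/figures/        plots produced by visualize.visualize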
fit_model(activation_dir=activation_dst_dir, output_root=model_dir) 58 | 59 | figure_dst_dir = osp.join(out_dir, "figures") 60 | visualize.visualize(model_glob=osp.join(model_dir, "*"), output_root=figure_dst_dir) 61 | 62 | return out_dir 63 | 64 | 65 | def main(): 66 | observer = FileStorageObserver(osp.join("data", "sacred", "tsne")) 67 | tsne_ex.observers.append(observer) 68 | tsne_ex.run_commandline() 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /src/aprl/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F401 2 | 3 | from aprl.agents.monte_carlo import MonteCarloParallel, MonteCarloSingle, MujocoResettableWrapper 4 | from aprl.agents.mujoco_lqr import ( 5 | MujocoFiniteDiffCost, 6 | MujocoFiniteDiffDynamicsBasic, 7 | MujocoFiniteDiffDynamicsPerformance, 8 | ) 9 | -------------------------------------------------------------------------------- /src/aprl/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/common/__init__.py -------------------------------------------------------------------------------- /src/aprl/common/mujoco.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from collections import namedtuple 3 | 4 | import gym 5 | import numpy as np 6 | 7 | 8 | # TODO: Cythonize 9 | class MujocoState(namedtuple("MujocoStateBase", "qpos qvel")): 10 | """Represents state from the MuJoCo simulator needed for planning, 11 | namely position and velocity.""" 12 | 13 | @staticmethod 14 | def from_mjdata(data): 15 | return MujocoState(data.qpos, data.qvel) 16 | 17 | @staticmethod 18 | def from_flattened(flattened, sim): 19 | qpos = flattened[0 : sim.model.nq] 20 | qvel = flattened[sim.model.nq : sim.model.nq + sim.model.nv] 21 | return MujocoState(qpos, qvel) 22 | 23 | def set_mjdata(self, data): 24 | try: 25 | data.qpos[:] = self.qpos 26 | data.qvel[:] = self.qvel 27 | except ValueError: # older mujoco version 28 | data.qpos = self.qpos 29 | data.qvel = self.qvel 30 | 31 | def flatten(self): 32 | return np.concatenate((self.qpos, self.qvel)) 33 | 34 | 35 | class ResettableEnv(gym.Env, abc.ABC): 36 | """A Gym environment that can be reset to an arbitrary state.""" 37 | 38 | @abc.abstractmethod 39 | def get_state(self): 40 | """Returns a serialized representation of the current state.""" 41 | pass 42 | 43 | @abc.abstractmethod 44 | def set_state(self, x): 45 | """Restores the environment to a previously saved state. 
46 | :param x: return value of a previous call to get_state().""" 47 | pass 48 | -------------------------------------------------------------------------------- /src/aprl/common/multi_monitor.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from stable_baselines.bench import Monitor 5 | 6 | from aprl.common.utils import getattr_unwrapped 7 | 8 | 9 | class MultiMonitor(Monitor): 10 | def __init__( 11 | self, 12 | env, 13 | filename, 14 | our_idx=None, 15 | allow_early_resets=False, 16 | reset_keywords=(), 17 | info_keywords=(), 18 | ): 19 | num_agents = getattr_unwrapped(env, "num_agents") 20 | extra_rks = tuple("r{:d}".format(i) for i in range(num_agents)) 21 | super().__init__( 22 | env, 23 | filename, 24 | allow_early_resets=allow_early_resets, 25 | reset_keywords=reset_keywords, 26 | info_keywords=extra_rks + info_keywords, 27 | ) 28 | self.our_idx = our_idx 29 | self.info_keywords = info_keywords 30 | 31 | def step(self, action): 32 | """ 33 | Step the environment with the given action 34 | 35 | :param action: ([int] or [float]) the action 36 | :return: ([int] or [float], [float], [bool], dict) observation, reward, done, information 37 | """ 38 | if self.needs_reset: 39 | raise RuntimeError("Tried to step environment that needs reset") 40 | observation, reward, done, info = self.env.step(action) 41 | self.rewards.append(reward) 42 | if done: 43 | self.needs_reset = True 44 | eplen = len(self.rewards) 45 | ep_rew = np.asarray(self.rewards).sum(axis=0).round(6) 46 | our_rew = float("nan") if self.our_idx is None else ep_rew[self.our_idx] 47 | ep_info = {"r": our_rew, "l": eplen, "t": round(time.time() - self.t_start, 6)} 48 | for i, rew in enumerate(ep_rew): 49 | ep_info["r{:d}".format(i)] = rew 50 | for key in self.info_keywords: 51 | ep_info[key] = info[key] 52 | self.episode_rewards.append(ep_rew) 53 | self.episode_lengths.append(eplen) 54 | self.episode_times.append(time.time() - self.t_start) 55 | ep_info.update(self.current_reset_info) 56 | if self.logger: 57 | self.logger.writerow(ep_info) 58 | self.file_handler.flush() 59 | info["episode"] = ep_info 60 | self.total_steps += 1 61 | return observation, reward, done, info 62 | 63 | def __getattr__(self, name): 64 | return getattr(self.env, name) 65 | -------------------------------------------------------------------------------- /src/aprl/common/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import datetime 3 | import os 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def make_session(graph=None): 9 | tf_config = tf.ConfigProto() 10 | tf_config.gpu_options.allow_growth = True 11 | sess = tf.Session(graph=graph, config=tf_config) 12 | return sess 13 | 14 | 15 | def make_timestamp(): 16 | ISO_TIMESTAMP = "%Y%m%d_%H%M%S" 17 | return datetime.datetime.now().strftime(ISO_TIMESTAMP) 18 | 19 | 20 | def add_artifacts(run, dirname, ingredient=None): 21 | """Convenience function for Sacred to add artifacts inside directory dirname to current run. 22 | 23 | :param run: (sacred.Run) object representing current experiment. Can be captured as `_run`. 24 | :param dirname: (str) root of directory to save. 25 | :param ingredient: (sacred.Ingredient or None) optional, ingredient that generated the 26 | artifacts. Will be used to tag saved files. This is ignored if ingredient 27 | is equal to the currently running experiment. 
28 | :return None""" 29 | prefix = "" 30 | if ingredient is not None: 31 | exp_name = run.experiment_info["name"] 32 | ingredient_name = ingredient.path 33 | if exp_name != ingredient_name: 34 | prefix = ingredient_name + "_" 35 | 36 | for root, dirs, files in os.walk(dirname): 37 | for file in files: 38 | path = os.path.join(root, file) 39 | relroot = os.path.relpath(path, dirname) 40 | name = prefix + relroot.replace("/", "_") + "_" + file 41 | run.add_artifact(path, name=name) 42 | 43 | 44 | # TODO(adam): delete this once Sacred issue #498 & #499 are resolved 45 | def sacred_copy(o): 46 | """Perform a deep copy on nested dictionaries and lists. 47 | 48 | If `d` is an instance of dict or list, copies `d` to a dict or list 49 | where the values are recursively copied using `sacred_copy`. Otherwise, `d` 50 | is copied using `copy.deepcopy`. Note this intentionally loses subclasses. 51 | This is useful if e.g. `d` is a Sacred read-only dict. However, it can be 52 | undesirable if e.g. `d` is an OrderedDict. 53 | 54 | :param o: (object) if dict, copy recursively; otherwise, use `copy.deepcopy`. 55 | :return A deep copy of d.""" 56 | if isinstance(o, dict): 57 | return {k: sacred_copy(v) for k, v in o.items()} 58 | elif isinstance(o, list): 59 | return [sacred_copy(v) for v in o] 60 | else: 61 | return copy.deepcopy(o) 62 | 63 | 64 | def getattr_unwrapped(env, attr): 65 | """Get attribute attr from env, or one of the nested environments. 66 | Args: 67 | - env(gym.Wrapper or gym.Env): a (possibly wrapped) environment. 68 | - attr: name of the attribute 69 | Returns: 70 | env.attr, if present, otherwise env.unwrapped.attr and so on recursively. 71 | """ 72 | try: 73 | return getattr(env, attr) 74 | except AttributeError: 75 | if env.env == env: 76 | raise 77 | else: 78 | return getattr_unwrapped(env.env, attr) 79 | -------------------------------------------------------------------------------- /src/aprl/configs/.gitignore: -------------------------------------------------------------------------------- 1 | *-localcfg* 2 | -------------------------------------------------------------------------------- /src/aprl/configs/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | DATA_LOCATION = os.path.abspath(os.environ.get("DATA_LOC", "data")) 4 | -------------------------------------------------------------------------------- /src/aprl/configs/multi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/configs/multi/__init__.py -------------------------------------------------------------------------------- /src/aprl/configs/multi/common.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from aprl.envs import gym_compete 5 | 6 | BANSAL_ENVS = ["multicomp/" + env for env in gym_compete.POLICY_STATEFUL.keys()] 7 | BANSAL_ENVS += ["multicomp/SumoHumansAutoContact-v0", "multicomp/SumoAntsAutoContact-v0"] 8 | BANSAL_GOOD_ENVS = [ # Environments well-suited to adversarial attacks 9 | "multicomp/KickAndDefend-v0", 10 | "multicomp/SumoHumansAutoContact-v0", 11 | "multicomp/SumoAntsAutoContact-v0", 12 | "multicomp/YouShallNotPassHumans-v0", 13 | ] 14 | 15 | 16 | def get_adversary_paths(): 17 | """Load adversary paths from ADVERSARY_PATHS environment variable. 
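The file must be JSON with a top-level "policies" key; only that key is read here, e.g. (the structure of the nested value is illustrative): {"policies": {...}}.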
18 | 19 | We can't make this a Sacred config param since Sacred named_configs execute before configs. 20 | """ 21 | path = os.getenv("ADVERSARY_PATHS") 22 | if path is None: 23 | raise ValueError( 24 | "Specify path to JSON file containing adversaries in ADVERSARY_PATHS " 25 | "environment variable. (Run 'experiments/modelfree/highest_win_rate.py'" 26 | "to generate this.)" 27 | ) 28 | with open(path, "r") as f: 29 | return json.load(f)["policies"] 30 | -------------------------------------------------------------------------------- /src/aprl/configs/noise/SumoHumans-cond.json: -------------------------------------------------------------------------------- 1 | { 2 | "metric": "sparse", 3 | "min_wait": 3000, 4 | "window_size": 1000, 5 | "start_val": 2 6 | } 7 | -------------------------------------------------------------------------------- /src/aprl/configs/noise/SumoHumans.json: -------------------------------------------------------------------------------- 1 | { 2 | "anneal_frac": 0.5, 3 | "param": 0.5 4 | } 5 | -------------------------------------------------------------------------------- /src/aprl/configs/noise/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "anneal_frac": 0.5, 3 | "param": 0.5 4 | } 5 | -------------------------------------------------------------------------------- /src/aprl/configs/ray/aws.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: aprl 2 | min_workers: 0 3 | max_workers: 10 4 | initial_workers: 0 5 | target_utilization_fraction: 0.8 6 | idle_timeout_minutes: 5 7 | 8 | docker: 9 | image: humancompatibleai/adversarial_policies:latest 10 | container_name: ray 11 | run_options: 12 | # NOTE: MUJOCO_KEY must be set in environment (it is in our AMI) 13 | - "--env MUJOCO_KEY=${MUJOCO_KEY}" 14 | # Open port 6006 for TensorBoard 15 | - "-p 6006:6006" 16 | # These are to work around Ray bug #4403. 17 | - "-v /home/ubuntu/ray_results:/home/ubuntu/ray_results" 18 | - "-v /home/ubuntu/aws_private:/home/ubuntu/aws_private" 19 | - "--env LOGNAME=ubuntu" 20 | - "--env HOME=/home/ubuntu" 21 | - "--env DATA_LOC=/home/ubuntu/aws_private" 22 | - "--env ADVERSARY_PATHS=/home/ubuntu/aws_private/multi_train/paper/highest_win_policies_and_rates.json" 23 | 24 | provider: 25 | type: aws 26 | region: us-west-2 27 | availability_zone: us-west-2a,us-west-2b,us-west-2c,us-west-2d 28 | cache_stopped_nodes: False # TODO(adam): remove when Ray issue #6128 is closed 29 | 30 | # How Ray will authenticate with newly launched nodes. 31 | auth: 32 | ssh_user: ubuntu 33 | ssh_private_key: ~/.ssh/adversarial-policies 34 | 35 | head_node: 36 | InstanceType: c5.4xlarge 37 | ImageId: ami-03cb2176bb0ac9ec7 # CHAI Ubuntu 18.04 38 | KeyName: Adversarial Policies 39 | BlockDeviceMappings: 40 | - DeviceName: /dev/sda1 41 | Ebs: 42 | VolumeSize: 50 43 | VolumeType: gp2 44 | 45 | worker_nodes: 46 | InstanceType: c5.9xlarge # half a machine, less likely to get preempted than full c5.18xlarge 47 | ImageId: ami-03cb2176bb0ac9ec7 # CHAI Ubuntu 18.04 48 | KeyName: Adversarial Policies 49 | BlockDeviceMappings: 50 | - DeviceName: /dev/sda1 51 | Ebs: 52 | VolumeSize: 50 53 | VolumeType: gp2 54 | InstanceMarketOptions: 55 | MarketType: spot 56 | IamInstanceProfile: 57 | Name: EC2AccessS3 58 | 59 | # List of shell commands to run to set up nodes. 
60 | initialization_commands: # before entering Docker 61 | - "curl -L -o $HOME/goofys https://github.com/kahing/goofys/releases/latest/download/goofys && chmod a+x $HOME/goofys" 62 | - "[[ -d /home/ubuntu/aws_private ]] || sudo mkdir /home/ubuntu/aws_private" 63 | - sudo $HOME/goofys -o allow_other --file-mode=0666 --dir-mode=0777 adversarial-policies /home/ubuntu/aws_private 64 | # TODO(adam): remove once Ray #6111 merged 65 | - docker pull humancompatibleai/adversarial_policies:latest 66 | setup_commands: 67 | # Part of Ray bug #4403 workaround. 68 | - ln -sf /root/.mujoco /home/ubuntu/.mujoco 69 | - ln -sf /home/ubuntu/ray_bootstrap_key.pem /root/ray_bootstrap_key.pem 70 | head_setup_commands: [] 71 | worker_setup_commands: [] 72 | 73 | head_start_ray_commands: 74 | # Set up alarm for if node is left running by mistake 75 | # Then (re)start Ray. 76 | - > 77 | DEBIAN_FRONTEND=noninteractive apt-get install -y cloud-guest-utils && 78 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 79 | git pull && 80 | . /adversarial-policies/ci/prepare_env.sh && 81 | python3 setup.py sdist bdist_wheel && 82 | pip install --force dist/aprl-*.whl && 83 | /adversarial-policies/scripts/aws/cloudwatch.sh && 84 | ray stop && 85 | ulimit -n 65536 && 86 | ray start --head --redis-port=6379 --object-manager-port=8076 \ 87 | --autoscaling-config=~/ray_bootstrap_config.yaml --num-cpus=8 88 | 89 | worker_start_ray_commands: 90 | # (Re)start spot termination monitor. 91 | # Then (re)start Ray. 92 | - > 93 | (pkill -x -f "python /adversarial-policies/scripts/aws/termination.py" || true) && 94 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 95 | git pull && 96 | . /adversarial-policies/ci/prepare_env.sh && 97 | python3 setup.py sdist bdist_wheel && 98 | pip install --force dist/aprl-*.whl && 99 | ray stop && 100 | (python /adversarial-policies/scripts/aws/termination.py >> /tmp/aws_termination.log 2>&1 &) && 101 | ulimit -n 65536 && 102 | ray start --redis-address=$RAY_HEAD_IP:6379 --object-manager-port=8076 103 | -------------------------------------------------------------------------------- /src/aprl/configs/ray/baremetal.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: adversarial-policies 2 | min_workers: 0 3 | max_workers: 0 4 | initial_workers: 0 5 | target_utilization_fraction: 0.8 6 | idle_timeout_minutes: 5 7 | 8 | docker: 9 | image: humancompatibleai/adversarial_policies:latest 10 | container_name: ray 11 | run_options: 12 | # NOTE: MUJOCO_KEY must be set in environment (it is in our AMI) 13 | - "--env MUJOCO_KEY=${MUJOCO_KEY}" 14 | # Open port 6006 for TensorBoard 15 | - "-p 6006:6006" 16 | 17 | provider: 18 | type: local 19 | head_ip: YOUR HOSTNAME 20 | worker_ips: 21 | - HOSTNAME 1 22 | - HOSTNAME 2 23 | 24 | auth: 25 | ssh_user: YOUR USERNAME 26 | ssh_private_key: ~/.ssh/id_rsa 27 | 28 | head_node: {} 29 | worker_nodes: {} 30 | 31 | # List of shell commands to run to set up nodes. 32 | initialization_commands: # before entering Docker 33 | # TODO(adam): remove once Ray #6111 merged 34 | - docker pull humancompatibleai/adversarial_policies:latest 35 | setup_commands: [] 36 | head_setup_commands: [] 37 | worker_setup_commands: [] 38 | head_start_ray_commands: 39 | - > 40 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 41 | git pull && 42 | . 
/adversarial-policies/ci/prepare_env.sh && 43 | python3 setup.py sdist bdist_wheel && 44 | pip install dist/aprl-*.whl && 45 | ray stop && 46 | ulimit -n 65536 && 47 | ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml 48 | worker_start_ray_commands: 49 | - > 50 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 51 | git pull && 52 | . /adversarial-policies/ci/prepare_env.sh && 53 | python3 setup.py sdist bdist_wheel && 54 | pip install dist/aprl-*.whl && 55 | ray stop && 56 | ulimit -n 65536 && 57 | ray start --redis-address=$RAY_HEAD_IP:6379 --object-manager-port=8076 58 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/Humanoid.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights": { 3 | "dense": { 4 | "reward_linvel": 0.1, 5 | "reward_quadctrl": 0.1, 6 | "reward_alive": 0.1, 7 | "reward_impact": 0.1 8 | }, 9 | "sparse": { 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/HumanoidStand.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights": { 3 | "dense": { 4 | "reward_linvel": 0, 5 | "reward_quadctrl": 0.1, 6 | "reward_alive": 0.1, 7 | "reward_impact": 0.1 8 | }, 9 | "sparse": { 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/SumoHumans-cond.json: -------------------------------------------------------------------------------- 1 | { 2 | "metric": "length", 3 | "window_size": 100, 4 | "min_wait": 100, 5 | "thresh": 100 6 | } 7 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights": { 3 | "dense": { 4 | "reward_move": 0.1 5 | }, 6 | "sparse": { 7 | "reward_remaining": 0.01 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /src/aprl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F401 2 | import collections 3 | 4 | import gym 5 | from gym.envs import registration 6 | from pkg_resources import resource_filename 7 | 8 | 9 | def register(id, **kwargs): 10 | """Idempotent version of gym.envs.registration.registry. 11 | 12 | Needed since aprl.envs can get imported multiple times, e.g. when deserializing policies. 
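Re-registering an id with identical kwargs is a no-op; re-registering with different kwargs trips the assertion below.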
13 | """ 14 | try: 15 | existing_spec = registration.spec(id) 16 | new_spec = registration.EnvSpec(id, **kwargs) 17 | assert existing_spec.__dict__ == new_spec.__dict__ 18 | except gym.error.UnregisteredEnv: # not previously registered 19 | registration.register(id, **kwargs) 20 | 21 | 22 | # Low-dimensional multi-agent environments 23 | 24 | register( 25 | id="aprl/CrowdedLine-v0", 26 | entry_point="aprl.envs.crowded_line:CrowdedLineEnv", 27 | max_episode_steps=200, 28 | reward_threshold=0, 29 | kwargs={"num_agents": 3}, 30 | ) 31 | 32 | register( 33 | id="aprl/IteratedMatchingPennies-v0", 34 | entry_point="aprl.envs.matrix_game:IteratedMatchingPenniesEnv", 35 | max_episode_steps=200, 36 | reward_threshold=100, 37 | ) 38 | 39 | register( 40 | id="aprl/RockPaperScissors-v0", 41 | entry_point="aprl.envs.matrix_game:RockPaperScissorsEnv", 42 | max_episode_steps=200, 43 | reward_threshold=100, 44 | ) 45 | 46 | # Our variants of environments from gym_compete 47 | 48 | register( 49 | id="multicomp/SumoHumansAutoContact-v0", 50 | entry_point="aprl.envs.sumo_auto_contact:SumoAutoContactEnv", 51 | kwargs={ 52 | "agent_names": ["humanoid_fighter", "humanoid_fighter"], 53 | "scene_xml_path": resource_filename( 54 | "gym_compete", "new_envs/assets/world_body_arena.humanoid_body.humanoid_body.xml" 55 | ), 56 | "init_pos": [(-1, 0, 1.4), (1, 0, 1.4)], 57 | "max_episode_steps": 500, 58 | "min_radius": 1.5, 59 | "max_radius": 3.5, 60 | }, 61 | ) 62 | 63 | register( 64 | id="multicomp/SumoAntsAutoContact-v0", 65 | entry_point="aprl.envs.sumo_auto_contact:SumoAutoContactEnv", 66 | kwargs={ 67 | "agent_names": ["ant_fighter", "ant_fighter"], 68 | "scene_xml_path": resource_filename( 69 | "gym_compete", "new_envs/assets/world_body_arena.ant_body.ant_body.xml" 70 | ), 71 | "world_xml_path": resource_filename("gym_compete", "new_envs/assets/world_body_arena.xml"), 72 | "init_pos": [(-1, 0, 2.5), (1, 0, 2.5)], 73 | "max_episode_steps": 500, 74 | "min_radius": 2.5, 75 | "max_radius": 4.5, 76 | }, 77 | ) 78 | 79 | 80 | # Which index does the victim play in? 81 | # This is really an experiment parameter rather than an environment parameter. 82 | # However, it's used so widely (training, evaluation, figure and video generation) and is 83 | # held fixed across all experiments it's convenient to treat it as if it is static. 84 | VICTIM_INDEX = collections.defaultdict(lambda: 0) 85 | VICTIM_INDEX.update( 86 | { 87 | # YouShallNotPass: 1 is the walker, 0 is the blocker agent. 88 | # An adversarial walker makes little sense, but a blocker can be adversarial. 89 | "multicomp/YouShallNotPassHumans-v0": 1, 90 | } 91 | ) 92 | -------------------------------------------------------------------------------- /src/aprl/envs/crowded_line.py: -------------------------------------------------------------------------------- 1 | """Agents want to be close to 'food' but not be too crowded on a 1D line.""" 2 | 3 | from gym.spaces import Box, Tuple 4 | import numpy as np 5 | 6 | from aprl.envs.multi_agent import MultiAgentEnv 7 | 8 | 9 | class CrowdedLineEnv(MultiAgentEnv): 10 | dt = 1e-1 11 | 12 | """Agents live on a line in [-1,1]. 
States consist of a position and velocity 13 | for each agent, with actions consisting of acceleration.""" 14 | 15 | def __init__(self, num_agents): 16 | agent_action_space = Box(-1.0, 1.0, shape=(1,), dtype=np.float32) 17 | agent_observation_space = Box(-1.0, 1.0, shape=(2,), dtype=np.float32) 18 | self.action_space = Tuple(tuple(agent_action_space for _ in range(num_agents))) 19 | self.observation_space = Tuple(tuple(agent_observation_space for _ in range(num_agents))) 20 | super().__init__(num_agents=num_agents) 21 | self.np_random = np.random.RandomState() 22 | 23 | def _get_obs(self): 24 | return tuple((np.array(row) for row in self.state)) 25 | 26 | def reset(self): 27 | self.state = self.np_random.rand(self.num_agents, 2) * 2 - 1 28 | return self._get_obs() 29 | 30 | def step(self, action_n): 31 | # Dynamics 32 | positions = self.state[:, 0] 33 | velocities = self.state[:, 1] 34 | positions += velocities * self.dt 35 | velocities += np.array(action_n).flatten() 36 | self.state = np.clip(self.state, -1, 1) 37 | 38 | # Reward: zero-sum game, agents want to be close to food items that other 39 | # agents are not close to. They should end up spreading out to cover the line. 40 | # One food item per agent, equally spaced: 41 | # at [-1, -1 + 2/(N-1), ..., 0, 1 - 2/(N-1), 1] 42 | # Each agent induces a quasi-Gaussian around its current position, 43 | # and gets a weighted average of the value of each of the food items. 44 | # The value of the food item is inversely proportional to the weights 45 | # induced by the agents. 46 | foods = np.arange(self.num_agents) * 2 / (self.num_agents - 1) - 1 47 | positions = positions.reshape(self.num_agents, 1) 48 | foods = foods.reshape(1, self.num_agents) 49 | # (num_agents, num_agents) matrix where rows are agents and columns food 50 | distance = positions - foods 51 | weights = np.exp(-np.square(distance)) 52 | food_values = 1 / weights.sum(axis=0) 53 | rewards = tuple(weights.dot(food_values) - 1) 54 | 55 | obs = self._get_obs() 56 | done = False 57 | info = {} 58 | return obs, rewards, done, info 59 | 60 | def seed(self, seed): 61 | self.np_random.seed(seed) 62 | 63 | def render(self, mode="human"): 64 | return ", ".join(["{:3f} @ {:3f}".format(pos, vel) for pos, vel in self.state]) 65 | -------------------------------------------------------------------------------- /src/aprl/envs/matrix_game.py: -------------------------------------------------------------------------------- 1 | """Two-player, normal-form games with symmetric action spaces.""" 2 | 3 | from gym.spaces import Discrete, Tuple 4 | import numpy as np 5 | 6 | from aprl.envs.multi_agent import MultiAgentEnv 7 | 8 | 9 | class MatrixGameEnv(MultiAgentEnv): 10 | """Models two-player, normal-form games with symmetrically sized action space.""" 11 | 12 | metadata = {"render.modes": ["human"]} 13 | ACTION_TO_SYM = None 14 | 15 | def __init__(self, num_actions, payoff): 16 | """payoff_matrices must be a pair of num_actions*num_actions payoff matrices.""" 17 | agent_space = Discrete(num_actions) 18 | overall_space = Tuple((agent_space, agent_space)) 19 | self.action_space = overall_space 20 | self.observation_space = overall_space 21 | super().__init__(num_agents=2) 22 | 23 | payoff = np.array(payoff) 24 | assert payoff.shape == (2, num_actions, num_actions) 25 | self.payoff = payoff 26 | 27 | def step(self, action_n): 28 | assert len(action_n) == 2 29 | i, j = action_n 30 | # observation is the other players move 31 | self.obs_n = (j, i) 32 | rew_n = self.payoff[:, i, j] 33 | done = False 34 
| return self.obs_n, rew_n, done, dict() 35 | 36 | def reset(self): 37 | # State is the previous players' actions, so this doesn't make much sense; 38 | # just assume (0, 0) is start. 39 | self.obs_n = (0, 0) 40 | return self.obs_n 41 | 42 | def seed(self, seed=None): 43 | # No-op, there is no randomness in this environment. 44 | return 45 | 46 | def render(self, mode="human"): 47 | # note observations are flipped -- each player observes the other agent's action 48 | p2, p1 = self.obs_n 49 | if self.ACTION_TO_SYM is not None: 50 | p1, p2 = tuple(map(self.ACTION_TO_SYM.get, (p1, p2))) 51 | return f"P1: {p1}, P2: {p2}" 52 | 53 | 54 | class IteratedMatchingPenniesEnv(MatrixGameEnv): 55 | ACTION_TO_SYM = {0: "H", 1: "T"} 56 | 57 | def __init__(self): 58 | p1_payoff = np.array([[1, -1], [-1, 1]]) 59 | payoff = [p1_payoff, -p1_payoff] 60 | return super().__init__(num_actions=2, payoff=payoff) 61 | 62 | 63 | class RockPaperScissorsEnv(MatrixGameEnv): 64 | ACTION_TO_SYM = {0: "R", 1: "P", 2: "S"} 65 | 66 | def __init__(self): 67 | p1_payoff = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) 68 | payoff = [p1_payoff, -p1_payoff] 69 | return super().__init__(num_actions=3, payoff=payoff) 70 | -------------------------------------------------------------------------------- /src/aprl/envs/observation_masking.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from gym.envs import registration 4 | from gym_compete.new_envs.agents.agent import Agent 5 | from gym_compete.new_envs.multi_agent_env import MultiAgentEnv 6 | import numpy as np 7 | 8 | 9 | def make_mask_from_class(cls): 10 | if not issubclass(cls, Agent): 11 | raise TypeError(f"You have passed in '{cls}', expected subclass of 'Agent'") 12 | 13 | class AdversaryMaskedGymCompeteAgent(cls): 14 | def __init__( 15 | self, 16 | agent_to_mask, 17 | agents_to_hide=None, 18 | masking_type="initialization", 19 | noise_magnitude=None, 20 | ): 21 | if not isinstance(agent_to_mask, cls): 22 | raise TypeError( 23 | f"You have passed in '{type(agent_to_mask)}', " f"requires instance of '{cls}'" 24 | ) 25 | 26 | self.agent_to_mask = agent_to_mask 27 | self.agents_to_hide = agents_to_hide 28 | self.noise_magnitude = noise_magnitude 29 | self.masking_type = masking_type 30 | if self.masking_type == "additive_noise" and self.noise_magnitude is None: 31 | raise ValueError( 32 | "To create a noisy observation masker, you must specify magnitude " 33 | "of desired Gaussian noise" 34 | ) 35 | 36 | other_agent_qpos = super(AdversaryMaskedGymCompeteAgent, self).get_other_agent_qpos() 37 | self.initial_values = {} 38 | for other_agent_id in other_agent_qpos: 39 | self.initial_values[other_agent_id] = other_agent_qpos[other_agent_id] 40 | self.initial_other_qpos = super(AdversaryMaskedGymCompeteAgent, self).get_other_qpos() 41 | 42 | def _get_masking_given_initial(self, initial_position_value, true_current_position): 43 | if self.masking_type == "zeros": 44 | return np.zeros_like(initial_position_value) 45 | elif self.masking_type == "debug": 46 | return np.full_like(initial_position_value, fill_value=-4.2) 47 | elif self.masking_type == "initialization": 48 | return initial_position_value 49 | elif self.masking_type == "additive_noise": 50 | noise = np.random.normal( 51 | scale=self.noise_magnitude, size=initial_position_value.shape 52 | ) 53 | return true_current_position + noise 54 | else: 55 | raise ValueError(f"Unsupported masking type '{self.masking_type}'") 56 | 57 | def get_other_agent_qpos(self): 58 | outp = {} 59 | for
other_agent_id in self.initial_values: 60 | if self.agents_to_hide is None or other_agent_id in self.agents_to_hide: 61 | true_current_pos = self.agent_to_mask.get_other_agent_qpos()[other_agent_id] 62 | outp[other_agent_id] = self._get_masking_given_initial( 63 | initial_position_value=self.initial_values[other_agent_id], 64 | true_current_position=true_current_pos, 65 | ) 66 | return outp 67 | 68 | def get_other_qpos(self): 69 | true_current_pos = self.agent_to_mask.get_other_qpos() 70 | return self._get_masking_given_initial( 71 | initial_position_value=self.initial_other_qpos, 72 | true_current_position=true_current_pos, 73 | ) 74 | 75 | def __getattr__(self, item): 76 | return getattr(self.agent_to_mask, item) 77 | 78 | return AdversaryMaskedGymCompeteAgent 79 | 80 | 81 | def make_mask_for_env(env_name, agent_index): 82 | spec = registration.registry.spec(env_name) 83 | agent_names = spec._kwargs["agent_names"] 84 | agent_name = agent_names[agent_index] 85 | agent_cls = MultiAgentEnv.AGENT_MAP[agent_name][1] 86 | return make_mask_from_class(agent_cls) 87 | 88 | 89 | def make_mask_agent_wrappers(env_name, agent_index, **kwargs): 90 | masker = make_mask_for_env(env_name, agent_index) 91 | masker = functools.partial(masker, **kwargs) 92 | return {agent_index: masker} 93 | -------------------------------------------------------------------------------- /src/aprl/envs/sumo_auto_contact.py: -------------------------------------------------------------------------------- 1 | from gym_compete.new_envs import SumoEnv 2 | 3 | 4 | class SumoAutoContactEnv(SumoEnv): 5 | """ 6 | Same as SumoEnv but agents automatically contact one another. 7 | This is so that falling or exiting the stage without touching 8 | the opponent counts as a loss and not a tie. 9 | """ 10 | 11 | def reset(self, margins=None, version=None): 12 | ob = super(SumoAutoContactEnv, self).reset(margins, version) 13 | self.agent_contacts = True 14 | return ob 15 | -------------------------------------------------------------------------------- /src/aprl/multi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/multi/__init__.py -------------------------------------------------------------------------------- /src/aprl/multi/common.py: -------------------------------------------------------------------------------- 1 | """Configuration that is common between multi.train and multi.score. 2 | 3 | In particular, configures sensible defaults for upload directory and Ray server 4 | depending on if running on EC2 or baremetal. 
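When platform is not set explicitly, it is auto-detected: 'ec2' if the EC2 instance-metadata endpoint responds, 'baremetal' if ~/ray_bootstrap_config.yaml exists on the machine, and 'local' otherwise.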
5 | """ 6 | 7 | import functools 8 | import getpass 9 | import hashlib 10 | import json 11 | import os 12 | import os.path as osp 13 | import shlex 14 | import socket 15 | import subprocess 16 | from typing import Any, Dict 17 | import urllib 18 | import uuid 19 | 20 | import ray 21 | from ray import tune 22 | 23 | from aprl.common import utils 24 | 25 | 26 | def _detect_ec2(): 27 | """Auto-detect if we are running on EC2.""" 28 | try: 29 | EC2_ID_URL = "http://169.254.169.254/latest/dynamic/instance-identity/document" 30 | with urllib.request.urlopen(EC2_ID_URL, timeout=3) as f: 31 | response = f.read().decode() 32 | if "availabilityZone" in response: 33 | return True 34 | else: 35 | raise ValueError(f"Received unexpected response from '{EC2_ID_URL}'") 36 | except urllib.error.URLError: 37 | return False 38 | 39 | 40 | def _rsync_func(local_dir, remote_uri): 41 | """rsync data from worker to a remote location (by default the driver).""" 42 | # SOMEDAY: This function blocks until syncing completes, which is unfortunate. 43 | # If we instead specified a shell command, ray.tune._LogSyncer would run it asynchronously. 44 | # But we need to do a two-stage command, creating the directories first, because rsync will 45 | # balk if destination directory does not exist; so no easy way to do that. 46 | remote_host, ssh_key, *remainder = remote_uri.split(":") 47 | remote_dir = ":".join(remainder) # remote directory may contain : 48 | remote_dir = shlex.quote(remote_dir) # make safe for SSH/rsync call 49 | 50 | ssh_command = ["ssh", "-o", "StrictHostKeyChecking=no", "-i", ssh_key] 51 | ssh_mkdir = ssh_command + [remote_host, "mkdir", "-p", remote_dir] 52 | subprocess.run(ssh_mkdir, check=True) 53 | 54 | rsync = [ 55 | "rsync", 56 | "-rlptv", 57 | "-e", 58 | " ".join(ssh_command), 59 | f"{local_dir}/", 60 | f"{remote_host}:{remote_dir}", 61 | ] 62 | subprocess.run(rsync) 63 | 64 | 65 | def make_sacred(ex, worker_name, worker_fn): 66 | @ex.config 67 | def default_config(): 68 | spec = {} # Ray spec 69 | platform = None # hosting: 'baremetal' or 'ec2' 70 | s3_bucket = None # results storage on 'ec2' platform 71 | baremetal = {} # config options for 'baremetal' platform 72 | local_dir = None # results storage on 'local' platform 73 | ray_server = None # if None, start cluster on local machine 74 | upload_root = None # root of upload_dir 75 | init_kwargs = {} # options for ray.init 76 | exp_name = "default" # experiment name 77 | 78 | _ = locals() # quieten flake8 unused variable warning 79 | del _ 80 | 81 | @ex.config 82 | def ec2_config(platform, s3_bucket, spec): 83 | """When running on AWS EC2 cloud. 84 | 85 | If you are not the authors of this project, you will need to override s3_bucket.""" 86 | if platform is None: 87 | if _detect_ec2(): 88 | platform = "ec2" 89 | 90 | if platform == "ec2": 91 | # We're running on EC2 92 | if s3_bucket is None: 93 | s3_bucket = "adversarial-policies" 94 | 95 | spec["sync_config"] = {"upload_dir": f"s3://{s3_bucket}/"} 96 | ray_server = "localhost:6379" 97 | 98 | _ = locals() # quieten flake8 unused variable warning 99 | del _ 100 | 101 | @ex.config 102 | def baremetal_config(platform, baremetal, spec): 103 | """When running in bare-metal Ray cluster (i.e. not in cloud). 104 | 105 | Assumes we're running on the head node. Requires the worker have permission to rsync 106 | to the head node. 
The intended config is they run with an SSH key that allows login to 107 | the user from any machine in the cluster.""" 108 | if platform is None: 109 | if osp.exists(osp.expanduser("~/ray_bootstrap_config.yaml")): 110 | platform = "baremetal" 111 | 112 | if platform == "baremetal": 113 | baremetal = dict(baremetal) 114 | if "ssh_key" not in baremetal: 115 | baremetal["ssh_key"] = osp.expanduser("~/ray_bootstrap_key.pem") 116 | if "host" not in baremetal: 117 | baremetal["host"] = f"{getpass.getuser()}@{socket.getfqdn()}" 118 | if "dir" not in baremetal: 119 | baremetal["dir"] = osp.expanduser("~/adversarial-policies/data") 120 | 121 | spec["sync_config"] = { 122 | "upload_dir": ":".join([baremetal["host"], baremetal["ssh_key"], baremetal["dir"]]), 123 | "sync_to_cloud": tune.function(_rsync_func), 124 | } 125 | ray_server = "localhost:6379" 126 | 127 | _ = locals() # quieten flake8 unused variable warning 128 | del _ 129 | 130 | @ex.config 131 | def local_config(platform, local_dir, spec): 132 | if platform is None: 133 | # No platform specified; assume local if no previous config autodetected. 134 | platform = "local" 135 | 136 | if platform == "local": 137 | if local_dir is None: 138 | local_dir = osp.abspath(osp.join(os.getcwd(), "data")) 139 | spec["sync_config"] = { 140 | "sync_to_cloud": ("mkdir -p {target} && " "rsync -rlptv {source}/ {target}"), 141 | "upload_dir": local_dir, 142 | } 143 | 144 | @ex.capture 145 | def run( 146 | base_config: Dict[str, Any], 147 | ray_server: str, 148 | init_kwargs: Dict[str, Any], 149 | exp_name: str, 150 | spec: Dict[str, Any], 151 | ) -> ray.tune.ExperimentAnalysis: 152 | ray.init(address=ray_server, **init_kwargs) 153 | 154 | # We have to register the function we're going to call with Ray. 155 | # We partially apply worker_fn, so it's different for each experiment. 156 | # Compute a hash based on the config to make sure it has a unique name! 157 | # Note Ray does let you pass a worker_fn directly without registering, but then 158 | # it registers using the function name (which may not be unique). 159 | cfg = { 160 | # ReadOnlyDict's aren't serializable: see sacred issue #499 161 | "base_config": utils.sacred_copy(base_config), 162 | "exp_name": exp_name, 163 | } 164 | cfg_str = json.dumps(cfg) 165 | hasher = hashlib.md5() # we are not worried about security here 166 | hasher.update(cfg_str.encode("utf8")) 167 | cfg_hash = hasher.hexdigest() 168 | 169 | trainable_name = f"{worker_name}-{cfg_hash}" 170 | base_config = utils.sacred_copy(base_config) 171 | trainable_fn = functools.partial(worker_fn, base_config) 172 | tune.register_trainable(trainable_name, trainable_fn) 173 | 174 | exp_id = f"{ex.path}/{exp_name}/{utils.make_timestamp()}-{uuid.uuid4().hex}" 175 | spec = utils.sacred_copy(spec) 176 | 177 | # Disable TensorBoard logger: fails due to the spec containing string variables. 
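# For reference, the hashed trainable name computed above can be reproduced with
# nothing but the standard library; a minimal sketch (the worker name and config
# values here are illustrative placeholders, not taken from a real run):
#
#     import hashlib
#     import json
#
#     cfg = {"base_config": {"episodes": 2}, "exp_name": "debug"}
#     digest = hashlib.md5(json.dumps(cfg).encode("utf8")).hexdigest()
#     trainable_name = f"score-{digest}"  # "score-" followed by 32 hex characters
#
# Registering under this name means two runs with different configs never collide
# in Ray's trainable registry, while re-running an identical config reuses the name.
# With TensorBoard disabled (see the note above), only the JSON and CSV loggers are
# passed to tune.run below.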
178 | tune_loggers = [tune.logger.JsonLogger, tune.logger.CSVLogger] 179 | sync_config = None 180 | if "sync_config" in spec: 181 | sync_config = tune.SyncConfig(**spec["sync_config"]) 182 | try: 183 | result = tune.run( 184 | trainable_name, 185 | name=exp_id, 186 | config=spec["config"], 187 | sync_config=sync_config, 188 | loggers=tune_loggers, 189 | **spec["run_kwargs"], 190 | ) 191 | finally: 192 | ray.shutdown() 193 | 194 | return result, exp_id 195 | 196 | return run 197 | -------------------------------------------------------------------------------- /src/aprl/multi/common_worker.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | import sacred 4 | 5 | 6 | def flatten_config(config): 7 | """Take dict with ':'-separated keys and values or tuples of values, 8 | flattening to single key-value pairs. 9 | 10 | Example: _flatten_config({'a:b': (1, 2), 'c': 3}) -> {'a: 1, 'b': 2, 'c': 3}.""" 11 | new_config = {} 12 | for ks, vs in config.items(): 13 | ks = ks.split(":") 14 | if len(ks) == 1: 15 | vs = (vs,) 16 | 17 | for k, v in zip(ks, vs): 18 | assert k not in new_config, f"duplicate key '{k}'" 19 | new_config[k] = v 20 | 21 | return new_config 22 | 23 | 24 | def update(d, u): 25 | """Recursive dictionary update.""" 26 | for k, v in u.items(): 27 | if isinstance(v, collections.Mapping): 28 | d[k] = update(d.get(k, {}), v) 29 | else: 30 | d[k] = v 31 | return d 32 | 33 | 34 | def fix_sacred_capture(): 35 | """Workaround for Sacred stdout capture issue #195 and Ray issue #5718.""" 36 | # TODO(adam): remove once Sacred issue #195 is closed 37 | sacred.SETTINGS.CAPTURE_MODE = "sys" 38 | -------------------------------------------------------------------------------- /src/aprl/multi/score.py: -------------------------------------------------------------------------------- 1 | """Hyperparameter search for train.py using Ray Tune.""" 2 | 3 | import json 4 | import logging 5 | import math 6 | import os 7 | import os.path as osp 8 | import shutil 9 | import tempfile 10 | 11 | from ray import tune 12 | from sacred import Experiment 13 | from sacred.observers import FileStorageObserver 14 | 15 | from aprl.configs.multi.score import make_configs 16 | from aprl.envs import VICTIM_INDEX 17 | from aprl.envs.gym_compete import env_name_to_canonical 18 | from aprl.multi import common, score_worker 19 | from aprl.score_agent import score_ex 20 | 21 | multi_score_ex = Experiment("multi_score", ingredients=[score_ex]) 22 | pylog = logging.getLogger("aprl.multi.score") 23 | 24 | # Load common configs (e.g. upload directories) and define the run command 25 | run = common.make_sacred(multi_score_ex, "score", score_worker.score_worker) 26 | 27 | # Load named configs for individual experiments (these change a lot, so keep out of this file) 28 | make_configs(multi_score_ex) 29 | 30 | 31 | @multi_score_ex.config 32 | def default_config(score): 33 | spec = { # experiment specification 34 | "run_kwargs": {"resources_per_trial": {"cpu": math.ceil(score["num_env"] / 2)}}, 35 | "config": {}, 36 | } 37 | save_path = None # path to save JSON results. If None, do not save. 
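# How this spec is consumed: common.make_sacred's run() passes spec["config"] to
# tune.run as the search space and unpacks spec["run_kwargs"] as extra keyword
# arguments. A hypothetical sweep over opponent policies could therefore be
# expressed as the following sketch (values are placeholders; compare the
# debug_config named config below):
#
#     spec = {
#         "run_kwargs": {"resources_per_trial": {"cpu": 2}},
#         "config": {"agent_b_path": tune.grid_search(["1", "2", "3"])},
#     }
#
# Each grid_search value becomes one Ray Tune trial, scored independently.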
38 | 39 | _ = locals() # quieten flake8 unused variable warning 40 | del _ 41 | 42 | 43 | @score_ex.config 44 | def score_config(): 45 | render = False 46 | videos = False 47 | 48 | _ = locals() # quieten flake8 unused variable warning 49 | del _ 50 | 51 | 52 | @multi_score_ex.named_config 53 | def debug_config(score): 54 | """Try zero-agent and random-agent against pre-trained zoo policies.""" 55 | score = dict(score) 56 | score["episodes"] = 1 57 | score["agent_a_type"] = "zoo" 58 | score["agent_b_type"] = "zoo" 59 | spec = {"config": {"agent_a_path": tune.grid_search(["1", "2"])}} 60 | exp_suffix = "debug" 61 | _ = locals() # quieten flake8 unused variable warning 62 | del _ 63 | 64 | 65 | def _remap_keys(d): 66 | return [{"k": k, "v": v} for k, v in d.items()] 67 | 68 | 69 | @multi_score_ex.main 70 | def multi_score(score, save_path): 71 | f = None 72 | try: 73 | tmp_path = None 74 | if save_path is not None: 75 | f = open(save_path, "w") # open it now so we fail fast if file is unwriteable 76 | else: 77 | fd, tmp_path = tempfile.mkstemp(prefix="multi_score") 78 | f = os.fdopen(fd, mode="w") 79 | save_path = tmp_path 80 | 81 | analysis, exp_id = run(base_config=score) 82 | trials = analysis.trials 83 | additional_index_keys = score.get("index_keys", []) 84 | results = {} 85 | for trial in trials: 86 | idx = trial.last_result["idx"] 87 | cols = ["env_name", "agent_a_type", "agent_a_path", "agent_b_type", "agent_b_path"] 88 | cols += additional_index_keys 89 | key = tuple(idx[col] for col in cols) 90 | results[key] = trial.last_result["score"] 91 | 92 | json.dump(_remap_keys(results), f) 93 | finally: 94 | if f is not None: 95 | f.close() 96 | multi_score_ex.add_artifact(save_path, name="scores.json") 97 | if tmp_path is not None: 98 | os.unlink(tmp_path) 99 | 100 | return {"scores": results, "exp_id": exp_id} 101 | 102 | 103 | def run_external(named_configs, post_named_configs, config_updates, adversary_path=None): 104 | """Run multiple multi_score experiments. Intended for use by external scripts, 105 | not accessible from commandline. 106 | 107 | :param named_configs: (list) list of named configs, executed one by one 108 | :param post_named_configs: (list) list of base named configs, applied after the 109 | current config from `named_configs`. 110 | :param config_updates: (dict) a dict of config options, overriding the named config. 111 | :param adversary_path: (str or None) path to JSON, needed by adversary_transfer config. 112 | :return (dict) mapping from named configs to their output directory 113 | """ 114 | # Sad workaround for Sacred config limitation, 115 | # see aprl.configs.multi.score:_get_adversary_paths 116 | os.environ["ADVERSARY_PATHS"] = adversary_path 117 | 118 | output_dir = {} 119 | for trial_configs in named_configs: 120 | configs = list(trial_configs) + list(post_named_configs) 121 | run = multi_score_ex.run(named_configs=configs, config_updates=config_updates) 122 | assert run.status == "COMPLETED" 123 | exp_id = run.result["exp_id"] 124 | output_dir[tuple(trial_configs)] = exp_id 125 | 126 | return output_dir 127 | 128 | 129 | def extract_data(path_generator, out_dir, experiment_dirs, ray_upload_dir): 130 | """Helper method to extract data from multiple_score experiments.""" 131 | for experiment, experiment_dir in experiment_dirs.items(): 132 | experiment_root = osp.join(ray_upload_dir, experiment_dir) 133 | # video_root contains one directory for each score_agent trial. 134 | # These directories have names of form score-__... 
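# Sketch of the on-disk layout walked below (trial directory names are
# illustrative; the exact suffix Ray Tune appends varies by version and config):
#
#     <ray_upload_dir>/
#         <experiment_dir>/
#             score-<trial suffix>/
#                 data/sacred/score/1/config.json    # Sacred config, read below
#                 data/sacred/score/1/...            # other Sacred outputs (videos, etc.)
#
# The loop walks one level of trial directories, recovers victim/opponent metadata
# from each config.json, and copies out the artifact selected by path_generator.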
135 | for dir_entry in os.scandir(experiment_root): 136 | if not dir_entry.is_dir(): 137 | continue 138 | 139 | trial_name = dir_entry.name 140 | # Each trial contains the Sacred output from score_agent. 141 | # Note Ray Tune is running with a fresh working directory per trial, so Sacred 142 | # output will always be at score/1. 143 | trial_root = osp.join(experiment_root, trial_name) 144 | 145 | sacred_config = osp.join(trial_root, "data", "sacred", "score", "1", "config.json") 146 | with open(sacred_config, "r") as f: 147 | cfg = json.load(f) 148 | 149 | def agent_key(agent): 150 | return cfg[agent + "_type"], cfg[agent + "_path"] 151 | 152 | env_name = cfg["env_name"] 153 | victim_index = VICTIM_INDEX[env_name] 154 | if victim_index == 0: 155 | victim_type, victim_path = agent_key("agent_a") 156 | opponent_type, opponent_path = agent_key("agent_b") 157 | else: 158 | victim_type, victim_path = agent_key("agent_b") 159 | opponent_type, opponent_path = agent_key("agent_a") 160 | 161 | if "multicomp" in cfg["env_name"]: 162 | env_name = env_name_to_canonical(env_name) 163 | env_name = env_name.replace("/", "-") # sanitize 164 | 165 | src_path, new_name, suffix = path_generator( 166 | trial_root=trial_root, 167 | cfg=cfg, 168 | env_sanitized=env_name, 169 | victim_index=victim_index, 170 | victim_type=victim_type, 171 | victim_path=victim_path, 172 | opponent_type=opponent_type, 173 | opponent_path=opponent_path, 174 | ) 175 | dst_path = osp.join(out_dir, f"{new_name}.{suffix}") 176 | shutil.copy(src_path, dst_path) 177 | dst_config = osp.join(out_dir, f"{new_name}_sacred.json") 178 | shutil.copy(sacred_config, dst_config) 179 | 180 | 181 | def main(): 182 | observer = FileStorageObserver(osp.join("data", "sacred", "multi_score")) 183 | multi_score_ex.observers.append(observer) 184 | multi_score_ex.run_commandline() 185 | 186 | 187 | if __name__ == "__main__": 188 | main() 189 | -------------------------------------------------------------------------------- /src/aprl/multi/score_worker.py: -------------------------------------------------------------------------------- 1 | """Helper functions for training.py executed on worker nodes using Ray Tune. 2 | 3 | It's important these are all pickleable.""" 4 | 5 | import os.path as osp 6 | 7 | from sacred import observers 8 | 9 | from aprl.multi import common_worker 10 | 11 | 12 | def score_worker(base_config, tune_config, reporter): 13 | """Run a aprl.score experiment with specified config, logging to reporter. 14 | 15 | :param base_config: (dict) default config 16 | :param tune_config: (dict) overrides values in base_config 17 | :param reporter: (ray.tune.StatusReporter) Ray Tune internal logger.""" 18 | common_worker.fix_sacred_capture() 19 | 20 | # score_ex is not pickleable, so we cannot close on it. 21 | # Instead, import inside the function. 22 | from aprl.score_agent import score_ex 23 | 24 | config = dict(base_config) 25 | tune_config = common_worker.flatten_config(tune_config) 26 | common_worker.update(config, tune_config) 27 | 28 | # We're breaking the Sacred interface by running an experiment from within another experiment. 29 | # This is the best thing we can do, since we need to run the experiment with varying configs. 30 | # Just be careful: this could easily break things. 
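# The pattern is: attach a FileStorageObserver so the nested run still records its
# own artifacts, call ex.run() with config_updates, then read run.result. A
# stripped-down sketch of the same pattern with a toy experiment (names and values
# are illustrative only):
#
#     from sacred import Experiment
#     from sacred.observers import FileStorageObserver
#
#     toy_ex = Experiment("toy")
#
#     @toy_ex.config
#     def _config():
#         x = 1  # overridden via config_updates below
#
#     @toy_ex.main
#     def _main(x):
#         return x * 2
#
#     toy_ex.observers.append(FileStorageObserver("data/sacred/toy"))
#     run = toy_ex.run(config_updates={"x": 21})
#     assert run.result == 42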
31 | observer = observers.FileStorageObserver(osp.join("data", "sacred", "score")) 32 | score_ex.observers.append(observer) 33 | run = score_ex.run(config_updates=config) 34 | index_keys = config.get("index_keys", []) 35 | 36 | idx = { 37 | k: v 38 | for k, v in config.items() 39 | if k.startswith("agent") or k == "env_name" or k in index_keys 40 | } 41 | 42 | reporter(done=True, score=run.result, idx=idx) 43 | -------------------------------------------------------------------------------- /src/aprl/multi/train.py: -------------------------------------------------------------------------------- 1 | """Hyperparameter search for train.py using Ray Tune.""" 2 | 3 | import logging 4 | import math 5 | import os.path as osp 6 | 7 | from ray import tune 8 | from sacred import Experiment 9 | from sacred.observers import FileStorageObserver 10 | 11 | from aprl.configs.multi.train import make_configs 12 | from aprl.multi import common, train_worker 13 | from aprl.train import train_ex 14 | 15 | multi_train_ex = Experiment("multi_train", ingredients=[train_ex]) 16 | pylog = logging.getLogger("aprl.multi.train") 17 | 18 | # Load common configs (e.g. upload directories) and define the run command 19 | run = common.make_sacred(multi_train_ex, "train_rl", train_worker.train_rl) 20 | 21 | # Load named configs for individual experiments (these change a lot, so keep out of this file) 22 | make_configs(multi_train_ex) 23 | 24 | 25 | @multi_train_ex.config 26 | def default_config(train): 27 | spec = { # experiment specification 28 | "run_kwargs": {"resources_per_trial": {"cpu": math.ceil(train["num_env"] / 2)}}, 29 | "config": {}, 30 | } 31 | 32 | _ = locals() # quieten flake8 unused variable warning 33 | del _ 34 | 35 | 36 | @multi_train_ex.named_config 37 | def debug_config(): 38 | spec = { 39 | "config": {"seed": tune.grid_search([0, 1])}, 40 | } 41 | exp_name = "debug" 42 | _ = locals() # quieten flake8 unused variable warning 43 | del _ 44 | 45 | 46 | @multi_train_ex.main 47 | def multi_train(train): 48 | return run(base_config=train) 49 | 50 | 51 | def main(): 52 | observer = FileStorageObserver(osp.join("data", "sacred", "multi_train")) 53 | multi_train_ex.observers.append(observer) 54 | multi_train_ex.run_commandline() 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /src/aprl/multi/train_worker.py: -------------------------------------------------------------------------------- 1 | """Helper functions for training.py executed on worker nodes using Ray Tune. 2 | 3 | It's important these are all pickleable.""" 4 | 5 | import os.path as osp 6 | 7 | from sacred import observers 8 | from stable_baselines import logger 9 | 10 | from aprl.multi import common_worker 11 | 12 | 13 | class ReporterOutputFormat(logger.KVWriter): 14 | """Key-value logging plugin for Stable Baselines that writes to a Ray Tune StatusReporter.""" 15 | 16 | def __init__(self, reporter): 17 | self.last_kvs = dict() 18 | self.reporter = reporter 19 | 20 | def writekvs(self, kvs): 21 | self.last_kvs = kvs 22 | self.reporter(**kvs) 23 | 24 | 25 | def train_rl(base_config, tune_config, reporter): 26 | """Run a aprl.train experiment with specified config, logging to reporter. 
27 | 28 | :param base_config: (dict) default config 29 | :param tune_config: (dict) overrides values in base_config 30 | :param reporter: (ray.tune.StatusReporter) Ray Tune internal logger.""" 31 | common_worker.fix_sacred_capture() 32 | 33 | # train_ex is not pickleable, so we cannot close on it. 34 | # Instead, import inside the function. 35 | from aprl.train import train_ex 36 | 37 | config = dict(base_config) 38 | tune_config = common_worker.flatten_config(tune_config) 39 | common_worker.update(config, tune_config) 40 | tune_kv_str = "-".join([f"{k}={v}" for k, v in tune_config.items()]) 41 | config["exp_name"] = config["exp_name"] + "-" + tune_kv_str 42 | 43 | output_format = ReporterOutputFormat(reporter) 44 | config["log_output_formats"] = [output_format] 45 | 46 | # We're breaking the Sacred interface by running an experiment from within another experiment. 47 | # This is the best thing we can do, since we need to run the experiment with varying configs. 48 | # Just be careful: this could easily break things. 49 | observer = observers.FileStorageObserver(osp.join("data", "sacred", "train")) 50 | 51 | train_ex.observers.append(observer) 52 | train_ex.run(config_updates=config) 53 | reporter(done=True, **output_format.last_kvs) 54 | -------------------------------------------------------------------------------- /src/aprl/policies/__init__.py: -------------------------------------------------------------------------------- 1 | """RL policies, models and related functionality.""" 2 | -------------------------------------------------------------------------------- /src/aprl/policies/base.py: -------------------------------------------------------------------------------- 1 | """RL policies, models and adaptor classes.""" 2 | 3 | from typing import Optional, Type 4 | 5 | import gym 6 | import numpy as np 7 | from stable_baselines.common import BaseRLModel 8 | from stable_baselines.common.policies import BasePolicy 9 | import tensorflow as tf 10 | 11 | 12 | class PredictOnlyModel(BaseRLModel): 13 | """Abstract class for policies pretending to be RL algorithms (models). 14 | 15 | Provides stub implementations that raise NotImplementedError. 16 | The predict method is left as abstract and must be implemented in base class.""" 17 | 18 | def __init__( 19 | self, 20 | policy: Type[BasePolicy], 21 | sess: Optional[tf.Session], 22 | observation_space: gym.Space, 23 | action_space: gym.Space, 24 | ): 25 | """Constructs a DummyModel with given policy and session. 26 | :param policy: (BasePolicy) a loaded policy. 27 | :param sess: (tf.Session or None) a TensorFlow session. 28 | :return an instance of BaseRLModel. 
29 | """ 30 | super().__init__(policy=policy, env=None, requires_vec_env=True, policy_base="Dummy") 31 | self.sess = sess 32 | self.observation_space = observation_space 33 | self.action_space = action_space 34 | 35 | def setup_model(self): 36 | raise NotImplementedError() 37 | 38 | def learn(self): 39 | raise NotImplementedError() 40 | 41 | def action_probability(self, observation, state=None, mask=None, actions=None): 42 | raise NotImplementedError() 43 | 44 | def save(self, save_path): 45 | raise NotImplementedError() 46 | 47 | def load(self): 48 | raise NotImplementedError() 49 | 50 | def _get_pretrain_placeholders(self): 51 | raise NotImplementedError() 52 | 53 | def get_parameter_list(self): 54 | raise NotImplementedError() 55 | 56 | 57 | class ModelWrapper(PredictOnlyModel): 58 | """Base class for wrapping RL algorithms (models).""" 59 | 60 | def __init__(self, model: BaseRLModel): 61 | super().__init__( 62 | policy=model.policy, 63 | sess=model.sess, 64 | observation_space=model.observation_space, 65 | action_space=model.action_space, 66 | ) 67 | self.model = model 68 | 69 | 70 | class PolicyToModel(PredictOnlyModel): 71 | """Converts BasePolicy to a BaseRLModel with only predict implemented.""" 72 | 73 | def __init__(self, policy_obj: BasePolicy): 74 | """Constructs a BaseRLModel using policy for predictions. 75 | :param policy: a loaded policy. 76 | :return an instance of BaseRLModel. 77 | """ 78 | super().__init__( 79 | policy=type(policy_obj), 80 | sess=policy_obj.sess, 81 | observation_space=policy_obj.ob_space, 82 | action_space=policy_obj.ac_space, 83 | ) 84 | self.policy_obj = policy_obj 85 | 86 | def _get_policy_out(self, observation, state, mask, transparent, deterministic=False): 87 | if state is None: 88 | state = self.policy_obj.initial_state 89 | if mask is None: 90 | mask = [False for _ in range(self.policy_obj.n_env)] 91 | 92 | step_fn = self.policy_obj.step_transparent if transparent else self.policy_obj.step 93 | return step_fn(observation, state, mask, deterministic=deterministic) 94 | 95 | def predict(self, observation, state=None, mask=None, deterministic=False): 96 | policy_out = self._get_policy_out( 97 | observation, state, mask, transparent=False, deterministic=deterministic 98 | ) 99 | actions, _val, states, _neglogp = policy_out 100 | return actions, states 101 | 102 | def predict_transparent(self, observation, state=None, mask=None, deterministic=False): 103 | """Returns same values as predict, as well as a dictionary with transparent data.""" 104 | policy_out = self._get_policy_out( 105 | observation, state, mask, transparent=True, deterministic=deterministic 106 | ) 107 | actions, _val, states, _neglogp, data = policy_out 108 | return actions, states, data 109 | 110 | 111 | class OpenAIToStablePolicy(BasePolicy): 112 | """Converts an OpenAI Baselines Policy to a Stable Baselines policy.""" 113 | 114 | def __init__(self, old_policy, ob_space: gym.Space, ac_space: gym.Space): 115 | super().__init__( 116 | sess=old_policy.sess, 117 | ob_space=ob_space, 118 | ac_space=ac_space, 119 | n_env=1, 120 | n_steps=1, 121 | n_batch=1, 122 | ) 123 | self.old = old_policy 124 | 125 | @property 126 | def initial_state(self): 127 | return self.old.initial_state 128 | 129 | def step(self, obs, state=None, mask=None, deterministic=False): 130 | stochastic = not deterministic 131 | return self.old.step(obs, S=state, M=mask, stochastic=stochastic) 132 | 133 | def proba_step(self, obs, state=None, mask=None): 134 | raise NotImplementedError() 135 | 136 | 137 | class 
ConstantPolicy(BasePolicy): 138 | """Policy that returns a constant action.""" 139 | 140 | def __init__(self, env, constant): 141 | assert env.action_space.contains(constant) 142 | super().__init__( 143 | sess=None, 144 | ob_space=env.observation_space, 145 | ac_space=env.action_space, 146 | n_env=env.num_envs, 147 | n_steps=1, 148 | n_batch=1, 149 | ) 150 | self.constant = constant 151 | 152 | def step(self, obs, state=None, mask=None, deterministic=False): 153 | actions = np.array([self.constant] * self.n_env) 154 | return actions, None, None, None 155 | 156 | def proba_step(self, obs, state=None, mask=None): 157 | return self.step(obs, state=state, mask=mask) 158 | 159 | 160 | class ZeroPolicy(ConstantPolicy): 161 | """Policy that returns a zero action.""" 162 | 163 | def __init__(self, env): 164 | super().__init__(env, np.zeros(env.action_space.shape)) 165 | 166 | 167 | class RandomPolicy(BasePolicy): 168 | def __init__(self, env): 169 | super().__init__( 170 | sess=None, 171 | ob_space=env.observation_space, 172 | ac_space=env.action_space, 173 | n_env=env.num_envs, 174 | n_steps=1, 175 | n_batch=1, 176 | ) 177 | 178 | def step(self, obs, state=None, mask=None, deterministic=False): 179 | actions = np.array([self.ac_space.sample() for _ in range(self.n_env)]) 180 | return actions, None, None, None 181 | 182 | def proba_step(self, obs, state=None, mask=None): 183 | raise NotImplementedError() 184 | -------------------------------------------------------------------------------- /src/aprl/policies/loader.py: -------------------------------------------------------------------------------- 1 | """Load serialized policies of different types.""" 2 | 3 | import logging 4 | import os 5 | import pickle 6 | import sys 7 | 8 | import stable_baselines 9 | from stable_baselines.common import vec_env 10 | import tensorflow as tf 11 | 12 | from aprl.envs.gym_compete import load_zoo_agent 13 | from aprl.envs.multi_agent import FakeSingleSpacesVec 14 | from aprl.policies.base import ( 15 | ModelWrapper, 16 | OpenAIToStablePolicy, 17 | PolicyToModel, 18 | RandomPolicy, 19 | ZeroPolicy, 20 | ) 21 | 22 | pylog = logging.getLogger("aprl.policy_loader") 23 | 24 | 25 | class NormalizeModel(ModelWrapper): 26 | def __init__( 27 | self, 28 | model: stable_baselines.common.base_class.BaseRLModel, 29 | vec_normalize: vec_env.VecNormalize, 30 | ): 31 | super().__init__(model=model) 32 | self.vec_normalize = vec_normalize 33 | 34 | def predict(self, observation, state=None, mask=None, deterministic=False): 35 | norm_obs = self.vec_normalize.normalize_obs(observation) 36 | return self.model.predict(norm_obs, state, mask, deterministic) 37 | 38 | def predict_transparent(self, observation, state=None, mask=None, deterministic=False): 39 | """Returns same values as predict, as well as a dictionary with transparent data.""" 40 | norm_obs = self.vec_normalize.normalize_obs(observation) 41 | return self.model.predict_transparent(norm_obs, state, mask, deterministic) 42 | 43 | 44 | def load_vec_normalize(root_dir: str, venv: vec_env.VecEnv) -> vec_env.VecNormalize: 45 | try: 46 | normalize_path = os.path.join(root_dir, "vec_normalize.pkl") 47 | vec_normalize = vec_env.VecNormalize.load(normalize_path, venv) 48 | vec_normalize.training = False 49 | pylog.info(f"Loaded normalization statistics from '{normalize_path}'") 50 | return vec_normalize 51 | except FileNotFoundError: 52 | pass 53 | 54 | # Could not find vec_normalize.pkl: try loading old-style vec normalize. 
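# The two on-disk formats this helper accepts, for reference:
#   * new style: a single '<root_dir>/vec_normalize.pkl', restored by
#     VecNormalize.load(path, venv) above;
#   * old style: separate running-average pickles (typically obs_rms.pkl and
#     ret_rms.pkl), restored by load_running_average(root_dir) below.
# Typical use mirrors load_stable_baselines further down (sketch only):
#
#     vec_normalize = load_vec_normalize(root_dir, denv)
#     model = NormalizeModel(model, vec_normalize)  # normalizes obs before predict()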
55 | vec_normalize = vec_env.VecNormalize(venv, training=False) 56 | vec_normalize.load_running_average(root_dir) 57 | pylog.info(f"Loaded normalization statistics from '{root_dir}'") 58 | return vec_normalize 59 | 60 | 61 | def load_stable_baselines(cls): 62 | def f(root_dir, env, env_name, index, transparent_params): 63 | denv = FakeSingleSpacesVec(env, agent_id=index) 64 | pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'") 65 | model = load_backward_compatible_model(cls, root_dir, denv) 66 | 67 | try: 68 | vec_normalize = load_vec_normalize(root_dir, denv) 69 | model = NormalizeModel(model, vec_normalize) 70 | except FileNotFoundError: 71 | # No saved VecNormalize, must have not trained with normalization. 72 | pass 73 | 74 | return model 75 | 76 | return f 77 | 78 | 79 | def load_old_ppo2(root_dir, env, env_name, index, transparent_params): 80 | try: 81 | from baselines.ppo2 import ppo2 as ppo2_old 82 | except ImportError as e: 83 | msg = "{}. HINT: you need to install (OpenAI) Baselines to use old_ppo2".format(e) 84 | raise ImportError(msg) 85 | 86 | denv = FakeSingleSpacesVec(env, agent_id=index) 87 | possible_fnames = ["model.pkl", "final_model.pkl"] 88 | model_path = None 89 | for fname in possible_fnames: 90 | candidate_path = os.path.join(root_dir, fname) 91 | if os.path.exists(candidate_path): 92 | model_path = candidate_path 93 | if model_path is None: 94 | raise FileNotFoundError( 95 | f"Could not find model at '{root_dir}' " f"under any filename '{possible_fnames}'" 96 | ) 97 | 98 | graph = tf.Graph() 99 | sess = tf.Session(graph=graph) 100 | with sess.as_default(): 101 | with graph.as_default(): 102 | pylog.info(f"Loading Baselines PPO2 policy from '{model_path}'") 103 | policy = ppo2_old.learn( 104 | network="mlp", 105 | env=denv, 106 | total_timesteps=1, 107 | seed=0, 108 | nminibatches=4, 109 | log_interval=1, 110 | save_interval=1, 111 | load_path=model_path, 112 | ) 113 | stable_policy = OpenAIToStablePolicy( 114 | policy, ob_space=denv.observation_space, ac_space=denv.action_space 115 | ) 116 | model = PolicyToModel(stable_policy) 117 | 118 | try: 119 | normalize_path = os.path.join(root_dir, "normalize.pkl") 120 | with open(normalize_path, "rb") as f: 121 | old_vec_normalize = pickle.load(f) 122 | vec_normalize = vec_env.VecNormalize(denv, training=False) 123 | vec_normalize.obs_rms = old_vec_normalize.ob_rms 124 | vec_normalize.ret_rms = old_vec_normalize.ret_rms 125 | model = NormalizeModel(model, vec_normalize) 126 | pylog.info(f"Loaded normalization statistics from '{normalize_path}'") 127 | except FileNotFoundError: 128 | # We did not use VecNormalize during training, skip 129 | pass 130 | 131 | return model 132 | 133 | 134 | def load_zero(path, env, env_name, index, transparent_params): 135 | denv = FakeSingleSpacesVec(env, agent_id=index) 136 | policy = ZeroPolicy(denv) 137 | return PolicyToModel(policy) 138 | 139 | 140 | def load_random(path, env, env_name, index, transparent_params): 141 | denv = FakeSingleSpacesVec(env, agent_id=index) 142 | policy = RandomPolicy(denv) 143 | return PolicyToModel(policy) 144 | 145 | 146 | def mpi_unavailable_error(*args, **kwargs): 147 | raise ImportError("This algorithm requires MPI, which is not available.") 148 | 149 | 150 | # Lazy import for PPO1 and SAC, which have optional mpi dependency 151 | AGENT_LOADERS = { 152 | "zoo": load_zoo_agent, 153 | "ppo2": load_stable_baselines(stable_baselines.PPO2), 154 | "old_ppo2": load_old_ppo2, 155 | "zero": load_zero, 156 | "random": load_random, 157 | } 
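# Every entry in this registry is a callable with the same signature:
#     loader(policy_path, env, env_name, index, transparent_params) -> BaseRLModel
# so new policy types can be added by registering another loader. A hypothetical
# example that always plays one fixed action, built from classes defined in
# aprl.policies.base (sketch only, not part of the shipped loaders):
#
#     from aprl.policies.base import ConstantPolicy, PolicyToModel
#
#     def load_constant(path, env, env_name, index, transparent_params):
#         denv = FakeSingleSpacesVec(env, agent_id=index)
#         constant = denv.action_space.sample()  # could instead be parsed from `path`
#         return PolicyToModel(ConstantPolicy(denv, constant))
#
#     AGENT_LOADERS["constant"] = load_constant
#
# load_policy("constant", ...) would then construct the agent like any other type.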
158 | 159 | try: 160 | # MPI algorithms -- only visible if mpi4py installed 161 | from stable_baselines import PPO1, SAC 162 | 163 | AGENT_LOADERS["ppo1"] = load_stable_baselines(PPO1) 164 | AGENT_LOADERS["sac"] = load_stable_baselines(SAC) 165 | except ImportError: 166 | AGENT_LOADERS["ppo1"] = mpi_unavailable_error 167 | AGENT_LOADERS["sac"] = mpi_unavailable_error 168 | 169 | 170 | def load_policy(policy_type, policy_path, env, env_name, index, transparent_params=None): 171 | agent_loader = AGENT_LOADERS.get(policy_type) 172 | if agent_loader is None: 173 | raise ValueError(f"Unrecognized agent type '{policy_type}'") 174 | return agent_loader(policy_path, env, env_name, index, transparent_params) 175 | 176 | 177 | def load_backward_compatible_model(cls, root_dir, denv=None, **kwargs): 178 | """Backwards compatibility hack to load old pickled policies 179 | which still expect modelfree.* to exist. 180 | """ 181 | import aprl.training.scheduling # noqa: F401 182 | 183 | mock_modules = { 184 | "modelfree": "aprl", 185 | "modelfree.scheduling": "aprl.training.scheduling", 186 | "modelfree.training.scheduling": "aprl.training.scheduling", 187 | } 188 | for old, new in mock_modules.items(): 189 | sys.modules[old] = sys.modules[new] 190 | if "env" in kwargs: 191 | denv = kwargs["env"] 192 | del kwargs["env"] 193 | model_path = os.path.join(root_dir, "model.pkl") 194 | model = cls.load(model_path, env=denv, **kwargs) 195 | for old in mock_modules: 196 | del sys.modules[old] 197 | return model 198 | -------------------------------------------------------------------------------- /src/aprl/policies/transparent.py: -------------------------------------------------------------------------------- 1 | """Policies """ 2 | 3 | from abc import ABC 4 | 5 | import numpy as np 6 | from stable_baselines.common.policies import FeedForwardPolicy, nature_cnn 7 | import tensorflow as tf 8 | 9 | from aprl.envs.wrappers import _filter_dict 10 | 11 | TRANSPARENCY_KEYS = set(["obs", "ff_policy", "ff_value", "hid"]) 12 | 13 | 14 | class TransparentPolicy(ABC): 15 | """Policy which returns its observations and/or activations in its call to self.predict 16 | 17 | :param transparent_params: (set) a subset of TRANSPARENCY_KEYS. 18 | If key is present, that data will be included in the transparency_dict 19 | returned in step_transparent. 20 | """ 21 | 22 | def __init__(self, transparent_params): 23 | if transparent_params is None: 24 | transparent_params = set() 25 | unexpected_keys = set(transparent_params).difference(TRANSPARENCY_KEYS) 26 | if unexpected_keys: 27 | raise KeyError(f"Unrecognized transparency keys: {unexpected_keys}") 28 | self.transparent_params = transparent_params 29 | 30 | def _get_default_transparency_dict(self, obs, ff, hid): 31 | """This structure is typical for subclasses of TransparentPolicy 32 | 33 | :param obs: ([float]) array of observations 34 | :param ff: (dict, [float]) dictionary of lists of feedforward activations. 35 | :param hid: ([float] or None) LSTM hidden state. 36 | """ 37 | # TODO: Do not consolidate -- have this happen later down the pipeline. 
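# Concretely, the helper below only concatenates and squeezes; with illustrative
# shapes (two activation arrays for a single environment):
#
#     acts = [np.ones((1, 3)), np.zeros((1, 3))]
#     np.squeeze(np.concatenate(acts)).shape                # -> (2, 3)
#     np.squeeze(np.concatenate([np.ones((1, 3))])).shape   # -> (3,); squeeze drops the length-1 batch axis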
38 | def consolidate(acts): 39 | """Turn a list of activations into one array with shape (num_env,) + action_space""" 40 | return np.squeeze(np.concatenate(acts)) 41 | 42 | transparency_dict = { 43 | "obs": obs, 44 | "hid": hid, 45 | "ff_policy": consolidate(ff["policy"]), 46 | "ff_value": consolidate(ff["value"]), 47 | } 48 | transparency_dict = _filter_dict(transparency_dict, self.transparent_params) 49 | return transparency_dict 50 | 51 | 52 | class TransparentFeedForwardPolicy(TransparentPolicy, FeedForwardPolicy): 53 | """stable_baselines FeedForwardPolicy which is also transparent.""" 54 | 55 | def __init__( 56 | self, 57 | sess, 58 | ob_space, 59 | ac_space, 60 | n_env, 61 | n_steps, 62 | n_batch, 63 | transparent_params, 64 | reuse=False, 65 | layers=None, 66 | net_arch=None, 67 | act_fun=tf.tanh, 68 | cnn_extractor=nature_cnn, 69 | feature_extraction="cnn", 70 | **kwargs, 71 | ): 72 | FeedForwardPolicy.__init__( 73 | self, 74 | sess, 75 | ob_space, 76 | ac_space, 77 | n_env, 78 | n_steps, 79 | n_batch, 80 | reuse, 81 | layers, 82 | net_arch, 83 | act_fun, 84 | cnn_extractor, 85 | feature_extraction, 86 | **kwargs, 87 | ) 88 | TransparentPolicy.__init__(self, transparent_params) 89 | 90 | def step_transparent(self, obs, state=None, mask=None, deterministic=False): 91 | action_op = self.deterministic_action if deterministic else self.action 92 | outputs = [action_op, self.value_flat, self.neglogp, self.ff_out] 93 | 94 | action, value, neglogp, ff = self.sess.run(outputs, {self.obs_ph: obs}) 95 | return action, value, self.initial_state, neglogp, ff 96 | 97 | 98 | class TransparentMlpPolicy(TransparentFeedForwardPolicy): 99 | def __init__( 100 | self, 101 | sess, 102 | ob_space, 103 | ac_space, 104 | n_env, 105 | n_steps, 106 | n_batch, 107 | transparent_params, 108 | reuse=False, 109 | **_kwargs, 110 | ): 111 | super(TransparentMlpPolicy, self).__init__( 112 | sess, 113 | ob_space, 114 | ac_space, 115 | n_env, 116 | n_steps, 117 | n_batch, 118 | transparent_params, 119 | reuse, 120 | feature_extraction="mlp", 121 | **_kwargs, 122 | ) 123 | -------------------------------------------------------------------------------- /src/aprl/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/training/__init__.py -------------------------------------------------------------------------------- /src/aprl/training/embedded_agents.py: -------------------------------------------------------------------------------- 1 | """Wrappers to embed a fixed agent in an environment.""" 2 | 3 | from aprl.envs.multi_agent import VecMultiWrapper, _tuple_pop, _tuple_space_filter 4 | 5 | 6 | class CurryVecEnv(VecMultiWrapper): 7 | """Substitutes in a fixed agent for one of the players in a VecMultiEnv. 8 | 9 | The agent's session will be closed, if it exists, when the environment is closed.""" 10 | 11 | def __init__(self, venv, policy, agent_idx=0, deterministic=False): 12 | """Fixes one of the players in a VecMultiEnv. 13 | :param venv(VecMultiEnv): the environments. 14 | :param policy(Policy): the policy to use for the agent at agent_idx. 15 | :param agent_idx(int): the index of the agent that should be fixed. 16 | :return: a new VecMultiEnv with num_agents decremented. 
It behaves like env but 17 | with all actions at index agent_idx set to those returned by agent.""" 18 | super().__init__(venv) 19 | 20 | assert venv.num_agents >= 1 # allow currying the last agent 21 | self.num_agents = venv.num_agents - 1 22 | self.observation_space = _tuple_space_filter(self.observation_space, agent_idx) 23 | self.action_space = _tuple_space_filter(self.action_space, agent_idx) 24 | 25 | self._agent_to_fix = agent_idx 26 | self._policy = policy 27 | self._state = None 28 | self._obs = None 29 | self._dones = [False] * venv.num_envs 30 | self.deterministic = deterministic 31 | 32 | def step_async(self, actions): 33 | action, self._state = self._policy.predict( 34 | self._obs, state=self._state, mask=self._dones, deterministic=self.deterministic 35 | ) 36 | actions.insert(self._agent_to_fix, action) 37 | self.venv.step_async(actions) 38 | 39 | def step_wait(self): 40 | observations, rewards, self._dones, infos = self.venv.step_wait() 41 | observations, self._obs = _tuple_pop(observations, self._agent_to_fix) 42 | rewards, _ = _tuple_pop(rewards, self._agent_to_fix) 43 | return observations, rewards, self._dones, infos 44 | 45 | def reset(self): 46 | observations = self.venv.reset() 47 | observations, self._obs = _tuple_pop(observations, self._agent_to_fix) 48 | return observations 49 | 50 | def get_policy(self): 51 | return self._policy 52 | 53 | def get_curry_venv(self): 54 | """Helper method to locate self in a stack of nested VecEnvWrappers""" 55 | return self 56 | 57 | def set_curry_obs(self, obs, env_idx=None): 58 | """Setter for observation of embedded agent 59 | 60 | :param obs ([float]) a vectorized observation from either one or all environments 61 | :param env_idx (int,None) indices of observations to set. None means all. 62 | """ 63 | if env_idx is None: 64 | self._obs = obs 65 | else: 66 | self._obs[env_idx] = obs 67 | 68 | def get_curry_obs(self, env_idx=None): 69 | """Getter for observation of embedded agent 70 | 71 | :param env_idx (int,None) indices of observations to get. None means all. 72 | :return: ([float]) observations from specified environments 73 | """ 74 | if env_idx is None: 75 | return self._obs 76 | else: 77 | return self._obs[env_idx] 78 | 79 | def close(self): 80 | if hasattr(self._policy, "sess") and self._policy.sess is not None: 81 | self._policy.sess.close() 82 | super().close() 83 | 84 | 85 | class TransparentCurryVecEnv(CurryVecEnv): 86 | """CurryVecEnv that provides transparency data about its policy by updating infos dicts.""" 87 | 88 | def __init__(self, venv, policy, agent_idx=0, deterministic=False): 89 | """ 90 | :param venv (VecMultiEnv): the environments 91 | :param policy (BaseRLModel): model which wraps a BasePolicy object 92 | :param agent_idx (int): the index of the agent that should be fixed. 93 | :return: a new VecMultiEnv with num_agents decremented. 
It behaves like env but 94 | with all actions at index agent_idx set to those returned by agent.""" 95 | super().__init__(venv, policy, agent_idx, deterministic) 96 | if not hasattr(self._policy.policy, "step_transparent"): 97 | raise TypeError("Error: policy must be transparent") 98 | self._action = None 99 | 100 | def step_async(self, actions): 101 | policy_out = self._policy.predict_transparent( 102 | self._obs, state=self._state, mask=self._dones, deterministic=self.deterministic 103 | ) 104 | self._action, self._state, self._data = policy_out 105 | actions.insert(self._agent_to_fix, self._action) 106 | self.venv.step_async(actions) 107 | 108 | def step_wait(self): 109 | observations, rewards, self._dones, infos = self.venv.step_wait() 110 | observations, self._obs = _tuple_pop(observations, self._agent_to_fix) 111 | for env_idx in range(self.num_envs): 112 | env_data = {k: v[env_idx] for k, v in self._data.items()} 113 | infos[env_idx][self._agent_to_fix].update(env_data) 114 | return observations, rewards, self._dones, infos 115 | -------------------------------------------------------------------------------- /src/aprl/training/gail_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from stable_baselines.gail.dataset.dataset import ExpertDataset 3 | 4 | 5 | class ExpertDatasetFromOurFormat(ExpertDataset): 6 | """GAIL Expert Dataset. Loads in our format, rather than the GAIL default. 7 | 8 | In particular, GAIL expects a dict of flattened arrays, with episodes concatenated together. 9 | The episode start is delineated by an `episode_starts` array. See `ExpertDataset` base class 10 | for more information. 11 | 12 | By contrast, our format consists of a list of NumPy arrays, one for each episode.""" 13 | 14 | def __init__(self, expert_path, **kwargs): 15 | traj_data = np.load(expert_path, allow_pickle=True) 16 | 17 | # Add in episode starts 18 | episode_starts = [] 19 | for reward_dict in traj_data["rewards"]: 20 | ep_len = len(reward_dict) 21 | # used to index episodes since they are flattened in GAIL format. 22 | ep_starts = [True] + [False] * (ep_len - 1) 23 | episode_starts.append(np.array(ep_starts)) 24 | 25 | # Flatten arrays 26 | traj_data = {k: np.concatenate(v) for k, v in traj_data.items()} 27 | traj_data["episode_starts"] = np.concatenate(episode_starts) 28 | 29 | # Rename observations->obs 30 | traj_data["obs"] = traj_data["observations"] 31 | del traj_data["observations"] 32 | 33 | super().__init__(traj_data=traj_data, **kwargs) 34 | -------------------------------------------------------------------------------- /src/aprl/training/logger.py: -------------------------------------------------------------------------------- 1 | """Logging for RL algorithms. 
2 | 3 | Configures Baseline's logger and TensorBoard appropriately.""" 4 | 5 | import os 6 | from os import path as osp 7 | 8 | from stable_baselines import logger 9 | from tensorboard.plugins.custom_scalar import layout_pb2 10 | import tensorboard.summary as summary_lib 11 | from tensorflow.core.util import event_pb2 12 | 13 | from aprl.common import utils 14 | 15 | 16 | def gen_multiline_charts(cfg): 17 | charts = [] 18 | for title, tags in cfg: 19 | charts.append( 20 | layout_pb2.Chart(title=title, multiline=layout_pb2.MultilineChartContent(tag=tags)) 21 | ) 22 | return charts 23 | 24 | 25 | def tb_layout(): 26 | episode_rewards = layout_pb2.Category( 27 | title="Episode Reward", 28 | chart=gen_multiline_charts( 29 | [ 30 | ("Shaped Reward", [r"shaping/eprewmean_true"]), 31 | ("Episode Length", [r"eplenmean"]), 32 | ("Sparse Reward", [r"shaping/epsparsemean"]), 33 | ("Dense Reward", [r"shaping/epdensemean"]), 34 | ("Dense Reward Annealing", [r"shaping/rew_anneal_c"]), 35 | ("Unshaped Reward", [r"ep_rewmean"]), 36 | ("Victim Action Noise", [r"shaping/victim_noise"]), 37 | ] 38 | ), 39 | ) 40 | 41 | game_outcome = layout_pb2.Category( 42 | title="Game Outcomes", 43 | chart=gen_multiline_charts( 44 | [ 45 | ("Agent 0 Win Proportion", [r"game_win0"]), 46 | ("Agent 1 Win Proportion", [r"game_win1"]), 47 | ("Tie Proportion", [r"game_tie"]), 48 | ("# of games", [r"game_total"]), 49 | ] 50 | ), 51 | ) 52 | 53 | training = layout_pb2.Category( 54 | title="Training", 55 | chart=gen_multiline_charts( 56 | [ 57 | ("Policy Loss", [r"policy_loss"]), 58 | ("Value Loss", [r"value_loss"]), 59 | ("Policy Entropy", [r"policy_entropy"]), 60 | ("Explained Variance", [r"explained_variance"]), 61 | ("Approx KL", [r"approxkl"]), 62 | ("Clip Fraction", [r"clipfrac"]), 63 | ] 64 | ), 65 | ) 66 | 67 | # Intentionally unused: 68 | # + serial_timesteps (just total_timesteps / num_envs) 69 | # + time_elapsed (TensorBoard already logs wall-clock time) 70 | # + nupdates (this is already logged as step) 71 | time = layout_pb2.Category( 72 | title="Time", 73 | chart=gen_multiline_charts([("Total Timesteps", [r"total_timesteps"]), ("FPS", [r"fps"])]), 74 | ) 75 | 76 | categories = [episode_rewards, game_outcome, training, time] 77 | return summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) 78 | 79 | 80 | def setup_logger(out_dir="results", exp_name="test", output_formats=None): 81 | timestamp = utils.make_timestamp() 82 | exp_name = exp_name.replace("/", "_") # environment names can contain /'s 83 | filename = "{}-{}".format(timestamp, exp_name)[0:255] # Linux has filename limit of 255 84 | out_dir = osp.join(out_dir, filename) 85 | os.makedirs(out_dir, exist_ok=True) 86 | 87 | logger.configure(folder=osp.join(out_dir, "rl"), format_strs=["tensorboard", "stdout"]) 88 | logger_instance = logger.Logger.CURRENT 89 | 90 | if output_formats is not None: 91 | logger_instance.output_formats += output_formats 92 | 93 | for fmt in logger_instance.output_formats: 94 | if isinstance(fmt, logger.TensorBoardOutputFormat): 95 | writer = fmt.writer 96 | layout = tb_layout() 97 | event = event_pb2.Event(summary=layout) 98 | writer.WriteEvent(event) 99 | writer.Flush() 100 | 101 | return out_dir, logger_instance 102 | -------------------------------------------------------------------------------- /src/aprl/training/shaping_wrappers.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from itertools import islice 3 | 4 | from 
stable_baselines.common.vec_env import VecEnvWrapper 5 | 6 | from aprl.policies.wrappers import NoisyAgentWrapper 7 | from aprl.training.scheduling import ConditionalAnnealer, ConstantAnnealer, LinearAnnealer 8 | 9 | REW_TYPES = set(("sparse", "dense")) 10 | 11 | 12 | class RewardShapingVecWrapper(VecEnvWrapper): 13 | """ 14 | A more direct interface for shaping the reward of the attacking agent. 15 | - shaping_params schema: {'sparse': {k: v}, 'dense': {k: v}, **kwargs} 16 | """ 17 | 18 | def __init__(self, venv, agent_idx, shaping_params, reward_annealer=None): 19 | super().__init__(venv) 20 | assert shaping_params.keys() == REW_TYPES 21 | self.shaping_params = {} 22 | for rew_type, params in shaping_params.items(): 23 | for rew_term, weight in params.items(): 24 | self.shaping_params[rew_term] = (rew_type, weight) 25 | 26 | self.reward_annealer = reward_annealer 27 | self.agent_idx = agent_idx 28 | queue_keys = REW_TYPES.union(["length"]) 29 | self.ep_logs = {k: deque([], maxlen=10000) for k in queue_keys} 30 | self.ep_logs["total_episodes"] = 0 31 | self.ep_logs["last_callback_episode"] = 0 32 | self.step_rew_dict = { 33 | rew_type: [[] for _ in range(self.num_envs)] for rew_type in REW_TYPES 34 | } 35 | 36 | def log_callback(self, logger): 37 | """Logs various metrics. This is given as a callback to PPO2.learn()""" 38 | num_episodes = self.ep_logs["total_episodes"] - self.ep_logs["last_callback_episode"] 39 | if num_episodes == 0: 40 | return 41 | 42 | means = {} 43 | for rew_type in REW_TYPES: 44 | if len(self.ep_logs[rew_type]) < num_episodes: 45 | raise AssertionError(f"Data missing in ep_logs for {rew_type}") 46 | rews = islice(self.ep_logs[rew_type], num_episodes) 47 | means[rew_type] = sum(rews) / num_episodes 48 | logger.logkv(f"shaping/ep{rew_type}mean", means[rew_type]) 49 | 50 | overall_mean = _anneal(means, self.reward_annealer) 51 | logger.logkv("shaping/eprewmean_true", overall_mean) 52 | c = self.reward_annealer() 53 | logger.logkv("shaping/rew_anneal_c", c) 54 | self.ep_logs["last_callback_episode"] = self.ep_logs["total_episodes"] 55 | 56 | def get_logs(self): 57 | """Interface to access self.ep_logs which contains data about episodes""" 58 | if self.ep_logs["total_episodes"] == 0: 59 | return None 60 | # keys: 'dense', 'sparse', 'length', 'total_episodes', 'last_callback_episode' 61 | return self.ep_logs 62 | 63 | def reset(self): 64 | return self.venv.reset() 65 | 66 | def step_wait(self): 67 | obs, rew, done, infos = self.venv.step_wait() 68 | for env_num in range(self.num_envs): 69 | # Compute shaped_reward for each rew_type 70 | shaped_reward = {k: 0 for k in REW_TYPES} 71 | for rew_term, rew_value in infos[env_num][self.agent_idx].items(): 72 | if rew_term not in self.shaping_params: 73 | continue 74 | rew_type, weight = self.shaping_params[rew_term] 75 | shaped_reward[rew_type] += weight * rew_value 76 | 77 | # Compute total shaped reward, optionally annealing 78 | rew[env_num] = _anneal(shaped_reward, self.reward_annealer) 79 | 80 | # Log the results of an episode into buffers and then pass on the shaped reward 81 | for rew_type, val in shaped_reward.items(): 82 | self.step_rew_dict[rew_type][env_num].append(val) 83 | 84 | if done[env_num]: 85 | ep_length = max(len(self.step_rew_dict[k]) for k in self.step_rew_dict.keys()) 86 | self.ep_logs["length"].appendleft(ep_length) # pytype:disable=attribute-error 87 | for rew_type in REW_TYPES: 88 | rew_type_total = sum(self.step_rew_dict[rew_type][env_num]) 89 | rew_type_logs = self.ep_logs[rew_type] 90 | 
rew_type_logs.appendleft(rew_type_total) # pytype:disable=attribute-error 91 | self.step_rew_dict[rew_type][env_num] = [] 92 | self.ep_logs["total_episodes"] += 1 93 | return obs, rew, done, infos 94 | 95 | 96 | def apply_reward_wrapper(single_env, shaping_params, agent_idx, scheduler): 97 | if "metric" in shaping_params: 98 | rew_shape_annealer = ConditionalAnnealer.from_dict(shaping_params, get_logs=None) 99 | scheduler.set_conditional("rew_shape") 100 | else: 101 | anneal_frac = shaping_params.get("anneal_frac") 102 | if anneal_frac is not None: 103 | rew_shape_annealer = LinearAnnealer(1, 0, anneal_frac) 104 | else: 105 | # In this case, we weight the reward terms as per shaping_params 106 | # but the ratio of sparse to dense reward remains constant. 107 | rew_shape_annealer = ConstantAnnealer(0.5) 108 | 109 | scheduler.set_annealer("rew_shape", rew_shape_annealer) 110 | return RewardShapingVecWrapper( 111 | single_env, 112 | agent_idx=agent_idx, 113 | shaping_params=shaping_params["weights"], 114 | reward_annealer=scheduler.get_annealer("rew_shape"), 115 | ) 116 | 117 | 118 | def apply_embedded_agent_wrapper(embedded, noise_params, scheduler): 119 | if "metric" in noise_params: 120 | noise_annealer = ConditionalAnnealer.from_dict(noise_params, get_logs=None) 121 | scheduler.set_conditional("noise") 122 | else: 123 | noise_anneal_frac = noise_params.get("anneal_frac", 0) 124 | noise_param = noise_params.get("param", 0) 125 | 126 | if noise_anneal_frac <= 0: 127 | msg = "victim_noise_params.anneal_frac must be >0 if using a NoisyAgentWrapper." 128 | raise ValueError(msg) 129 | noise_annealer = LinearAnnealer(noise_param, 0, noise_anneal_frac) 130 | scheduler.set_annealer("noise", noise_annealer) 131 | return NoisyAgentWrapper(embedded, noise_annealer=scheduler.get_annealer("noise")) 132 | 133 | 134 | def _anneal(reward_dict, reward_annealer): 135 | c = reward_annealer() 136 | assert 0 <= c <= 1 137 | sparse_weight = 1 - c 138 | dense_weight = c 139 | return reward_dict["sparse"] * sparse_weight + reward_dict["dense"] * dense_weight 140 | -------------------------------------------------------------------------------- /src/aprl/visualize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/visualize/__init__.py -------------------------------------------------------------------------------- /src/aprl/visualize/benchmark_ffmpeg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | for resolution in 1920x1080 1280x720 854x480; do 4 | for threads in 1 2 4 6 8 12; do 5 | echo "*** RESOLUTION ${resolution} with THREADS ${threads}" 6 | time (ffmpeg -y -i $1 -s ${resolution} -c:v libx264 -preset slow -crf 28 -threads ${threads} /tmp/ffmpeg_benchmark.mp4 >/dev/null 2>&1) 7 | done 8 | done 9 | -------------------------------------------------------------------------------- /src/aprl/visualize/compress_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ $# -ne 2 ]]; then 4 | echo "usage: $0 " 5 | exit -1 6 | fi 7 | 8 | IN_DIR=$1 9 | OUT_DIR=$2 10 | 11 | fnames="" 12 | for in_path in ${IN_DIR}/*.mp4; do 13 | fnames="${fnames} $(basename -s .mp4 ${in_path})" 14 | done 15 | 16 | FFMPEG_COMMAND="ffmpeg -i ${IN_DIR}/{prefix}.mp4 -c:v libx264 -preset slow -crf 28" 17 | 18 | # These were tuned for my 
machine. See benchmark_ffmpeg.sh to choose reasonable values. 19 | # Generally there are diminishing returns to using more threads per video. 20 | # Since we have a large number of videos, favor large job count and small thread count. 21 | parallel --header : -j 50% ${FFMPEG_COMMAND} -threads 2 \ 22 | ${OUT_DIR}/{prefix}_1080p.mp4 ::: prefix ${fnames} 23 | parallel --header : -j 100% ${FFMPEG_COMMAND} -threads 1 -s 1280x720 \ 24 | ${OUT_DIR}/{prefix}_720p.mp4 ::: prefix ${fnames} 25 | parallel --header : -j 100% ${FFMPEG_COMMAND} -threads 1 -s 854x480 \ 26 | ${OUT_DIR}/{prefix}_480p.mp4 ::: prefix ${fnames} 27 | -------------------------------------------------------------------------------- /src/aprl/visualize/generate_website_json.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import functools 3 | import json 4 | import logging 5 | import os 6 | import re 7 | import sys 8 | from typing import Any, Iterable, Sequence, Tuple 9 | 10 | import boto3 11 | 12 | from aprl.visualize import util 13 | 14 | logger = logging.getLogger("aprl.visualize.generate_website_json") 15 | 16 | ENV_NAME_LOOKUP = { 17 | "KickAndDefend-v0": "Kick and Defend", 18 | "SumoHumans-v0": "Sumo Humans", 19 | "YouShallNotPassHumans-v0": "You Shall Not Pass", 20 | "SumoAnts-v0": "Sumo Ants", 21 | } 22 | BUCKET_NAME = "adversarial-policies-public" 23 | PREFIX = "videos" 24 | 25 | EXCLUDE_ABBREV = [r"ZooM[SD].*"] 26 | 27 | 28 | class NestedDict(OrderedDict): 29 | """Implementation of perl's autovivification feature.""" 30 | 31 | def __getitem__(self, item): 32 | try: 33 | return super().__getitem__(item) # pytype:disable=unsupported-operands 34 | except KeyError: 35 | value = self[item] = type(self)() 36 | return value 37 | 38 | 39 | def get_s3_files() -> Iterable[str]: 40 | s3 = boto3.resource("s3") 41 | adv_policies_bucket = s3.Bucket(BUCKET_NAME) 42 | objs = adv_policies_bucket.objects.filter(Prefix=PREFIX).all() 43 | return [os.path.basename(o.key) for o in objs] 44 | 45 | 46 | def is_excluded(abbrev: str) -> bool: 47 | for exclude in EXCLUDE_ABBREV: 48 | if re.match(exclude, abbrev): 49 | return True 50 | return False 51 | 52 | 53 | def get_videos(video_files: Iterable[str]) -> NestedDict: 54 | video_files = [path for path in video_files if path.endswith(".mp4")] 55 | stem_pattern = re.compile(r"(.*)_[0-9]+p.mp4") 56 | agent_pattern = re.compile(r"(\w*-v\d)_victim_(.*)_opponent_(.*)") 57 | 58 | nested = NestedDict() 59 | for path in video_files: 60 | stem_match = stem_pattern.match(path) 61 | if stem_match is None: 62 | logger.info(f"Skipping path '{path}: malformed filename, cannot extract stem.") 63 | continue 64 | 65 | stem = stem_match.groups()[0] 66 | assert isinstance(stem, str) 67 | 68 | agent_match = agent_pattern.match(stem) 69 | if agent_match is None: 70 | logger.info(f"Skipping path '{path}: malformed filename, cannot extract agent.") 71 | continue 72 | 73 | env_name, victim_abbrev, opponent_abbrev = agent_match.groups() 74 | if is_excluded(victim_abbrev) or is_excluded(opponent_abbrev): 75 | logger.info(f"Skipping path '{path}': explicitly excluded.") 76 | continue 77 | 78 | env_name = ENV_NAME_LOOKUP.get(env_name) 79 | victim = f"{util.friendly_agent_label(victim_abbrev)} ({victim_abbrev})" 80 | opponent = f"{util.friendly_agent_label(opponent_abbrev)} ({opponent_abbrev})" 81 | nested[env_name][opponent][victim] = stem # pytype:disable=unsupported-operands 82 | 83 | return nested 84 | 85 | 86 | def sort_fn(item: Tuple[str, 
Any], groups: Sequence[str]) -> str: 87 | """Prepends index of key in groups: can sort in order of groups with alphabetical sort. 88 | 89 | :param item: key-value pair. 90 | :param groups: sequence of regexps.""" 91 | k, v = item 92 | match = re.match(r".* \((.*)\)", k) 93 | assert match 94 | abbrev = match.groups()[0] 95 | for i, grp in enumerate(groups): 96 | if re.match(grp, abbrev): 97 | break 98 | return f"{i}{abbrev}" 99 | 100 | 101 | def sort_nested(nested: NestedDict) -> NestedDict: 102 | new_nested = NestedDict() 103 | victim_sort = functools.partial(sort_fn, groups=util.GROUPS["rows"]) 104 | opponent_sort = functools.partial(sort_fn, groups=util.GROUPS["cols"]) 105 | 106 | for env, d1 in nested.items(): 107 | new_d1 = {} 108 | for opponent, d2 in d1.items(): 109 | new_d1[opponent] = OrderedDict(sorted(d2.items(), key=victim_sort)) 110 | new_nested[env] = OrderedDict(sorted(new_d1.items(), key=opponent_sort)) 111 | return new_nested 112 | 113 | 114 | def main(): 115 | logging.basicConfig(level=logging.INFO) 116 | paths = get_s3_files() 117 | nested = get_videos(paths) 118 | nested = sort_nested(nested) 119 | 120 | out_path = "file_list.json" 121 | if len(sys.argv) > 1: 122 | out_path = sys.argv[1] 123 | 124 | print(nested) 125 | with open(out_path, "w") as fp: 126 | json.dump(nested, fp, indent=4) 127 | logger.info(f"Saved files to '{out_path}'.") 128 | 129 | 130 | if __name__ == "__main__": 131 | main() 132 | -------------------------------------------------------------------------------- /src/aprl/visualize/make_videos.py: -------------------------------------------------------------------------------- 1 | """Generate videos for adversaries and standard baselines.""" 2 | 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | from sacred import Experiment 8 | from sacred.observers import FileStorageObserver 9 | 10 | from aprl.common.utils import make_timestamp 11 | from aprl.configs import DATA_LOCATION 12 | from aprl.multi.score import extract_data, run_external 13 | from aprl.visualize import util 14 | 15 | make_videos_ex = Experiment("make_videos") 16 | make_videos_logger = logging.getLogger("make_videos") 17 | 18 | 19 | @make_videos_ex.config 20 | def default_config(): 21 | adversary_path = osp.join( 22 | DATA_LOCATION, "multi_train", "paper", "highest_win_policies_and_rates.json" 23 | ) 24 | ray_upload_dir = "data" # where Ray will upload multi.score outputs. 'data' works on baremetal 25 | score_configs = [("normal",), ("normal", "mask_observations_of_victim")] 26 | multi_score = {} 27 | root_dir = "data/videos" 28 | exp_name = "default" 29 | _ = locals() # quieten flake8 unused variable warning 30 | del _ 31 | 32 | 33 | @make_videos_ex.named_config 34 | def defense_config(): 35 | score_configs = [("defenses",), ("defenses", "mask_observations_of_victim")] 36 | exp_name = "defense" 37 | _ = locals() # quieten flake8 unused variable warning 38 | del _ 39 | 40 | 41 | @make_videos_ex.named_config 42 | def slides_config(): 43 | """Generate a subset of videos, with tighter-cropped camera. 
44 | Intended for slideshows/demos.""" 45 | score_configs = [("summary",), ("summary", "mask_observations_of_victim")] 46 | multi_score = { 47 | "score": { 48 | "video_params": {"annotation_params": {"camera_config": "close", "short_labels": True}} 49 | } 50 | } 51 | exp_name = "slides" 52 | _ = locals() # quieten flake8 unused variable warning 53 | del _ 54 | 55 | 56 | LOW_RES = { 57 | "score": {"video_params": {"annotation_params": {"resolution": (640, 480), "font_size": 24}}} 58 | } 59 | 60 | 61 | @make_videos_ex.named_config 62 | def low_res(): 63 | multi_score = LOW_RES # noqa: F841 64 | 65 | 66 | @make_videos_ex.named_config 67 | def debug_config(): 68 | score_configs = [ 69 | ("debug_one_each_type",), 70 | ("debug_one_each_type", "mask_observations_of_victim"), 71 | ] 72 | multi_score = dict(LOW_RES) 73 | multi_score["score"]["episodes"] = 2 74 | exp_name = "debug" 75 | _ = locals() # quieten flake8 unused variable warning 76 | del _ 77 | 78 | 79 | @make_videos_ex.capture 80 | def generate_videos(score_configs, multi_score, adversary_path): 81 | """Uses multi.score to generate videos.""" 82 | return run_external( 83 | score_configs, 84 | post_named_configs=["video"], 85 | config_updates=multi_score, 86 | adversary_path=adversary_path, 87 | ) 88 | 89 | 90 | @make_videos_ex.capture 91 | def extract_videos(out_dir, video_dirs, ray_upload_dir): 92 | def path_generator( 93 | trial_root, 94 | env_sanitized, 95 | victim_index, 96 | victim_type, 97 | victim_path, 98 | opponent_type, 99 | opponent_path, 100 | cfg, 101 | ): 102 | src_path = osp.join( 103 | trial_root, "data", "sacred", "score", "1", "videos", "env_0_episode_0_recording.mp4" 104 | ) 105 | 106 | victim_suffix = "" 107 | opponent_suffix = "" 108 | mask_index = cfg["mask_agent_index"] 109 | if mask_index is not None: 110 | if mask_index == victim_index: 111 | victim_suffix = "M" 112 | else: 113 | opponent_suffix = "M" 114 | 115 | victim = util.abbreviate_agent_config( 116 | cfg["env_name"], victim_type, victim_path, victim_suffix, victim=True 117 | ) 118 | opponent = util.abbreviate_agent_config( 119 | cfg["env_name"], opponent_type, opponent_path, opponent_suffix, victim=False 120 | ) 121 | 122 | new_name = f"{env_sanitized}_victim_{victim}_opponent_{opponent}" 123 | return src_path, new_name, "mp4" 124 | 125 | return extract_data(path_generator, out_dir, video_dirs, ray_upload_dir) 126 | 127 | 128 | @make_videos_ex.main 129 | def make_videos(root_dir, exp_name): 130 | out_dir = osp.join(root_dir, exp_name, make_timestamp()) 131 | os.makedirs(out_dir) 132 | 133 | video_dirs = generate_videos() 134 | extract_videos(out_dir=out_dir, video_dirs=video_dirs) 135 | 136 | 137 | def main(): 138 | observer = FileStorageObserver(osp.join("data", "sacred", "make_videos")) 139 | make_videos_ex.observers.append(observer) 140 | make_videos_ex.run_commandline() 141 | make_videos_logger.info("Sacred run completed, files stored at {}".format(observer.dir)) 142 | 143 | 144 | if __name__ == "__main__": 145 | main() 146 | -------------------------------------------------------------------------------- /src/aprl/visualize/noisy_observations.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import os 4 | import os.path as osp 5 | 6 | from matplotlib import pyplot as plt 7 | import pandas as pd 8 | from sacred import Experiment 9 | from sacred.observers import FileStorageObserver 10 | import seaborn as sns 11 | 12 | from aprl.envs.gym_compete import NUM_ZOO_POLICIES 13 | from
aprl.visualize import styles 14 | 15 | plot_noisy_obs_exp = Experiment("plot_noisy_observations") 16 | 17 | 18 | @plot_noisy_obs_exp.config 19 | def base_config(): 20 | root_dir = "data/aws/score_agents/victim_masked_noise/" 21 | out_dir = "data/aws/score_agents/masked_obs_visualization/" 22 | input_run = "ep_500_5-22_all_zoo" 23 | # Runs known to work: ["ep_500_5-22_single_zoo", "ep_100_5-21", "ep_500_5-22_all_zoo"] 24 | _ = locals() # quieten flake8 unused variable warning 25 | del _ 26 | 27 | 28 | def transform(df, transform_list): 29 | new_df = df.copy() 30 | for trans_dict in transform_list: 31 | new_df[trans_dict["new_col"]] = new_df[trans_dict["old_col"]].apply(trans_dict["func"]) 32 | return new_df 33 | 34 | 35 | def subset(df, spec): 36 | ret = df.copy() 37 | for constraint, constraint_value in spec.items(): 38 | ret = ret[ret[constraint] == constraint_value] 39 | return ret 40 | 41 | 42 | def process_element_into_flat_dict(el, key_order): 43 | outp = {} 44 | for i, k in enumerate(key_order): 45 | outp[k] = el["k"][i] 46 | outp["agent0_wins"] = el["v"]["win0"] 47 | outp["agent1_wins"] = el["v"]["win1"] 48 | outp["ties"] = el["v"]["ties"] 49 | return outp 50 | 51 | 52 | def noisy_adversary_opponent_subset_plot( 53 | original_df, subset_specs, transform_specs, logistic=True, plot_line=True, savefile=None 54 | ): 55 | subset_df = subset(original_df, subset_specs) 56 | if len(subset_df) == 0: 57 | return 58 | transformed_df = transform(subset_df, transform_specs) 59 | plt.figure(figsize=(10, 7)) 60 | if plot_line: 61 | sns.lmplot(data=transformed_df, x="log_noise", y="agent0_win_perc", logistic=logistic) 62 | else: 63 | sns.scatterplot(data=transformed_df, x="log_noise", y="agent0_win_perc") 64 | plt.title( 65 | "{}: Noisy Zoo{} Observations vs Adversary".format( 66 | subset_specs["env"], subset_specs["agent0_path"] 67 | ) 68 | ) 69 | if savefile is not None: 70 | plt.savefig(savefile) 71 | else: 72 | plt.show() 73 | plt.close() 74 | 75 | 76 | def noisy_multiple_opponent_subset_plot( 77 | original_df, subset_specs, transform_specs, logistic=True, savefile=None 78 | ): 79 | subset_df = subset(original_df, subset_specs) 80 | if len(subset_df) == 0: 81 | return 82 | transformed_df = transform(subset_df, transform_specs) 83 | plt.figure(figsize=(10, 7)) 84 | sns.lmplot( 85 | data=transformed_df, 86 | x="log_noise", 87 | y="agent0_win_perc", 88 | hue="agent1_path", 89 | logistic=logistic, 90 | ) 91 | plt.title( 92 | "{}: Noisy Zoo{} Observations vs Normal Zoos".format( 93 | subset_specs["env"], subset_specs["agent0_path"] 94 | ) 95 | ) 96 | if savefile is not None: 97 | plt.savefig(savefile) 98 | else: 99 | plt.show() 100 | plt.close() 101 | 102 | 103 | @plot_noisy_obs_exp.main 104 | def generate_plots(input_run, root_dir, out_dir): 105 | num_episodes = int(input_run.split("_")[1]) 106 | baseline_transformations = [ 107 | {"new_col": "log_noise", "old_col": "noise_magnitude", "func": lambda x: math.log(x)}, 108 | { 109 | "new_col": "agent0_win_perc", 110 | "old_col": "agent0_wins", 111 | "func": lambda x: x / num_episodes, 112 | }, 113 | { 114 | "new_col": "agent1_win_perc", 115 | "old_col": "agent1_wins", 116 | "func": lambda x: x / num_episodes, 117 | }, 118 | ] 119 | 120 | zoo_path = os.path.join(root_dir, input_run, "noisy_zoo_observations.json") 121 | adversary_path = os.path.join(root_dir, input_run, "noisy_adversary_observations.json") 122 | with open(adversary_path, "r") as fp: 123 | noisy_obs_against_adv = json.load(fp) 124 | 125 | DATAFRAME_KEYS = [ 126 | "env", 127 | 
"agent0_type", 128 | "agent0_path", 129 | "agent1_type", 130 | "agent1_path", 131 | "masking_param", 132 | "noise_magnitude", 133 | ] 134 | 135 | with open(zoo_path, "r") as fp: 136 | noisy_obs_against_zoo = json.load(fp) 137 | noisy_zoo_obs_df = pd.DataFrame( 138 | [ 139 | process_element_into_flat_dict(el, key_order=DATAFRAME_KEYS) 140 | for el in noisy_obs_against_zoo 141 | ] 142 | ) 143 | noisy_adv_obs_df = pd.DataFrame( 144 | [ 145 | process_element_into_flat_dict(el, key_order=DATAFRAME_KEYS) 146 | for el in noisy_obs_against_adv 147 | ] 148 | ) 149 | experiment_out_dir = os.path.join(out_dir, input_run) 150 | 151 | if not os.path.exists(experiment_out_dir): 152 | os.mkdir(experiment_out_dir) 153 | 154 | for env_name, pretty_env in styles.PRETTY_ENV.items(): 155 | short_env = pretty_env.replace(" ", "") 156 | if env_name == "multicomp/YouShallNotPassHumans-v0": 157 | # skip for now as has different victim index, need to fix plotting code 158 | continue 159 | 160 | for zoo_id in range(1, NUM_ZOO_POLICIES[short_env] + 1): 161 | subset_params = {"agent0_path": str(zoo_id), "env": env_name} 162 | 163 | zoo_plot_path = os.path.join( 164 | experiment_out_dir, f"{input_run}_ZooBaseline_" f"{short_env}_AgainstZoo{zoo_id}" 165 | ) 166 | 167 | adversary_plot_path = os.path.join( 168 | experiment_out_dir, 169 | f"{input_run}_AdversaryTrained_" f"{short_env}_AgainstZoo{zoo_id}", 170 | ) 171 | noisy_multiple_opponent_subset_plot( 172 | noisy_zoo_obs_df, 173 | subset_specs=subset_params, 174 | transform_specs=baseline_transformations, 175 | savefile=zoo_plot_path, 176 | ) 177 | noisy_adversary_opponent_subset_plot( 178 | noisy_adv_obs_df, 179 | subset_specs=subset_params, 180 | transform_specs=baseline_transformations, 181 | savefile=os.path.join(adversary_plot_path), 182 | ) 183 | 184 | 185 | def main(): 186 | observer = FileStorageObserver(osp.join("data", "sacred", "plot_noisy_observations")) 187 | plot_noisy_obs_exp.observers.append(observer) 188 | plot_noisy_obs_exp.run_commandline() 189 | 190 | 191 | if __name__ == "__main__": 192 | main() 193 | -------------------------------------------------------------------------------- /src/aprl/visualize/scores.py: -------------------------------------------------------------------------------- 1 | from distutils.dir_util import copy_tree 2 | import logging 3 | import os.path 4 | 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | from sacred import Experiment 8 | from sacred.observers import FileStorageObserver 9 | 10 | from aprl.visualize import styles as vis_styles 11 | from aprl.visualize import util 12 | 13 | logger = logging.getLogger("aprl.visualize.scores") 14 | visualize_score_ex = Experiment("visualize_score") 15 | 16 | 17 | @visualize_score_ex.capture 18 | def heatmap_opponent(single_env, cmap, row_starts, row_ends, col_ends): 19 | xlabel = single_env.name in col_ends 20 | ylabel = single_env.name in row_starts 21 | cbar = single_env.name in row_ends 22 | return util.heatmap_one_col( 23 | single_env, col="Opponent Win", cmap=cmap, xlabel=xlabel, ylabel=ylabel, cbar=cbar 24 | ) 25 | 26 | 27 | def _make_old_paths(timestamped_path, **kwargs): 28 | """Paths in traditional format, before refactoring multi.score. 29 | 30 | Specifically, expects a timestamped directory containing `adversary_transfer.json`. 31 | In the same directory as the timestamped directory, there should be `fixed_baseline.json` and 32 | `zoo_baseline.json`. 
33 | """ 34 | score_dir = os.path.dirname(timestamped_path) 35 | paths = [ 36 | os.path.join(timestamped_path, "adversary_transfer.json"), 37 | os.path.join(score_dir, "fixed_baseline.json"), 38 | os.path.join(score_dir, "zoo_baseline.json"), 39 | ] 40 | return [dict(path=path, **kwargs) for path in paths] 41 | 42 | 43 | SMALL_SCORE_PATHS = _make_old_paths( 44 | os.path.join("normal", "2019-05-05T18:12:24+00:00") 45 | ) + _make_old_paths( 46 | os.path.join("victim_masked_init", "2019-05-05T18:12:24+00:00"), victim_suffix="M" 47 | ) 48 | DEFENSE_SCORE_PATHS = [ 49 | {"path": os.path.join("defenses", "normal.json")}, 50 | {"path": os.path.join("defenses", "victim_masked_init.json"), "victim_suffix": "M"}, 51 | ] 52 | 53 | 54 | @visualize_score_ex.config 55 | def default_config(): 56 | score_root = os.path.join("data", "aws", "score_agents") 57 | score_paths = _make_old_paths(os.path.join("normal", "2019-05-05T18:12:24+00:00")) 58 | 59 | command = util.heatmap_full 60 | styles = ["paper", "a4"] 61 | palette = "Blues" 62 | publication = False 63 | 64 | fig_dir = os.path.join("data", "figs", "scores") 65 | 66 | seed = 0 # we don't use it for anything, but stop config changing each time as we version it 67 | 68 | _ = locals() # quieten flake8 unused variable warning 69 | del _ 70 | 71 | 72 | @visualize_score_ex.named_config 73 | def full_masked_config(): 74 | score_paths = ( # noqa: F841 75 | _make_old_paths( 76 | os.path.join("normal", "2019-05-05T18:12:24+00:00"), 77 | victim_suffix="N", 78 | opponent_suffix="N", 79 | ) 80 | + _make_old_paths( 81 | os.path.join("victim_masked_init", "2019-05-05T18:12:24+00:00"), 82 | victim_suffix="BI", 83 | opponent_suffix="N", 84 | ) 85 | + _make_old_paths( 86 | os.path.join("victim_masked_zero", "2019-05-05T18:12:24+00:00"), 87 | victim_suffix="BZ", 88 | opponent_suffix="N", 89 | ) 90 | + [ 91 | { 92 | "path": os.path.join( 93 | "adversary_masked_init", "2019-05-05T18:12:24+00:00", "adversary_transfer.json" 94 | ), 95 | "victim_suffix": "N", 96 | "opponent_suffix": "BI", 97 | } 98 | ] 99 | ) 100 | 101 | 102 | @visualize_score_ex.named_config 103 | def paper_config(): 104 | score_paths = SMALL_SCORE_PATHS 105 | 106 | styles = ["paper", "scores", "scores_twocol"] 107 | row_starts = ["multicomp/KickAndDefend-v0", "multicomp/SumoHumansAutoContact-v0"] 108 | row_ends = ["multicomp/YouShallNotPassHumans-v0", "multicomp/SumoAntsAutoContact-v0"] 109 | col_ends = ["multicomp/SumoHumansAutoContact-v0", "multicomp/SumoAntsAutoContact-v0"] 110 | command = heatmap_opponent 111 | publication = True 112 | 113 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_single") 114 | 115 | _ = locals() # quieten flake8 unused variable warning 116 | del _ 117 | 118 | 119 | @visualize_score_ex.named_config 120 | def supplementary_config(): 121 | score_paths = SMALL_SCORE_PATHS 122 | 123 | styles = ["paper", "scores", "scores_monolithic"] 124 | publication = True 125 | 126 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores") 127 | 128 | _ = locals() # quieten flake8 unused variable warning 129 | del _ 130 | 131 | 132 | @visualize_score_ex.named_config 133 | def defense_paper_config(): 134 | score_paths = DEFENSE_SCORE_PATHS 135 | styles = ["paper", "scores", "scores_twocol"] 136 | row_starts = [] 137 | row_ends = ["multicomp/YouShallNotPassHumans-v0"] 138 | col_ends = [] 139 | command = heatmap_opponent 140 | publication = True 141 | 142 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_defense_single") 143 
| 144 | _ = locals() # quieten flake8 unused variable warning 145 | del _ 146 | 147 | 148 | @visualize_score_ex.named_config 149 | def defense_supplementary_config(): 150 | score_paths = DEFENSE_SCORE_PATHS 151 | # can use short as currently just YSNP environment 152 | styles = ["paper", "scores", "scores_monolithic_short"] 153 | publication = True 154 | 155 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_defense") 156 | 157 | _ = locals() # quieten flake8 unused variable warning 158 | del _ 159 | 160 | 161 | @visualize_score_ex.named_config 162 | def poster_config(): 163 | score_paths = DEFENSE_SCORE_PATHS 164 | 165 | styles = ["poster", "scores_poster_threecol"] 166 | row_starts = ["multicomp/KickAndDefend-v0"] 167 | row_ends = ["multicomp/YouShallNotPassHumans-v0"] 168 | col_ends = [ 169 | "multicomp/KickAndDefend-v0", 170 | "multicomp/SumoHumansAutoContact-v0", 171 | "multicomp/YouShallNotPassHumans-v0", 172 | ] 173 | command = heatmap_opponent 174 | publication = True 175 | 176 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_poster") 177 | 178 | _ = locals() # quieten flake8 unused variable warning 179 | del _ 180 | 181 | 182 | @visualize_score_ex.main 183 | def visualize_score(command, styles, palette, publication, fig_dir, score_root, score_paths): 184 | datasets = [ 185 | util.load_datasets( 186 | os.path.join(score_root, item["path"]), 187 | victim_suffix=item.get("victim_suffix", ""), 188 | opponent_suffix=item.get("opponent_suffix", ""), 189 | ) 190 | for item in score_paths 191 | ] 192 | dataset = pd.concat(datasets) 193 | 194 | for style in styles: 195 | plt.style.use(vis_styles.STYLES[style]) 196 | 197 | suptitle = not publication 198 | combine = not publication 199 | generator = util.apply_per_env(dataset, command, suptitle=suptitle, cmap=palette) 200 | for out_path in util.save_figs(fig_dir, generator, combine=combine): 201 | visualize_score_ex.add_artifact(filename=out_path) 202 | 203 | for observer in visualize_score_ex.observers: 204 | if hasattr(observer, "dir"): 205 | logger.info(f"Copying from {observer.dir} to {fig_dir}") 206 | copy_tree(observer.dir, fig_dir) 207 | break 208 | 209 | 210 | def main(): 211 | observer = FileStorageObserver(os.path.join("data", "sacred", "visualize_score")) 212 | visualize_score_ex.observers.append(observer) 213 | visualize_score_ex.run_commandline() 214 | 215 | 216 | if __name__ == "__main__": 217 | main() 218 | -------------------------------------------------------------------------------- /src/aprl/visualize/styles.py: -------------------------------------------------------------------------------- 1 | PRETTY_ENV = { 2 | "multicomp/KickAndDefend-v0": "Kick and Defend", 3 | "multicomp/SumoAntsAutoContact-v0": "Sumo Ants", 4 | "multicomp/SumoAnts-v0": "Sumo Ants", 5 | "multicomp/SumoHumansAutoContact-v0": "Sumo Humans", 6 | "multicomp/SumoHumans-v0": "Sumo Humans", 7 | "multicomp/YouShallNotPassHumans-v0": "You Shall Not Pass", 8 | } 9 | 10 | PRETTY_LABELS = { 11 | "Adv": "Adversary (Adv)", 12 | "Zoo": "Normal (Zoo)", 13 | "Rand": "Random (Rand)", 14 | "Zero": "Zero", 15 | } 16 | 17 | STYLES = { 18 | "paper": { 19 | "figure.figsize": (5.5, 7.5), 20 | "font.family": "serif", 21 | "font.serif": "Times New Roman", 22 | "font.weight": "bold", 23 | "font.size": 9, 24 | "legend.fontsize": 9, 25 | "axes.unicode_minus": False, # workaround bug with Unicode minus signs not appearing 26 | "axes.titlesize": 9, 27 | "axes.labelsize": 9, 28 | "xtick.labelsize": 9, 29 | "ytick.labelsize": 9, 
30 | }, 31 | "slides": {"figure.figsize": (9.32, 3)}, 32 | "slides_density": {"figure.figsize": (5, 3)}, 33 | "poster": { 34 | "font.family": "sans-serif", 35 | "font.sans-serif": "Arial", 36 | "font.weight": "bold", 37 | "font.size": 14, 38 | "legend.fontsize": 14, 39 | "axes.titlesize": 14, 40 | "axes.labelsize": 14, 41 | "xtick.labelsize": 14, 42 | "ytick.labelsize": 14, 43 | }, 44 | "monolithic": {"figure.figsize": (5.5, 2.0625)}, 45 | "twocol": {"figure.figsize": (2.75, 2.0625)}, 46 | "threecol": {"figure.figsize": (1.83, 1.7)}, 47 | "scores": {"font.size": 8, "ytick.labelsize": 8, "xtick.labelsize": 8}, 48 | "scores_monolithic": {"figure.figsize": (5.5, 1.4)}, 49 | "scores_monolithic_short": {"figure.figsize": (5.5, 1.3)}, 50 | "scores_twocol": {"figure.figsize": (2.7, 1.61)}, 51 | "scores_threecol": {"figure.figsize": (1.76, 1.6)}, 52 | "density_twocol": {"figure.figsize": (2.7, 2.0625), "legend.fontsize": 8}, 53 | "scores_poster_threecol": {"figure.figsize": (5.15, 3.1)}, 54 | "a4": {"figure.figsize": (8.27, 11.69)}, 55 | } 56 | -------------------------------------------------------------------------------- /src/aprl/visualize/tb.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import fnmatch 3 | import functools 4 | import itertools 5 | import json 6 | import logging 7 | import multiprocessing 8 | import os 9 | import traceback 10 | 11 | import tensorflow as tf 12 | 13 | logger = logging.getLogger("aprl.visualize.tb") 14 | 15 | 16 | def find_tfevents(log_dir): 17 | result = [] 18 | for root, dirs, files in os.walk(log_dir, followlinks=True): 19 | if root.endswith("rl/tb"): 20 | for name in files: 21 | if fnmatch.fnmatch(name, "events.out.tfevents.*"): 22 | result.append(os.path.join(root, name)) 23 | return result 24 | 25 | 26 | def exp_root_from_event(event_path): 27 | # tb_dirname = ...experiment/data/baselines/TIMESTAMP/rl/tb/events.* 28 | # exp_root = ...experiment/ 29 | return os.path.sep.join(event_path.split(os.path.sep)[:-6]) 30 | 31 | 32 | def read_events_file(events_filename, keys=None): 33 | events = [] 34 | try: 35 | for event in tf.train.summary_iterator(events_filename): 36 | row = {"wall_time": event.wall_time, "step": event.step} 37 | for value in event.summary.value: 38 | if keys is not None and value.tag not in keys: 39 | continue 40 | row[value.tag] = value.simple_value 41 | events.append(row) 42 | except Exception: # noqa:B902 43 | logger.error(f"While reading '{events_filename}': {traceback.print_exc()}") 44 | return events 45 | 46 | 47 | def read_sacred_config(exp_root, kind): 48 | sacred_config_path = os.path.join(exp_root, "data", "sacred", kind, "1", "config.json") 49 | with open(sacred_config_path, "r") as f: 50 | return json.load(f) 51 | 52 | 53 | def load_tb_data(log_dir, keys=None): 54 | event_paths = find_tfevents(log_dir) 55 | 56 | pool = multiprocessing.Pool() 57 | events_by_path = pool.map(functools.partial(read_events_file, keys=keys), event_paths) 58 | 59 | events_by_dir = {} 60 | for event_path, events in zip(event_paths, events_by_path): 61 | exp_root = exp_root_from_event(event_path) 62 | if exp_root not in events_by_dir: 63 | events_by_dir[exp_root] = [] 64 | events_by_dir[exp_root] += events 65 | 66 | config_by_dir = { 67 | dirname: read_sacred_config(dirname, "train") for dirname in events_by_dir.keys() 68 | } 69 | 70 | return config_by_dir, events_by_dir 71 | 72 | 73 | def split_by_keys(configs, events, keys): 74 | res = collections.defaultdict(list) 75 | for dirname, config 
in configs.items(): 76 | event = events[dirname] 77 | cfg_vals = tuple(config[k] for k in keys) 78 | res[cfg_vals].append({"dir": dirname, "config": config, "events": event}) 79 | return res 80 | 81 | 82 | def tb_apply(configs, events, split_keys, fn, **kwargs): 83 | events_by_plot = split_by_keys(configs, events, split_keys) 84 | 85 | pool = multiprocessing.Pool() 86 | map_fn = functools.partial(fn, **kwargs) 87 | res = pool.map(map_fn, events_by_plot.items()) 88 | res = itertools.chain(*res) 89 | return res 90 | -------------------------------------------------------------------------------- /tests/SumoAnts_traj/agent_0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/SumoAnts_traj/agent_0.npz -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/__init__.py -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/old_ppo2/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/old_ppo2/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/old_ppo2/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/old_ppo2/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/old_ppo2/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/old_ppo2/ret_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo1/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo1/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo1/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo1/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo1/ret_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo2/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo2/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo2/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo2/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo2/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo2/ret_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/sac/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/sac/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/sac/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/sac/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/sac/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/sac/ret_rms.pkl -------------------------------------------------------------------------------- /tests/policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/policies/__init__.py -------------------------------------------------------------------------------- /tests/test_agents.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from ilqr import iLQR 3 | import numpy as np 4 | import pytest 5 | 6 | from aprl.agents.monte_carlo import ( 7 | MonteCarloParallel, 8 | MonteCarloSingle, 9 | MujocoResettableWrapper, 10 | receding_horizon, 11 | ) 12 | from aprl.agents.mujoco_lqr import ( 13 | MujocoFiniteDiffCost, 14 | MujocoFiniteDiffDynamicsBasic, 15 | MujocoFiniteDiffDynamicsPerformance, 16 | ) 17 | 18 | dynamics_list = [MujocoFiniteDiffDynamicsBasic, MujocoFiniteDiffDynamicsPerformance] 19 | 20 | 21 | @pytest.mark.parametrize("dynamics_cls", dynamics_list) 22 | def test_lqr_mujoco(dynamics_cls): 23 | """Smoke test for MujcooFiniteDiff{Dynamics,Cost}. 
24 | Jupyter notebook experiments/mujoco_control.ipynb has quantitative results 25 | attained; for efficiency, we only run for a few iterations here.""" 26 | env = gym.make("Reacher-v2").unwrapped 27 | env.seed(42) 28 | env.reset() 29 | dynamics = dynamics_cls(env) 30 | cost = MujocoFiniteDiffCost(env) 31 | N = 10 32 | ilqr = iLQR(dynamics, cost, N) 33 | x0 = dynamics.get_state() 34 | us_init = np.array([env.action_space.sample() for _ in range(N)]) 35 | xs, us = ilqr.fit(x0, us_init, n_iterations=3) 36 | assert x0.shape == xs[0].shape 37 | assert xs.shape[0] == N + 1 38 | assert us.shape == (N, 2) 39 | assert env.action_space.contains(us[0]) 40 | 41 | 42 | def rollout(env, actions): 43 | obs, rews, dones, infos = [], [], [], [] 44 | for a in actions: 45 | ob, rew, done, info = env.step(a) 46 | obs.append(ob) 47 | rews.append(rew) 48 | dones.append(done) 49 | infos.append(info) 50 | obs = np.array(obs) 51 | rews = np.array(rews) 52 | dones = np.array(dones) 53 | return obs, rews, dones, infos 54 | 55 | 56 | def make_mujoco_env(env_name, seed): 57 | env = gym.make(env_name) 58 | env = MujocoResettableWrapper(env.unwrapped) 59 | env.seed(seed) 60 | env.reset() 61 | return env 62 | 63 | 64 | MONTE_CARLO_ENVS = ["Reacher-v2", "HalfCheetah-v2", "Hopper-v2"] 65 | 66 | 67 | @pytest.mark.parametrize("env_name", MONTE_CARLO_ENVS) 68 | def test_mujoco_reset_env(env_name, horizon=10, seed=42): 69 | env = make_mujoco_env(env_name, seed) 70 | state = env.get_state() 71 | actions = [env.action_space.sample() for _ in range(horizon)] 72 | 73 | first_obs, first_rews, first_dones, _first_infos = rollout(env, actions) 74 | env.set_state(state) 75 | second_obs, second_rews, second_dones, _second_infos = rollout(env, actions) 76 | 77 | np.testing.assert_almost_equal(second_obs, first_obs, decimal=5) 78 | np.testing.assert_almost_equal(second_rews, first_rews, decimal=5) 79 | assert (first_dones == second_dones).all() 80 | 81 | 82 | def check_monte_carlo( 83 | kind, score_thresholds, total_horizon, planning_horizon, trajectories, seed=42 84 | ): 85 | def f(env_name): 86 | # Setup 87 | env = make_mujoco_env(env_name, seed) 88 | if kind == "single": 89 | mc = MonteCarloSingle(env, planning_horizon, trajectories) 90 | elif kind == "parallel": 91 | env_fns = [lambda: make_mujoco_env(env_name, seed) for _ in range(2)] 92 | mc = MonteCarloParallel(env_fns, planning_horizon, trajectories) 93 | else: # pragma: no cover 94 | raise ValueError("Unrecognized kind '{}'".format(kind)) 95 | mc.seed(seed) 96 | 97 | # Check for side-effects 98 | state = env.get_state() 99 | _ = mc.best_action(state) 100 | assert (env.get_state() == state).all(), "Monte Carlo search has side effects" 101 | 102 | # One receding horizon rollout of Monte Carlo search 103 | total_rew = 0 104 | prev_done = False 105 | for i, (a, ob, rew, done, info) in enumerate(receding_horizon(mc, env)): 106 | assert not prev_done, "should terminate if env returns done" 107 | prev_done = done 108 | assert env.action_space.contains(a) 109 | assert env.observation_space.contains(ob) 110 | total_rew += rew 111 | 112 | if i >= total_horizon: 113 | break 114 | assert i == total_horizon or done 115 | 116 | # Check it does better than random sequences 117 | random_rews = [] 118 | for i in range(10): 119 | env.action_space.np_random.seed(seed + i) 120 | action_seq = [env.action_space.sample() for _ in range(total_horizon)] 121 | env.set_state(state) 122 | _, rews, _, _ = rollout(env, action_seq) 123 | random_rew = sum(rews) 124 | random_rews.append(random_rew) 125 | 
assert total_rew >= random_rew, "random sequence {}".format(i) 126 | print( 127 | f"Random actions on {env_name} for {total_horizon} obtains " 128 | f"mean {np.mean(random_rews)} s.d. {np.std(random_rews)}" 129 | ) 130 | 131 | # Check against pre-defined score threshold 132 | assert total_rew >= score_thresholds[env_name] 133 | 134 | # Cleanup 135 | if kind == "parallel": 136 | mc.close() 137 | with pytest.raises(BrokenPipeError): 138 | mc.best_action(state) 139 | 140 | return f 141 | 142 | 143 | MC_SINGLE_THRESHOLDS = { 144 | "Reacher-v2": -11, # tested -9.5, random -17.25 s.d. 1.5 145 | "HalfCheetah-v2": 19, # tested 21.6, random -4.2 s.d. 3.7 146 | "Hopper-v2": 29, # tested 31.1, random 15.2 s.d. 5.9 147 | } 148 | MC_PARALLEL_THRESHOLDS = { 149 | "Reacher-v2": -17, # tested at -15.3; random -25.8 s.d. 1.8 150 | "HalfCheetah-v2": 33, # tested at 35.5; random -6.0 s.d. 7.1 151 | "Hopper-v2": 52, # tested at 54.7; random 21.1 s.d. 13.2 152 | } 153 | _test_mc_single = check_monte_carlo( 154 | "single", MC_SINGLE_THRESHOLDS, total_horizon=20, planning_horizon=10, trajectories=100 155 | ) 156 | _test_mc_parallel = check_monte_carlo( 157 | "parallel", MC_PARALLEL_THRESHOLDS, total_horizon=30, planning_horizon=15, trajectories=200 158 | ) 159 | test_mc_single = pytest.mark.parametrize("env_name", MONTE_CARLO_ENVS)(_test_mc_single) 160 | test_mc_parallel = pytest.mark.parametrize("env_name", MONTE_CARLO_ENVS)(_test_mc_parallel) 161 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import gym 5 | 6 | from aprl.common.multi_monitor import MultiMonitor 7 | import aprl.envs # noqa: F401 8 | 9 | 10 | def test_multi_monitor(): 11 | """Smoke test for MultiMonitor.""" 12 | env = gym.make("aprl/IteratedMatchingPennies-v0") 13 | env.seed(42) 14 | with tempfile.TemporaryDirectory(prefix="test_multi_mon") as d: 15 | env = MultiMonitor(env, filename=os.path.join(d, "test")) 16 | for eps in range(5): 17 | env.reset() 18 | done = False 19 | while not done: 20 | a = env.action_space.sample() 21 | _, _, done, info = env.step(a) 22 | epinfo = info["episode"] 23 | assert set(epinfo.keys()) == {"r", "r0", "r1", "l", "t"} 24 | -------------------------------------------------------------------------------- /tests/test_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.spaces import Tuple 3 | import numpy as np 4 | import pytest 5 | 6 | from aprl.envs import multi_agent 7 | 8 | spec_list = [ 9 | spec 10 | for spec in sorted(gym.envs.registration.registry.all(), key=lambda x: x.id) 11 | if spec.id.startswith("aprl/") or spec.id.startswith("multicomp/") 12 | ] 13 | 14 | 15 | def make_env(spec, i=0): 16 | env = spec.make() 17 | env.seed(42 + i) 18 | return env 19 | 20 | 21 | @pytest.yield_fixture 22 | def env_from_spec(spec): 23 | env = make_env(spec) 24 | yield env 25 | env.close() 26 | 27 | 28 | def test_envs_exist(): 29 | assert len(spec_list) > 0, "No aprl environments detected" 30 | 31 | 32 | @pytest.mark.parametrize("spec", spec_list) 33 | def test_random_rollout(env_from_spec): 34 | """Based on Gym smoke test in gym.envs.tests.test_envs.""" 35 | ob = env_from_spec.reset() 36 | for _ in range(1000): 37 | assert env_from_spec.observation_space.contains(ob) 38 | a = env_from_spec.action_space.sample() 39 | assert env_from_spec.action_space.contains(a) 40 | ob, reward, done, 
info = env_from_spec.step(a) 41 | if done: 42 | break 43 | 44 | 45 | @pytest.mark.parametrize("spec", spec_list) 46 | def test_env(env_from_spec): 47 | """Based on Gym smoke test in gym.envs.tests.test_envs.""" 48 | ob_space = env_from_spec.observation_space 49 | act_space = env_from_spec.action_space 50 | ob = env_from_spec.reset() 51 | assert ob_space.contains(ob), "Reset observation: {!r} not in space".format(ob) 52 | a = act_space.sample() 53 | ob, reward, done, _info = env_from_spec.step(a) 54 | assert ob_space.contains(ob), "Step observation: {!r} not in space".format(ob) 55 | assert isinstance(done, bool), "Expected {} to be a boolean".format(done) 56 | 57 | if hasattr(env_from_spec, "num_agents"): # multi agent environment 58 | assert len(reward) == env_from_spec.num_agents 59 | assert isinstance(env_from_spec.observation_space, Tuple), "Observations should be Tuples" 60 | assert isinstance(env_from_spec.action_space, Tuple), "Actions should be Tuples" 61 | assert len(env_from_spec.observation_space.spaces) == env_from_spec.num_agents 62 | assert len(env_from_spec.action_space.spaces) == env_from_spec.num_agents 63 | else: # pragma: no cover 64 | assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env_from_spec) 65 | 66 | for mode in env_from_spec.metadata.get("render.modes", []): 67 | env_from_spec.render(mode=mode) 68 | 69 | # Make sure we can render the environment after close. 70 | for mode in env_from_spec.metadata.get("render.modes", []): 71 | env_from_spec.render(mode=mode) 72 | 73 | 74 | # Test VecMultiEnv classes 75 | 76 | 77 | def assert_envs_equal(env1, env2, num_steps, check_info: bool = True): 78 | """ 79 | Compare two environments over num_steps steps and make sure 80 | that the observations produced by each are the same when given 81 | the same actions. 82 | """ 83 | assert env1.num_envs == env2.num_envs 84 | assert env1.observation_space == env2.observation_space 85 | assert env1.action_space == env2.action_space 86 | 87 | try: 88 | obs1, obs2 = env1.reset(), env2.reset() 89 | assert type(obs1) == type(obs2) 90 | # TODO: sample actions sensitive to num_envs. 91 | # (Maybe add a helper function to make this easy in VecEnv? Feels like a design flaw.) 
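        # A helper along these lines (hypothetical, not currently part of aprl) could wrap
        # the sampling mentioned in the TODO above:
        #     def sample_tuple_actions(venv):
        #         per_env = tuple(venv.action_space.sample() for _ in range(venv.num_envs))
        #         return multi_agent.tuple_transpose(per_env)
        # For now the loop below does the same thing inline: draw one joint action per
        # sub-environment, then transpose into the per-agent tuple layout step_async expects.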
92 | 93 | if isinstance(obs1, tuple): 94 | for x, y in zip(obs1, obs2): 95 | assert x.shape == y.shape 96 | assert np.allclose(x, y) 97 | else: # pragma: no cover 98 | assert np.array(obs1).shape == np.array(obs2).shape 99 | assert np.allclose(obs1, obs2) 100 | 101 | if isinstance(env1.action_space, Tuple): 102 | for space in env1.action_space.spaces: 103 | space.np_random.seed(1337) 104 | else: # pragma: no cover 105 | env1.action_space.np_random.seed(1337) 106 | 107 | for _ in range(num_steps): 108 | actions = tuple((env1.action_space.sample() for _ in range(env1.num_envs))) 109 | actions = multi_agent.tuple_transpose(actions) 110 | for env in [env1, env2]: 111 | env.step_async(actions) 112 | outs1 = env1.step_wait() 113 | outs2 = env2.step_wait() 114 | # Check ob, rew, done; ignore infos 115 | for out1, out2 in zip(outs1[:3], outs2[:3]): 116 | assert np.allclose(out1, out2) 117 | if check_info: 118 | assert list(outs1[3]) == list(outs2[3]) 119 | finally: 120 | env1.close() 121 | env2.close() 122 | 123 | 124 | @pytest.mark.parametrize("spec", spec_list) 125 | def test_vec_env(spec): 126 | """Test that our {Dummy,Subproc}VecMultiEnv gives the same results as 127 | each other.""" 128 | env_fns = [lambda: make_env(spec, i) for i in range(4)] 129 | venv1 = multi_agent.make_dummy_vec_multi_env(env_fns) 130 | venv2 = multi_agent.make_subproc_vec_multi_env(env_fns) 131 | is_multicomp = spec.id.startswith("multicomp/") 132 | # Can't easily compare info dicts returned by multicomp/ environments, so just skip that check 133 | assert_envs_equal(venv1, venv2, 100, check_info=not is_multicomp) 134 | -------------------------------------------------------------------------------- /vendor/Xdummy-entrypoint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import argparse 3 | import os 4 | import sys 5 | import subprocess 6 | 7 | parser = argparse.ArgumentParser() 8 | args, extra_args = parser.parse_known_args() 9 | subprocess.Popen(["nohup", "Xdummy"], stdout=open('/dev/null', 'w'), stderr=open('/dev/null', 'w')) 10 | os.environ['DISPLAY'] = ':0' 11 | if not extra_args: 12 | sys.argv = ['/bin/bash'] 13 | else: 14 | sys.argv = extra_args 15 | # Explicitly flush right before the exec since otherwise things might get 16 | # lost in Python's buffers around stdout/stderr (!). 17 | sys.stdout.flush() 18 | sys.stderr.flush() 19 | os.execvpe(sys.argv[0], sys.argv, os.environ) 20 | 21 | --------------------------------------------------------------------------------
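Note on the reward shaping logic in src/aprl/training/shaping_wrappers.py above: apply_reward_wrapper either anneals the dense-to-sparse mix linearly over the first anneal_frac of training (LinearAnnealer(1, 0, anneal_frac)) or holds it fixed at 0.5 (ConstantAnnealer(0.5)), and _anneal() blends the two reward terms with the resulting coefficient. Below is a minimal, self-contained sketch of that blending. linear_coeff is a simplified stand-in for LinearAnnealer (the real annealer gets its training-progress signal from the Scheduler, which is not shown in this section); only the blend() formula is taken directly from _anneal().

def linear_coeff(start, end, anneal_frac, progress):
    """Interpolate from `start` to `end` over the first `anneal_frac` of training.

    `progress` is the fraction of training completed, in [0, 1].
    Simplified stand-in for LinearAnnealer."""
    if anneal_frac <= 0:
        return end
    frac = min(progress / anneal_frac, 1.0)
    return start + frac * (end - start)


def blend(reward_dict, c):
    """Same weighting as _anneal: c weights the dense term, (1 - c) the sparse term."""
    assert 0 <= c <= 1
    return reward_dict["sparse"] * (1 - c) + reward_dict["dense"] * c


rewards = {"sparse": 10.0, "dense": 0.5}
for progress in (0.0, 0.25, 0.5, 1.0):
    c = linear_coeff(1, 0, 0.5, progress)  # same arguments as LinearAnnealer(1, 0, anneal_frac=0.5)
    print(f"progress={progress:.2f}  c={c:.2f}  shaped reward={blend(rewards, c):.2f}")

Early in training the shaped reward is dominated by the dense term; once the annealed fraction has elapsed, only the sparse win/loss reward remains. With ConstantAnnealer(0.5) the two terms instead stay equally weighted throughout, as the comment in apply_reward_wrapper notes.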
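The Monte Carlo planner tests in tests/test_agents.py also double as the clearest usage documentation for aprl.agents.monte_carlo. The following is a rough usage sketch distilled from check_monte_carlo above, assuming a working MuJoCo installation and the aprl package; constructor arguments are passed positionally as (env, planning_horizon, trajectories), exactly as in the test, since the keyword names are not shown in this section.

import gym

from aprl.agents.monte_carlo import (
    MonteCarloSingle,
    MujocoResettableWrapper,
    receding_horizon,
)

# Wrap a MuJoCo env so its full simulator state can be saved and restored during planning.
env = MujocoResettableWrapper(gym.make("Reacher-v2").unwrapped)
env.seed(42)
env.reset()

mc = MonteCarloSingle(env, 10, 100)  # positional: env, planning_horizon, trajectories
mc.seed(42)

# receding_horizon repeatedly asks the planner for the best action and steps the real env.
total_rew = 0.0
for i, (action, ob, rew, done, _info) in enumerate(receding_horizon(mc, env)):
    total_rew += rew
    if done or i >= 20:  # total_horizon=20, as in the single-process test
        break
print(f"Receding-horizon return on Reacher-v2: {total_rew:.2f}")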