├── .circleci └── config.yml ├── .codecov.yml ├── .codespell.skip ├── .coveragerc ├── .dockerignore ├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── ci ├── build_venv.sh ├── code_checks.sh ├── local_tests.sh └── run_tests.sh ├── experiments ├── __init__.py ├── benchmark_vec_env.sh ├── build_and_run.sh ├── common.sh ├── modelfree │ ├── attack_transfer.sh │ ├── baselines.sh │ ├── common.sh │ ├── dec2018replication.sh │ ├── defenses.sh │ ├── density.sh │ ├── highest_win_rate.py │ ├── noisy_actions_and_obs.sh │ └── plot.sh ├── planning │ ├── __init__.py │ ├── common.py │ ├── mujoco_control-halfcheetah.ipynb │ ├── mujoco_control-hopper.ipynb │ ├── mujoco_control-ilqrpr.ipynb │ ├── mujoco_control.ipynb │ ├── mujoco_inverteddoublependulum.ipynb │ ├── mujoco_invertedpendulum.ipynb │ ├── mujoco_mc.ipynb │ ├── mujoco_reacher.ipynb │ └── mujoco_swimmer.ipynb ├── pull_public_s3.sh ├── pull_s3.sh ├── push_public_s3.sh ├── remote_build_and_run.sh └── run_docker.sh ├── pyproject.toml ├── requirements-build.txt ├── requirements-dev.txt ├── requirements.txt ├── scripts ├── aws │ ├── cloudwatch.sh │ └── termination.py ├── doubleblind.sh ├── grab_frame.py └── incomplete_experiments.py ├── setup.cfg ├── setup.py ├── src └── aprl │ ├── __init__.py │ ├── activations │ ├── __init__.py │ ├── density │ │ ├── __init__.py │ │ ├── fit_density.py │ │ ├── pipeline.py │ │ └── visualize.py │ ├── generate_activations.py │ └── tsne │ │ ├── __init__.py │ │ ├── fit_model.py │ │ ├── pipeline.py │ │ └── visualize.py │ ├── agents │ ├── __init__.py │ ├── monte_carlo.py │ └── mujoco_lqr.py │ ├── common │ ├── __init__.py │ ├── mujoco.py │ ├── multi_monitor.py │ └── utils.py │ ├── configs │ ├── .gitignore │ ├── __init__.py │ ├── multi │ │ ├── __init__.py │ │ ├── common.py │ │ ├── score.py │ │ └── train.py │ ├── noise │ │ ├── SumoHumans-cond.json │ │ ├── SumoHumans.json │ │ └── default.json │ ├── ray │ │ ├── aws.yaml │ │ └── baremetal.yaml │ └── rew │ │ ├── Humanoid.json │ │ ├── HumanoidStand.json │ │ ├── SumoHumans-cond.json │ │ └── default.json │ ├── envs │ ├── __init__.py │ ├── crowded_line.py │ ├── gym_compete.py │ ├── matrix_game.py │ ├── mujoco_costs.py │ ├── multi_agent.py │ ├── observation_masking.py │ ├── sumo_auto_contact.py │ └── wrappers.py │ ├── multi │ ├── __init__.py │ ├── common.py │ ├── common_worker.py │ ├── score.py │ ├── score_worker.py │ ├── train.py │ └── train_worker.py │ ├── policies │ ├── __init__.py │ ├── base.py │ ├── loader.py │ ├── transparent.py │ └── wrappers.py │ ├── score_agent.py │ ├── train.py │ ├── training │ ├── __init__.py │ ├── embedded_agents.py │ ├── gail_dataset.py │ ├── logger.py │ ├── lookback.py │ ├── scheduling.py │ └── shaping_wrappers.py │ └── visualize │ ├── __init__.py │ ├── annotated_gym_compete.py │ ├── benchmark_ffmpeg.sh │ ├── compress_videos.sh │ ├── generate_website_json.py │ ├── make_videos.py │ ├── noisy_observations.py │ ├── scores.py │ ├── styles.py │ ├── tb.py │ ├── training.py │ └── util.py ├── tests ├── SumoAnts_traj │ └── agent_0.npz ├── __init__.py ├── dummy_sumo_ants │ ├── old_ppo2 │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl │ ├── ppo1 │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl │ ├── ppo2 │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl │ └── sac │ │ ├── model.pkl │ │ ├── obs_rms.pkl │ │ └── ret_rms.pkl ├── policies │ ├── __init__.py │ └── test_wrappers.py ├── test_agents.py ├── test_common.py ├── test_envs.py └── test_experiments.py └── vendor ├── Xdummy └── Xdummy-entrypoint /.circleci/config.yml: 
-------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | codecov: codecov/codecov@1.0.5 5 | 6 | executors: 7 | my-executor: 8 | docker: 9 | - image: humancompatibleai/adversarial_policies:base 10 | auth: 11 | username: $DOCKERHUB_USERNAME 12 | password: $DOCKERHUB_PASSWORD 13 | working_directory: /adversarial-policies 14 | environment: 15 | # If you change these, also change scripts/code_checks.sh 16 | SRC_FILES: src/ tests/ setup.py 17 | TYPECHECK_FILES: src/ tests/ setup.py 18 | 19 | commands: 20 | dependencies: 21 | # You must still manually update the Docker image if any 22 | # binary (non-Python) dependencies change. 23 | description: "Check out and update Python dependencies." 24 | steps: 25 | - checkout 26 | # Download and cache dependencies 27 | - restore_cache: 28 | keys: 29 | - v2-dependencies-{{ checksum "requirements-build.txt" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 30 | 31 | - run: 32 | name: install dependencies 33 | # MUJOCO_KEY is defined in a CircleCI context 34 | # Do some sanity checks to make sure key works 35 | command: "[[ -d /venv ]] || USE_MPI=True ./ci/build_venv.sh /venv" 36 | 37 | - save_cache: 38 | paths: 39 | - /venv 40 | key: v2-dependencies-{{ checksum "requirements-build.txt" }}-{{ checksum "requirements.txt" }}-{{ checksum "requirements-dev.txt" }} 41 | 42 | - run: 43 | name: install adversarial-policies 44 | # Build a wheel then install to avoid copying whole directory (pip issue #2195) 45 | command: | 46 | python setup.py sdist bdist_wheel 47 | pip install --upgrade --force-reinstall dist/aprl-*.whl 48 | 49 | jobs: 50 | lintandtype: 51 | executor: my-executor 52 | 53 | steps: 54 | - dependencies 55 | - run: 56 | name: flake8 57 | command: flake8 ${SRC_FILES} 58 | 59 | - run: 60 | name: black 61 | command: black --check ${SRC_FILES} 62 | 63 | - run: 64 | name: codespell 65 | command: codespell -I .codespell.skip --skip='*.pyc,*.pkl,*.npz' ${SRC_FILES} 66 | 67 | - run: 68 | name: pytype 69 | command: pytype ${TYPECHECK_FILES} 70 | 71 | unit-test: 72 | executor: my-executor 73 | parallelism: 3 74 | steps: 75 | - dependencies 76 | 77 | - run: 78 | name: Memory Monitor 79 | command: | 80 | mkdir /tmp/resource-usage 81 | export FILE=/tmp/resource-usage/memory.txt 82 | while true; do 83 | ps -u root eo pid,%cpu,%mem,args,uname --sort=-%mem >> $FILE 84 | echo "----------" >> $FILE 85 | sleep 1 86 | done 87 | background: true 88 | 89 | - run: 90 | name: Headless X Server 91 | command: nohup Xdummy 92 | background: true 93 | 94 | - run: 95 | name: run tests 96 | command: | 97 | export DISPLAY=:0 98 | pytest --cov=/venv/lib/python3.7/site-packages/aprl --cov=tests \ 99 | --junitxml=/tmp/test-reports/junit.xml \ 100 | --shard-id=${CIRCLE_NODE_INDEX} --num-shards=${CIRCLE_NODE_TOTAL} \ 101 | -vv tests/ 102 | mv .coverage .coverage.aprl 103 | coverage combine # rewrite paths from virtualenv to src/ 104 | - codecov/upload 105 | 106 | - store_artifacts: 107 | path: /tmp/test-reports 108 | destination: test-reports 109 | - store_test_results: 110 | path: /tmp/test-reports 111 | unit-test: 112 | - store_artifacts: 113 | path: /tmp/resource-usage 114 | destination: resource-usage 115 | 116 | workflows: 117 | version: 2 118 | test: 119 | jobs: 120 | - lintandtype: 121 | context: 122 | - MuJoCo 123 | - docker-hub-creds 124 | - unit-test: 125 | context: 126 | - MuJoCo 127 | - docker-hub-creds 128 | -------------------------------------------------------------------------------- 
/.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: false 5 | main: 6 | paths: 7 | - "src/" 8 | - "!src/imitation/envs/examples/" 9 | - "!src/imitation/scripts/" 10 | auxiliary: 11 | target: 0% 12 | paths: 13 | - "src/aprl/configs/" 14 | tests: 15 | # Should not have dead code in our tests 16 | target: 100% 17 | paths: 18 | - "tests/" 19 | patch: 20 | default: false 21 | main: 22 | paths: 23 | - "src/" 24 | - "!src/imitation/envs/examples/" 25 | - "!src/imitation/scripts/" 26 | auxiliary: 27 | paths: 28 | - "src/aprl/beta/" 29 | - "src/aprl/configs/" 30 | tests: 31 | target: 100% 32 | paths: 33 | - "tests/" 34 | -------------------------------------------------------------------------------- /.codespell.skip: -------------------------------------------------------------------------------- 1 | ith 2 | iff 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include=src/* 3 | 4 | [paths] 5 | aprl_source = 6 | src/aprl 7 | *venv/lib/python*/site-packages/aprl 8 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .gitignore -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files 2 | data 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | **/*.pyc 7 | *.py[cod] 8 | *$py.class 9 | 10 | # Vim swap files 11 | *.swp 12 | *.swo 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # Jupyter Notebook 64 | .ipynb_checkpoints 65 | 66 | # IPython 67 | profile_default/ 68 | ipython_config.py 69 | 70 | # pyenv 71 | .python-version 72 | 73 | # Environments 74 | .env 75 | .venv 76 | env/ 77 | *venv/ 78 | ENV/ 79 | env.bak/ 80 | venv.bak/ 81 | 82 | # Spyder project settings 83 | .spyderproject 84 | .spyproject 85 | 86 | # Rope project settings 87 | .ropeproject 88 | 89 | # mkdocs documentation 90 | /site 91 | 92 | # mypy 93 | .mypy_cache/ 94 | .dmypy.json 95 | dmypy.json 96 | 97 | # Type checking 98 | .pyre/ 99 | .pytype/ 100 | 101 | # IntelliJ/PyCharm 102 | .idea/ 103 | 104 | # MuJoCo 105 | MUJOCO_LOG.TXT 106 | 107 | # Mac 108 | *.DS_Store 109 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Based on OpenAI's mujoco-py Dockerfile 2 | 3 | ARG USE_MPI=True 4 | 5 | # base stage contains just binary dependencies. 6 | # This is used in the CI build. 7 | FROM nvidia/cuda:10.0-runtime-ubuntu18.04 AS base 8 | ARG USE_MPI 9 | ARG DEBIAN_FRONTEND=noninteractive 10 | 11 | RUN echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \ 12 | && apt-get update -q \ 13 | && apt-get install -y --no-install-recommends \ 14 | build-essential \ 15 | curl \ 16 | ffmpeg \ 17 | git \ 18 | libgl1-mesa-dev \ 19 | libgl1-mesa-glx \ 20 | libglew-dev \ 21 | libosmesa6-dev \ 22 | net-tools \ 23 | parallel \ 24 | patchelf \ 25 | python3.7 \ 26 | python3.7-dev \ 27 | python3-pip \ 28 | rsync \ 29 | software-properties-common \ 30 | unzip \ 31 | vim \ 32 | virtualenv \ 33 | xpra \ 34 | xserver-xorg-dev \ 35 | ttf-mscorefonts-installer \ 36 | && apt-get clean \ 37 | && rm -rf /var/lib/apt/lists/* 38 | 39 | ENV LANG C.UTF-8 40 | 41 | RUN mkdir -p /root/.mujoco \ 42 | && curl -o mjpro150.zip https://www.roboti.us/download/mjpro150_linux.zip \ 43 | && unzip mjpro150.zip -d /root/.mujoco \ 44 | && rm mjpro150.zip \ 45 | && curl -o mujoco131.zip https://www.roboti.us/download/mjpro131_linux.zip \ 46 | && unzip mujoco131.zip -d /root/.mujoco \ 47 | && rm mujoco131.zip \ 48 | && curl -o /root/.mujoco/mjkey.txt https://www.roboti.us/file/mjkey.txt 49 | 50 | COPY vendor/Xdummy /usr/local/bin/Xdummy 51 | RUN chmod +x /usr/local/bin/Xdummy 52 | 53 | RUN if [ $USE_MPI = "True" ]; then \ 54 | add-apt-repository --yes ppa:marmistrz/openmpi \ 55 | && apt-get update -q \ 56 | && apt-get install -y libopenmpi3 libopenmpi-dev \ 57 | && apt-get clean \ 58 | && rm -rf /var/lib/apt/lists/*; \ 59 | fi 60 | 61 | # Set the PATH to the venv before we create the venv, so it's visible in base. 62 | # This is since we may create the venv outside of Docker, e.g. in CI 63 | # or by binding it in for local development. 64 | ENV PATH="/venv/bin:$PATH" 65 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH} 66 | 67 | # python-req stage contains Python venv, but not code. 68 | # It is useful for development purposes: you can mount 69 | # code from outside the Docker container. 
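# For example, a minimal sketch of building and using this stage for development
# (the image tag and bind-mount below are assumptions, not part of this repo's tooling):
#   docker build --target python-req -t adversarial_policies:python-req .
#   docker run -it -v "$(pwd)":/adversarial-policies adversarial_policies:python-req bash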
70 | FROM base as python-req 71 | ARG USE_MPI 72 | 73 | WORKDIR /adversarial-policies 74 | # Copy over just requirements.txt at first. That way, the Docker cache doesn't 75 | # expire until we actually change the requirements. 76 | COPY ./requirements-build.txt /adversarial-policies/ 77 | COPY ./requirements.txt /adversarial-policies/ 78 | COPY ./requirements-dev.txt /adversarial-policies/ 79 | COPY ./ci/build_venv.sh /adversarial-policies/ci/build_venv.sh 80 | RUN ci/build_venv.sh /venv && rm -rf $HOME/.cache/pip 81 | 82 | # full stage contains everything. 83 | # Can be used for deployment and local testing. 84 | FROM python-req as full 85 | 86 | # Delay copying (and installing) the code until the very end 87 | COPY . /adversarial-policies 88 | # Build a wheel then install to avoid copying whole directory (pip issue #2195) 89 | RUN python3 setup.py sdist bdist_wheel 90 | RUN pip install --upgrade dist/aprl-*.whl 91 | 92 | # Default entrypoints 93 | ENTRYPOINT ["/adversarial-policies/vendor/Xdummy-entrypoint"] 94 | CMD ["ci/run_tests.sh"] 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Adam Gleave 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CircleCI](https://circleci.com/gh/HumanCompatibleAI/adversarial-policies.svg?style=svg)](https://circleci.com/gh/HumanCompatibleAI/adversarial-policies) 2 | [![codecov](https://codecov.io/gh/HumanCompatibleAI/adversarial-policies/branch/master/graph/badge.svg)](https://codecov.io/gh/HumanCompatibleAI/adversarial-policies) 3 | 4 | Codebase to train, evaluate and analyze adversarial policies: policies attacking a fixed victim 5 | agent in a multi-agent system. See [paper](https://arxiv.org/abs/1905.10615) for more information. 6 | 7 | # Installation 8 | 9 | The easiest way to install the code is to build the Docker image in the `Dockerfile`. 10 | This will install all necessary binary and Python dependencies. Build the image by: 11 | 12 | ```bash 13 | $ docker build . 14 | ``` 15 | 16 | You can also pull a Docker image for the latest master commit from 17 | `humancompatibleai/adversarial_policies:latest`. 
Once you have built the image, run it with: 18 | 19 | ```bash 20 | docker run -it --env MUJOCO_KEY=URL_TO_YOUR_MUJOCO_KEY \ 21 | humancompatibleai/adversarial_policies:latest /bin/bash # change tag if built locally 22 | ``` 23 | 24 | If you want to run outside of Docker (for example, for ease of development), read on. 25 | 26 | This codebase uses Python 3.7. The main binary dependencies are MuJoCo (version 1.31 for 27 | `gym_compete` environments, and 2.0 for the others). You may also need to install some other 28 | libraries, such as OpenMPI. 29 | 30 | Create a virtual environment by running `ci/build_venv.sh`. Activate it 31 | with `. ./venv/bin/activate`. Finally, run `pip install -e .` to install 32 | an editable version of this package. 33 | 34 | # Reproducing Results 35 | 36 | Note we use [Sacred](https://github.com/IDSIA/sacred) for 37 | experiment configuration. 38 | 39 | ## Training adversarial policies 40 | 41 | `aprl.train` trains a single adversarial policy. By default it will train on `SumoAnts` for 42 | a brief period of time. You can override any of the config parameters, defined in `train_config`, at 43 | the command line. For example, to replicate one of the experiments in the paper, run: 44 | 45 | ```bash 46 | # Train on Sumo Humans for 20M timesteps 47 | python -m aprl.train with env_name=multicomp/SumoHumans-v0 paper 48 | ``` 49 | 50 | `aprl.multi.train` trains multiple adversarial policies, using Ray (see below) for 51 | parallelization. To replicate the results in the paper (there may be slight differences due to 52 | randomness not captured in the seeding), run `python -m aprl.multi.train with paper`. To run 53 | the hyperparameter sweep, run `python -m aprl.multi.train with hyper`. 54 | 55 | You can find results from our training run on s3://adversarial-policies-public/multi_train/paper. 56 | This includes TensorBoard logs, final model weights, checkpoints, and individual policy configs. 57 | Run `experiments/pull_public_s3.sh` to sync this and other data to `data/aws-public/`. 58 | 59 | ## Evaluating adversarial policies 60 | 61 | `aprl.score_agent` evaluates a pair of policies, for example an adversary and a victim. 62 | It outputs the win rate for each agent and the number of ties. It can also render to the screen 63 | or produce videos. 64 | 65 | We similarly use `aprl.multi.score` to evaluate multiple pairs of policies in parallel. 66 | To reproduce all the evaluations used in the paper, run the following bash scripts, which call 67 | `aprl.multi.score` internally: 68 | - `experiments/modelfree/baselines.sh`: fixed baselines (no adversarial policies). 69 | - `experiments/modelfree/attack_transfer.sh <logdir> [logdir ...]`. To use our 70 | pre-trained policies, use the path `data/aws-public/multi_train/paper/20190429_011349` 71 | after syncing against S3. 72 | 73 | ## Visualizing Results 74 | 75 | Most of the visualization code lives in the `aprl.visualize` package. To reproduce the figures 76 | in the paper, use `paper_config`; for those in the appendix, use `supplementary_config`. So: 77 | 78 | ```bash 79 | python -m aprl.visualize.scores with paper_config # heatmaps in the paper 80 | python -m aprl.visualize.training with supplementary_config # training curves in appendix 81 | ``` 82 | 83 | To re-generate all the videos, use `aprl.visualize.make_videos`. We recommend running 84 | in Docker, in which case it will render using `Xdummy`. This avoids rendering issues with many 85 | graphics drivers.
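For example, a minimal sketch using the repository's own wrapper `experiments/run_docker.sh` (it expects `MUJOCO_KEY` to be set and mounts `./data` into the container); you will typically also pass the config overrides described below:

```bash
export MUJOCO_KEY=URL_TO_YOUR_MUJOCO_KEY
experiments/run_docker.sh -c "python -m aprl.visualize.make_videos"
```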
86 | 87 | Note you will likely need to change the default paths in the config to point at your evaluation 88 | results from the previous section, and at your desired output directory. For example: 89 | 90 | ```bash 91 | python -m aprl.visualize.scores with tb_dir=<path-to-training-logs> \ 92 | transfer_path=<path-to-transfer-scores> 93 | python -m aprl.visualize.make_videos with adversary_path=<path-to-best-adversaries-json> 94 | ``` 95 | 96 | ## Additional Analysis 97 | 98 | The density modeling can be run with `experiments/modelfree/density.sh`, or with custom 99 | configurations via `aprl.density.pipeline`. 100 | 101 | The t-SNE visualizations can be replicated with `aprl.tsne.pipeline`. 102 | 103 | ## Using Ray 104 | 105 | Many of the experiments are computationally intensive. You can run them on a single machine, but it 106 | might take several weeks. We use [Ray](https://github.com/ray-project/ray) to run distributed 107 | experiments. We include example configs in `src/aprl/configs/ray/`. To use `aws.yaml` you 108 | will need to, at a minimum, edit the config to use your own AMI (anything with Docker should work) 109 | and private key. Then just run `ray up <path-to-config>` and it will start a cluster. SSH into the 110 | head node, start a shell in Docker, and then follow the above instructions. The script should 111 | automatically detect it is part of a Ray cluster and run on the existing Ray server, rather than 112 | starting a new one. 113 | 114 | # Contributions 115 | 116 | The codebase follows PEP8, with a 100-column maximum line width. Docstrings should be in reST. 117 | 118 | Please run `ci/code_checks.sh` before committing. This runs several linting steps. 119 | These are also run as a continuous integration check. 120 | 121 | I like to use Git commit hooks to prevent bad commits from happening in the first place: 122 | ```bash 123 | ln -s ../../ci/code_checks.sh .git/hooks/pre-commit 124 | ``` 125 | -------------------------------------------------------------------------------- /ci/build_venv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e # exit immediately on any error 4 | 5 | venv=$1 6 | if [[ ${venv} == "" ]]; then 7 | venv="venv" 8 | fi 9 | 10 | virtualenv -p python3.7 ${venv} 11 | source ${venv}/bin/activate 12 | pip install -r requirements-build.txt 13 | pip install -r requirements.txt 14 | pip install -r requirements-dev.txt 15 | 16 | if [[ $USE_MPI == "True" ]]; then 17 | pip install mpi4py 18 | fi 19 | -------------------------------------------------------------------------------- /ci/code_checks.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # If you change these, also change .circleci/config.yml.
4 | SRC_FILES="src/ tests/ setup.py" 5 | TYPECHECK_FILES="src/ tests/ setup.py" 6 | 7 | set -x # echo commands 8 | set -e # quit immediately on error 9 | 10 | flake8 ${SRC_FILES} 11 | black --check ${SRC_FILES} 12 | codespell -I .codespell.skip --skip='*.pyc,*.pkl,*.npz' ${SRC_FILES} 13 | 14 | if [ -x "`which circleci`" ]; then 15 | circleci config validate 16 | fi 17 | 18 | if [ "$skipexpensive" != "true" ]; then 19 | pytype ${TYPECHECK_FILES} 20 | fi 21 | -------------------------------------------------------------------------------- /ci/local_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ ${MUJOCO_KEY} == "" ]]; then 4 | echo "Set MUJOCO_KEY file to a URL with your key" 5 | exit 1 6 | fi 7 | 8 | # Run the same CI tests that Travis will run on local machine. 9 | docker build --cache-from humancompatibleai/adversarial_policies:local-test \ 10 | -t humancompatibleai/adversarial_policies:local-test . 11 | if [[ $? -ne 0 ]]; then 12 | echo "Docker build failed" 13 | exit 1 14 | fi 15 | 16 | docker run --rm --env MUJOCO_KEY=${MUJOCO_KEY} --env CODECOV_TOKEN=${CODECOV_TOKEN} \ 17 | humancompatibleai/adversarial_policies:local-test \ 18 | ci/run_tests.sh 19 | -------------------------------------------------------------------------------- /ci/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e # exit immediately on any error 4 | 5 | echo "Downloading MuJoCo Key" 6 | curl -o /root/.mujoco/mjkey.txt ${MUJOCO_KEY} 7 | 8 | set -o xtrace # print commands 9 | 10 | num_cpus=$2 11 | if [[ ${num_cpus} == "" ]]; then 12 | num_cpus=$(nproc --all) 13 | num_cpus=$((${num_cpus} / 2)) 14 | fi 15 | 16 | export LD_LIBRARY_PATH=/root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH} 17 | COV_FLAGS="--cov=tests --cov=/venv/lib/python3.7/site-packages/aprl" 18 | pytest -vv -n ${num_cpus} ${COV_FLAGS} tests/ 19 | 20 | mv .coverage .coverage.tmp 21 | coverage combine # rewrite paths from virtualenv to src/ 22 | codecov 23 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/benchmark_vec_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NUM_ENVS="1 2 4 8 16" 4 | 5 | for num_env in $NUM_ENVS; do 6 | for rep in 1 2 3; do 7 | echo "BENCHMARK: ${num_env} environments test ${rep}" 8 | time python -m aprl.train with \ 9 | total_timesteps=50000 num_env=${num_env} \ 10 | exp_name="vec-env-benchmark-${num_env}-${rep}" 11 | done 12 | done -------------------------------------------------------------------------------- /experiments/build_and_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | CMD="bash" 7 | COPY="True" 8 | DETACH="False" 9 | WORK_DIR="$HOME/aprl" 10 | NAME="adversarial-policies" 11 | TREEISH="master" 12 | TB_PORT=6006 13 | RUN_DOCKER_ARGS="" 14 | 15 | while [[ $# -gt 0 ]] 16 | do 17 | key="$1" 18 | case $key in 19 | -c|--cmd) 20 | CMD="$2" 21 | shift 22 | shift 23 | ;; 24 | -d|--detach) 25 | DETACH="True" 26 | shift 27 | ;; 28 | -l|--listen) 29 | TB_PORT="$2" 30 | shift 31 | shift 32 | ;; 33 | -n|--name) 34 | NAME="$2" 35 | shift 36 | shift 37 | ;; 38 | --no-copy) 39 | COPY="False" 40 | shift 41 | ;; 42 | -r|--revision) 43 | TREEISH="$2" 44 | shift 45 | shift 46 | ;; 47 | -w|--work-dir) 48 | WORK_DIR="$2" 49 | shift 50 | shift 51 | ;; 52 | --run-docker-args) 53 | RUN_DOCKER_ARGS="$2" 54 | shift 55 | shift 56 | ;; 57 | *) 58 | echo "Unrecognized option '${key}'" 59 | exit 1 60 | esac 61 | done 62 | 63 | if [[ ${MUJOCO_KEY} == "" ]]; then 64 | echo "Set MUJOCO_KEY file to a URL with your key" 65 | exit 1 66 | fi 67 | 68 | set -e # exit immediately on any error 69 | 70 | if [[ ${COPY} == "True" ]]; then 71 | git clone ${GIT_REPO} ${WORK_DIR}/${NAME} 72 | fi 73 | 74 | cd ${WORK_DIR}/${NAME} 75 | git checkout ${TREEISH} 76 | docker build --cache-from ${DOCKER_REPO}:${NAME} \ 77 | --build-arg MUJOCO_KEY=${MUJOCO_KEY} \ 78 | -t ${DOCKER_REPO}:${NAME} . 79 | 80 | mkdir -p data 81 | tmux new-session -d -s ${NAME} \ 82 | "export MUJOCO_KEY=${MUJOCO_KEY} && 83 | ./experiments/run_docker.sh -t ${NAME} -l ${TB_PORT}:6006 \ 84 | -n ${NAME} -c \"${CMD}\" ${RUN_DOCKER_ARGS}; \ 85 | echo 'Finished; press Ctrl-D to exit'; cat /dev/stdin" 86 | ATTEMPTS=0 87 | while [[ `docker inspect -f {{.State.Running}} ${NAME}` != "true" ]]; do 88 | echo "Waiting for Docker container to start" 89 | sleep 2 90 | ATTEMPTS=$((ATTEMPTS + 1)) 91 | if [[ $ATTEMPTS -gt 5 ]]; then 92 | echo "Could not start Docker container. Dieing. Look in tmux session '${NAME}' for logs." 93 | exit 1 94 | fi 95 | done 96 | tmux new-window -t ${NAME} -d \ 97 | "docker exec ${NAME} bash -c \"env=aprl . ci/prepare_env.sh && tensorboard --port 6006 --logdir data/\"" 98 | 99 | if [[ ${DETACH} == "True" ]]; then 100 | echo "Experiment '${NAME}' running in eponymous tmux session, \ 101 | cwd '${WORK_DIR}/${NAME}' and TensorBoard running on port '${TB_PORT}'" 102 | else 103 | tmux attach-session -t ${NAME} 104 | fi -------------------------------------------------------------------------------- /experiments/common.sh: -------------------------------------------------------------------------------- 1 | DOCKER_REPO="humancompatibleai/adversarial_policies" 2 | GIT_REPO="https://github.com/HumanCompatibleAI/adversarial-policies.git" 3 | 4 | call_parallel() { 5 | PARALLEL_FLAGS=$1 6 | shift 7 | OUT_DIR=$1 8 | shift 9 | MODULE_NAME=$1 10 | shift 11 | EXTRA_ARGS=$* 12 | parallel ${PARALLEL_FLAGS} --header : --results ${OUT_DIR}/parallel \ 13 | python -m ${MODULE_NAME} ${EXTRA_ARGS} 14 | } 15 | -------------------------------------------------------------------------------- /experiments/modelfree/attack_transfer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | OUT_ROOT=data/aws/score_agents 7 | TIMESTAMP=`date --iso-8601=seconds` 8 | 9 | function multi_score { 10 | python -m aprl.multi.score with victims='["zoo"]' opponents='["adversary"]' "$@" high_accuracy 11 | } 12 | 13 | if [[ $# -eq 0 ]]; then 14 | echo "usage: $0 [logdir ...]" 15 | exit 1 16 | fi 17 | 18 | for dir in normal victim_masked_init victim_masked_zero adversary_masked_init; do 19 | mkdir -p ${OUT_ROOT}/${dir}/${TIMESTAMP} 20 | done 21 | 22 | ADVERSARY_PATHS=${OUT_ROOT}/normal/${TIMESTAMP}/best_adversaries.json 23 | python ${DIR}/highest_win_rate.py ${ADVERSARY_PATHS} --logdir $* 24 | 25 | export ADVERSARY_PATHS=${ADVERSARY_PATHS} 26 | 27 | multi_score save_path=${OUT_ROOT}/normal/${TIMESTAMP}/adversary_transfer.json& 28 | wait_proc 29 | 30 | multi_score mask_observations_of_victim \ 31 | save_path=${OUT_ROOT}/victim_masked_init/${TIMESTAMP}/adversary_transfer.json& 32 | wait_proc 33 | 34 | multi_score mask_observations_of_victim mask_observations_with_zeros \ 35 | save_path=${OUT_ROOT}/victim_masked_zero/${TIMESTAMP}/adversary_transfer.json& 36 | wait_proc 37 | 38 | multi_score mask_observations_of_adversary \ 39 | save_path=${OUT_ROOT}/adversary_masked_init/${TIMESTAMP}/adversary_transfer.json& 40 | wait_proc 41 | 42 | wait -------------------------------------------------------------------------------- /experiments/modelfree/baselines.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . ${DIR}/common.sh 5 | 6 | function multi_score { 7 | python -m aprl.multi.score with "$@" high_accuracy 8 | } 9 | 10 | OUT_DIR=data/aws/score_agents.tmp 11 | 12 | mkdir -p ${OUT_DIR} 13 | for kind in zoo fixed; do 14 | mkdir -p ${OUT_DIR}/normal 15 | multi_score victims='["zoo"]' opponents="[\"${kind}\"]" \ 16 | save_path=${OUT_DIR}/normal/${kind}_baseline.json& 17 | wait_proc 18 | 19 | mkdir -p ${OUT_DIR}/victim_masked_init 20 | multi_score victims='["zoo"]' opponents="[\"${kind}\"]" \ 21 | mask_observations_of_victim \ 22 | save_path=${OUT_DIR}/victim_masked_init/${kind}_baseline.json& 23 | wait_proc 24 | 25 | mkdir -p ${OUT_DIR}/victim_masked_zero 26 | multi_score victims='["zoo"]' opponents="[\"${kind}\"]" \ 27 | mask_observations_of_victim mask_observations_with_zeros \ 28 | save_path=${OUT_DIR}/victim_masked_zero/${kind}_baseline.json& 29 | wait_proc 30 | done 31 | 32 | wait 33 | -------------------------------------------------------------------------------- /experiments/modelfree/common.sh: -------------------------------------------------------------------------------- 1 | function wait_proc { 2 | if [[ -f ~/ray_bootstrap_config.yaml ]]; then 3 | # Running on a Ray cluster. We want to submit all the jobs in parallel. 4 | sleep 5 # stagger jobs a bit 5 | else 6 | # Running locally. Each job will start a Ray cluster. Submit sequentially. 7 | wait 8 | fi 9 | } 10 | -------------------------------------------------------------------------------- /experiments/modelfree/dec2018replication.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Reproduce results of Dec 2018 draft write-up 4 | 5 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 6 | . 
${DIR}/../common.sh 7 | 8 | ENV_NAMES="multicomp/KickAndDefend-v0 multicomp/SumoAnts-v0" 9 | PRETRAINED="1 2 3" 10 | SEEDS="0 1 2" 11 | 12 | OUT_DIR=data/mf-dec2018rep 13 | 14 | # Train PPO against victims 15 | python -m aprl.multi.train with dec2018rep 16 | 17 | SCORE_AGENT="aprl.score_agent with episodes=1000 num_env=16 render=False" 18 | # Baseline: pretrained policy 19 | call_parallel "$*" ${OUT_DIR}/pretrained ${SCORE_AGENT} \ 20 | env_name={env_name} agent_a_path={agent_a_path} agent_b_path={agent_b_path} \ 21 | ::: env_name ${ENV_NAMES} ::: agent_a_path ${PRETRAINED} ::: agent_b_path ${PRETRAINED} 22 | 23 | # Baseline: random action and constant zero 24 | call_parallel "$*" ${OUT_DIR}/fixed ${SCORE_AGENT} \ 25 | env_name={env_name} agent_a_type={agent_a_type} agent_b_path={agent_b_path} \ 26 | ::: env_name ${ENV_NAMES} ::: agent_a_type random zero ::: agent_b_path ${PRETRAINED} -------------------------------------------------------------------------------- /experiments/modelfree/defenses.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . ${DIR}/common.sh 5 | 6 | OUT_ROOT=score_agents/defenses 7 | mkdir -p ${OUT_ROOT} 8 | 9 | function multi_score { 10 | python -m aprl.multi.score with "$@" defenses high_accuracy 11 | } 12 | 13 | multi_score save_path=${OUT_ROOT}/normal.json& 14 | wait_proc 15 | 16 | multi_score mask_observations_of_victim save_path=${OUT_ROOT}/victim_masked_init.json& 17 | wait_proc 18 | 19 | wait 20 | -------------------------------------------------------------------------------- /experiments/modelfree/density.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | 5 | . ${DIR}/common.sh 6 | 7 | TIMESTAMP=`date --iso-8601=seconds` 8 | ACTIVATION_DIR="data/density/${TIMESTAMP}/activations" 9 | 10 | # We fit our density model with 20,000 timesteps, and use 20,000 timesteps for evaluation. 11 | # So we need 40,000 timesteps for the training opponent. The others we only actually need 20,000 12 | # for so this is slightly wasteful. 
13 | python -m aprl.common.generate_activations with score_update.score.timesteps=40000 \ 14 | out_dir=${ACTIVATION_DIR} 15 | 16 | for components in 5 10 20 40 80; do 17 | for cov_type in full diag; do 18 | python -m aprl.density.fit_density with gmm \ 19 | model_kwargs.n_components=${components} \ 20 | model_kwargs.covariance_type=${cov_type} \ 21 | activation_glob="${ACTIVATION_DIR}/*" \ 22 | output_root=data/density/${TIMESTAMP}/fitted 23 | wait_proc 24 | done 25 | done 26 | 27 | wait 28 | -------------------------------------------------------------------------------- /experiments/modelfree/highest_win_rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Processes experimental output to find adversarial policies with maximal win rate.""" 4 | 5 | import argparse 6 | import collections 7 | import json 8 | import logging 9 | import os.path 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | logger = logging.getLogger('scripts.highest_win_rate') 15 | 16 | 17 | def event_files(path): 18 | for root, dirs, files in os.walk(path, followlinks=True): 19 | # checkpoint directories never contain TF events files, and will slow down search 20 | dirs[:] = list(filter(lambda x: x != 'checkpoint', dirs)) 21 | if root.split(os.path.sep)[-2:] == ['rl', 'tb']: 22 | for name in files: 23 | if 'tfevents' in name: 24 | yield os.path.join(root, name) 25 | 26 | 27 | def get_stats(event_path, episode_window): 28 | events = collections.defaultdict(list) 29 | last_step = 0 30 | for event in tf.train.summary_iterator(event_path): 31 | for value in event.summary.value: 32 | if value.tag in ['game_win0', 'game_win1', 'game_tie']: 33 | events[value.tag].append(value.simple_value) 34 | last_step = event.step 35 | 36 | logger.info(f"Read {len(events['game_win0'])} events up to {last_step} from '{event_path}'") 37 | means = {k: np.mean(v[-episode_window:]) for k, v in events.items()} 38 | 39 | return means 40 | 41 | 42 | def _strip_up_to(path, dirname): 43 | path_components = path.split(os.path.sep) 44 | if path_components[0] == '': 45 | path_components[0] = os.path.sep 46 | try: 47 | path_index = len(path_components) - 1 - path_components[::-1].index(dirname) 48 | except ValueError as e: 49 | raise ValueError(f"Error stripping '{dirname}' in '{path_components}': {e}") 50 | return os.path.join(*path_components[0:path_index]) 51 | 52 | 53 | def get_sacred_config(event_path): 54 | root = _strip_up_to(event_path, 'baselines') 55 | sacred_config_path = os.path.join(root, 'sacred', 'train', '1', 'config.json') 56 | with open(sacred_config_path, 'r') as f: 57 | return json.load(f) 58 | 59 | 60 | def get_final_model_path(event_path): 61 | root = _strip_up_to(event_path, 'rl') 62 | abs_path = os.path.join(root, 'final_model') 63 | components = abs_path.split(os.path.sep) 64 | try: 65 | multi_train_start = components.index('multi_train') 66 | components = components[multi_train_start:] 67 | except ValueError: 68 | pass 69 | return os.path.sep.join(components) 70 | 71 | 72 | def unstack(d): 73 | d = collections.OrderedDict(sorted(d.items())) 74 | res = collections.OrderedDict() 75 | for k, v in d.items(): 76 | env_name, opp_index, opp_path = k 77 | res.setdefault(env_name, {}).setdefault(opp_index, {})[opp_path] = v 78 | return res 79 | 80 | 81 | def find_best(logdirs, episode_window): 82 | # keys: (env_name, opp_index, opp_path) 83 | # value: path to policy evaluated on env_name against opponent opp_path playing opp_index 84 | best_policy 
= {} 85 | best_winrate = collections.defaultdict(float) 86 | 87 | for logdir in logdirs: 88 | for event_path in event_files(logdir): 89 | stats = get_stats(event_path=event_path, episode_window=episode_window) 90 | config = get_sacred_config(event_path) 91 | env_name = str(config['env_name']) 92 | opp_index = int(config['embed_index']) 93 | opp_type = str(config['embed_type']) 94 | # multi_score is not set up to handle multiple embedded agent types 95 | if opp_type != 'zoo' and config['load_policy']['type'] == 'zoo': 96 | # Assuming that this case corresponds to a situation where we're finetuning a 97 | # zoo policy, and that we still want the resulting dictionary indexed by the 98 | # integer zoo policy we finetuned, rather than the full path of its adversary 99 | zoo_path = str(config['load_policy']['path']) 100 | else: 101 | zoo_path = str(config['embed_path']) 102 | our_index = 1 - opp_index 103 | key = (env_name, opp_index, zoo_path) 104 | our_winrate = stats[f'game_win{our_index}'] 105 | 106 | if our_winrate > best_winrate[key]: 107 | best_policy[key] = get_final_model_path(event_path) 108 | best_winrate[key] = our_winrate 109 | 110 | result = { 111 | 'policies': unstack(best_policy), 112 | 'winrates': unstack(best_winrate), 113 | } 114 | 115 | return result 116 | 117 | 118 | def directory_type(path): 119 | if not os.path.isdir(path): 120 | raise ValueError(f"'{path}' does not exist") 121 | return path 122 | 123 | 124 | def get_args(): 125 | parser = argparse.ArgumentParser() 126 | parser.add_argument('logdir', nargs="+", type=directory_type) 127 | parser.add_argument('--episode-window', type=int, default=50) 128 | parser.add_argument('--output_path') 129 | return parser.parse_args() 130 | 131 | 132 | def main(): 133 | logging.basicConfig(level=logging.INFO) 134 | parsed_args = get_args() 135 | output_path = parsed_args.output_path 136 | # If no output path is given, default to saving it in the first logdir under a fixed name 137 | if output_path is None: 138 | if len(parsed_args.logdir) > 1: 139 | raise ValueError("Must specify --output_path when using multiple log directories.") 140 | output_path = os.path.join(parsed_args.logdir[0], 'highest_win_policies_and_rates.json') 141 | 142 | for logdir in parsed_args.logdir: 143 | if 'multi_train' not in logdir.split(os.path.sep): 144 | logger.warning(f"logdir '{logdir}' does not contain 'multi_train'." 145 | "Falling back to absolute paths, JSON may not be portable.") 146 | 147 | logger.info(f"Output path: {output_path}") 148 | logger.info(f"Log dir: {parsed_args.logdir}") 149 | with open(output_path, 'w') as f: # fail fast if output_path inaccessible 150 | result = find_best(parsed_args.logdir, parsed_args.episode_window) 151 | json.dump(result, f) 152 | 153 | 154 | if __name__ == '__main__': 155 | main() 156 | -------------------------------------------------------------------------------- /experiments/modelfree/noisy_actions_and_obs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | AWS_ROOT=data/aws 7 | OUT_ROOT=${AWS_ROOT}/score_agents 8 | TIMESTAMP=`date --iso-8601=seconds` 9 | 10 | # Format: multi_score <opponent_type> <noise_type> ["extra_config1 ..."] 11 | # opponent_type: one of zoo or adversary 12 | # noise_type: one of ${NOISE_TYPES} 13 | # extra_config: a string with a list of space-separated named configs for aprl.multi.score 14 | # Saves to ${noise_type}/${TIMESTAMP}/${opponent_type}.json 15 | function multi_score { 16 | opponent_type=$1 17 | noise_type=$2 18 | extra_configs=$3 19 | 20 | python -m aprl.multi.score with victims='["zoo"]' opponents="[\"${opponent_type}\"]" \ 21 | ${noise_type} ${extra_configs} medium_accuracy \ 22 | save_path=${OUT_ROOT}/${noise_type}/${TIMESTAMP}/${opponent_type}.json 23 | wait_proc 24 | } 25 | 26 | # Sanity check we have the data 27 | if [[ ! -d ${OUT_ROOT} || ! -d ${AWS_ROOT}/multi_train ]]; then 28 | echo "Could not find some required data directories." 29 | echo "Consider running these commands (if using Ray, add to {head,worker}_start_ray_commands):" 30 | echo "aws s3 sync s3://adversarial-policies/score_agents/ /adversarial-policies/data/aws/score_agents/ &&" 31 | echo "aws s3 sync --exclude='*/checkpoint/*' --exclude='*/datasets/*' \ 32 | s3://adversarial-policies/multi_train/paper/20190429_011349/ \ 33 | /adversarial-policies/data/aws/multi_train/paper/20190429_011349/" 34 | exit 1 35 | fi 36 | 37 | # Make a directory for each of the noise types we're using, to store results in 38 | NOISE_TYPES="noise_adversary_actions noise_victim_actions mask_observations_with_additive_noise \ 39 | mask_observations_with_smaller_additive_noise" 40 | for dir in ${NOISE_TYPES}; do 41 | mkdir -p ${OUT_ROOT}/${dir}/${TIMESTAMP} 42 | done 43 | 44 | export ADVERSARY_PATHS=${OUT_ROOT}/normal/2019-05-05T18:12:24+00:00/best_adversaries.json 45 | 46 | multi_score zoo noise_adversary_actions 47 | echo "Zoo baseline noisy actions completed" 48 | 49 | multi_score adversary noise_adversary_actions 50 | echo "Noisy adversary actions completed" 51 | 52 | multi_score adversary noise_victim_actions 53 | echo "Noisy victim actions completed" 54 | 55 | multi_score zoo mask_observations_with_additive_noise mask_observations_of_victim 56 | multi_score adversary mask_observations_with_additive_noise mask_observations_of_victim 57 | echo "Additive noise masking complete" 58 | 59 | multi_score zoo mask_observations_with_smaller_additive_noise mask_observations_of_victim 60 | multi_score adversary mask_observations_with_smaller_additive_noise mask_observations_of_victim 61 | echo "Smaller additive noise masking complete" 62 | 63 | wait 64 | echo "All noise experiments complete" 65 | -------------------------------------------------------------------------------- /experiments/modelfree/plot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://github.com/mrahtz/tbplot 4 | TBPLOT="$HOME/dev/tbplot/tbplot" 5 | ENV_NAMES="KickAndDefend-v0 SumoHumans-v0 SumoAnts-v0 \ 6 | SumoHumansAutoContact-v0 SumoAntsAutoContact-v0 \ 7 | RunToGoalHumans-v0 RunToGoalAnts-v0 \ 8 | YouShallNotPassHumans-v0" 9 | VICTIMS="1 2 3 4" 10 | 11 | if [[ $# -ne 2 ]]; then 12 | echo "usage: $0 <data_dir> <out_dir>" 13 | exit 1 14 | fi 15 | 16 | DATA_DIR="$1" 17 | OUT_DIR="$2" 18 | 19 | parallel -j 8 --header : \ 20 | ${TBPLOT} --step --smoothing 0.9 \ 21 | --out ${OUT_DIR}/{env_name}_{victim}.png \ 22 | "${DATA_DIR}/train_rl_*_env_name:victim_path=\[*{env_name}*,\ {victim}\]*/data/baselines/*/rl/tb" \ 23 | ::: env_name ${ENV_NAMES} \ 24 | 
::: victim ${VICTIMS} 25 | -------------------------------------------------------------------------------- /experiments/planning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/experiments/planning/__init__.py -------------------------------------------------------------------------------- /experiments/planning/common.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | import gym 5 | from ilqr.controller import RecedingHorizonController 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def set_seeds(seed): 11 | random.seed(seed) 12 | np.random.seed(seed) 13 | 14 | 15 | def on_iteration(iteration_count, xs, us, J_opt, accepted, converged): 16 | info = "converged" if converged else ("accepted" if accepted else "failed") 17 | print("iteration", iteration_count, info, J_opt, xs[-1], us[-1]) 18 | 19 | 20 | def make_env(env_name, seed, horizon=None): 21 | env = gym.make(env_name) 22 | if horizon is None: 23 | horizon = env._max_episode_steps 24 | env = env.unwrapped 25 | env.frame_skip = 1 26 | env.seed(seed) 27 | env.reset() 28 | us_init = np.array([env.action_space.sample() for _ in range(horizon)]) 29 | 30 | return env, us_init 31 | 32 | 33 | def fit_ilqr(ilqrs, x0s, us_init, **kwargs): 34 | xs = {} 35 | us = {} 36 | print(ilqrs.keys()) 37 | for k, ilqr in ilqrs.items(): 38 | start = time.time() 39 | print('*** Fitting {} ***'.format(k)) 40 | x0 = x0s[k] 41 | xs[k], us[k] = ilqr.fit(x0, us_init, on_iteration=on_iteration, 42 | **kwargs) 43 | end = time.time() 44 | print('*** Fitted {} in {}s ***'.format(k, end - start)) 45 | return xs, us 46 | 47 | 48 | def receding(ilqr, x0, us_init, seed, step_size=1, horizon=None, **kwargs): 49 | if horizon is None: 50 | horizon = len(us_init) 51 | controller = RecedingHorizonController(x0, ilqr) 52 | controller.seed(seed) 53 | xs = np.zeros((horizon, ) + x0.shape) 54 | us = np.zeros((horizon, ) + us_init[0].shape) 55 | i = 0 56 | for x, u in controller.control(us_init, step_size=step_size, **kwargs): 57 | xs[i:i + step_size] = x[:-1] 58 | us[i:i + step_size] = u 59 | print('iteration {} x = {}, u = {}'.format(i, x, u)) 60 | i += step_size 61 | if i == horizon: 62 | break 63 | return xs, us 64 | 65 | 66 | def evaluate(env, dynamics, x0, us, render=False): 67 | dynamics.set_state(x0) 68 | if render: 69 | env.render() 70 | rew = [] 71 | actual_xs = [] 72 | for u in us: 73 | _obs, r, done, info = env.step(u) 74 | if done: 75 | print('warning: early termination! 
(assuming zero-reward from now)') 76 | break 77 | rew.append(r) 78 | actual_xs.append(dynamics.get_state()) 79 | if render: 80 | env.render() 81 | time.sleep(0.01) 82 | return rew, actual_xs 83 | 84 | 85 | def multi_evaluate(env, dynamics, x0s, us, **kwargs): 86 | rews = {} 87 | actual_xs = {} 88 | for k, solved_us in us.items(): 89 | print(k) 90 | rews[k], actual_xs[k] = evaluate(env.unwrapped, dynamics[k], x0s[k], 91 | solved_us, **kwargs) 92 | rewards = {k: sum(r) for k, r in rews.items()} 93 | lengths = {k: len(r) for k, r in rews.items()} 94 | return pd.DataFrame({'rewards': rewards, 'lengths': lengths}) 95 | -------------------------------------------------------------------------------- /experiments/pull_public_s3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | aws --no-sign-request s3 sync \ 4 | --exclude='*/checkpoint/*' --exclude='*/datasets/*' --exclude='videos/*' \ 5 | s3://adversarial-policies-public/ data/aws-public/ 6 | -------------------------------------------------------------------------------- /experiments/pull_s3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | S3_SYNC_CMD="aws s3 sync --exclude=*/checkpoint/* --exclude=*/datasets/*" 4 | 5 | ${S3_SYNC_CMD} s3://adversarial-policies/ data/aws/ 6 | ${S3_SYNC_CMD} s3://adversarial-policies-public/ data/aws-public/ 7 | -------------------------------------------------------------------------------- /experiments/push_public_s3.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Local directories 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 5 | PROJECT_DIR=$( dirname "${SCRIPT_DIR}" ) 6 | PUBLIC_AWS=${PROJECT_DIR}/data/aws-public 7 | 8 | # S3 Repos and commands 9 | PRIVATE_S3_REPO=s3://adversarial-policies 10 | PUBLIC_S3_REPO=s3://adversarial-policies-public 11 | S3_SYNC_CMD="aws s3 sync --exclude='*/checkpoint/*' --exclude='*/datasets/*' --acl public-read --delete" 12 | 13 | # Copy subset of data from private AWS to public view 14 | echo "Syncing from private bucket ${PRIVATE_S3_REPO} to public bucket ${PUBLIC_S3_REPO}" 15 | 16 | REMOTE_COPY="multi_train/paper/20190429_011349 score_agents" 17 | for path in ${REMOTE_COPY}; do 18 | echo "Syncing ${path}" 19 | ${S3_SYNC_CMD} ${PRIVATE_S3_REPO}/${path} ${PUBLIC_S3_REPO}/${path} 20 | done 21 | 22 | echo "Syncing from local machine ${PUBLIC_AWS} to public bucket ${PUBLIC_S3_REPO}" 23 | LOCAL_COPY="videos" 24 | for path in ${LOCAL_COPY}; do 25 | echo "Syncing ${path}" 26 | ${S3_SYNC_CMD} ${PUBLIC_AWS}/${path} ${PUBLIC_S3_REPO}/${path} 27 | done 28 | -------------------------------------------------------------------------------- /experiments/remote_build_and_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . 
${DIR}/common.sh 5 | 6 | REMOTE_HOST="" 7 | LOCAL_DATA="${DIR}/../data" 8 | REMOTE_WORK_DIR="/scratch/${USER}/aprl" 9 | TB_PORT=6006 10 | EXTRA_ARGS="" 11 | 12 | 13 | while [[ $# -gt 0 ]] 14 | do 15 | key="$1" 16 | case $key in 17 | -c|--cmd) 18 | CMD="$2" 19 | shift 20 | shift 21 | ;; 22 | -h|--host) 23 | REMOTE_HOST="$2" 24 | shift 25 | shift 26 | ;; 27 | -l|--listen) 28 | TB_PORT="$2" 29 | shift 30 | shift 31 | ;; 32 | -n|--name) 33 | NAME="$2" 34 | shift 35 | shift 36 | ;; 37 | -o|--output-dir) 38 | LOCAL_DATA="$2" 39 | shift 40 | shift 41 | ;; 42 | -w|--work-dir) 43 | REMOTE_WORK_DIR="$2" 44 | shift 45 | shift 46 | ;; 47 | *) 48 | EXTRA_ARGS="${EXTRA_ARGS} $1" 49 | shift 50 | ;; 51 | esac 52 | done 53 | 54 | if [[ ${MUJOCO_KEY} == "" ]]; then 55 | echo "Set MUJOCO_KEY file to a URL with your key" 56 | exit 1 57 | fi 58 | 59 | if [[ ${REMOTE_HOST} == "" ]]; then 60 | echo "Missing mandatory argument -h " 61 | exit 1 62 | fi 63 | 64 | set -o xtrace # print commands 65 | set -e # exit immediately on any error 66 | 67 | echo "Starting experiment" 68 | ssh -t -L ${TB_PORT}:localhost:${TB_PORT} ${REMOTE_HOST} \ 69 | "export MUJOCO_KEY='${MUJOCO_KEY}' && \ 70 | git clone ${GIT_REPO} ${REMOTE_WORK_DIR}/${NAME} || (cd ${REMOTE_WORK_DIR}/${NAME} && git fetch) && \ 71 | ${REMOTE_WORK_DIR}/${NAME}/experiments/build_and_run.sh \ 72 | --no-copy -w ${REMOTE_WORK_DIR} -n ${NAME} -l ${TB_PORT} -c \"${CMD}\" ${EXTRA_ARGS}" 73 | 74 | echo "Experiment completed, copying data" 75 | rsync -rlptv --exclude=sacred ${REMOTE_HOST}:${REMOTE_WORK_DIR}/${NAME}/data/ ${LOCAL_DATA}/ 76 | rsync -rlptv ${REMOTE_HOST}:${REMOTE_WORK_DIR}/${NAME}/data/sacred/ ${LOCAL_DATA}/sacred/${REMOTE_HOST} 77 | -------------------------------------------------------------------------------- /experiments/run_docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | . ${DIR}/common.sh 5 | 6 | CMD="bash" 7 | NAME="adversarial-policies" 8 | TAG="latest" 9 | RM="--rm" 10 | FLAGS="" 11 | 12 | while [[ $# -gt 0 ]] 13 | do 14 | key="$1" 15 | 16 | case $key in 17 | -c|--cmd) 18 | CMD="$2" 19 | shift 20 | shift 21 | ;; 22 | -l|--listen) 23 | FLAGS="${FLAGS} -p $2" 24 | shift 25 | shift 26 | ;; 27 | -n|--name) 28 | NAME="$2" 29 | shift 30 | shift 31 | ;; 32 | -p|--persist) 33 | RM="" 34 | shift 35 | ;; 36 | -t|--tag) 37 | TAG="$2" 38 | shift 39 | shift 40 | ;; 41 | *) 42 | echo "Unrecognized option '${key}'" 43 | exit 1 44 | esac 45 | done 46 | 47 | if [[ ${MUJOCO_KEY} == "" ]]; then 48 | echo "Set MUJOCO_KEY file to a URL with your key" 49 | exit 1 50 | fi 51 | 52 | docker run \ 53 | ${FLAGS} \ 54 | ${RM} \ 55 | -it \ 56 | --env MUJOCO_KEY=${MUJOCO_KEY} \ 57 | --name ${NAME} \ 58 | --mount type=bind,source="$(pwd)"/data,target=/adversarial-policies/data \ 59 | ${DOCKER_REPO}:${TAG} \ 60 | bash -c ". ci/prepare_env.sh && ${CMD}" 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | target-version = ["py37"] 4 | -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- 1 | # Baselines unhelpfully does not list TensorFlow as a requirement, 2 | # but setup will break if it isn't installed. 
So force installing it first. 3 | tensorflow>=1.13.0,<1.14.0 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | codecov 3 | codespell 4 | flake8 5 | flake8-blind-except 6 | flake8-builtins 7 | flake8-debugger 8 | flake8-isort 9 | isort~=4.0 10 | pytype 11 | pytest 12 | pytest-cov 13 | pytest-shard 14 | pytest-xdist 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.15 2 | pandas>=0.24.1 3 | scikit-learn>=0.20.3 4 | Pillow>=6.0.0 5 | matplotlib>=3.0.3 6 | Theano>=1.0 7 | sacred>=0.8.1 8 | pymongo>=3.8.0 9 | GitPython>=2.1 10 | baselines @ git+https://github.com/HumanCompatibleAI/baselines.git@f70377 11 | stable-baselines @ git+https://github.com/hill-a/stable-baselines.git@6fbc9a9 12 | ray[debug,tune]>=1.0.0 13 | boto3>=1.9 14 | awscli>=1.16 15 | statsmodels>=0.9.0 16 | seaborn>=0.9.0 17 | ilqr @ git+https://github.com/anassinator/ilqr.git 18 | gym[mujoco]==0.15.4 19 | mujoco-py-131 @ git+https://github.com/AdamGleave/mujoco-py.git@mj131 20 | gym_compete @ git+https://github.com/HumanCompatibleAI/multiagent-competition.git@3a3f9dc 21 | -------------------------------------------------------------------------------- /scripts/aws/cloudwatch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | INSTANCE_ID=`ec2metadata --instance-id | cut -d' ' -f 2` 4 | EC2_REGION=`ec2metadata --availability-zone | sed 's/[a-z]$//' | cut -d' ' -f 2` 5 | CLUSTER_NAME=`aws ec2 describe-tags --filters "Name=resource-id,Values=${INSTANCE_ID}" "Name=key,Values=ray-cluster-name" --region=${EC2_REGION} | grep Value | cut -f2 -d':' | cut -f2 -d'"'` 6 | ALARM_NAME="${CLUSTER_NAME}-idle" 7 | 8 | aws cloudwatch delete-alarms --region ${EC2_REGION} --alarm-name ${ALARM_NAME} 9 | aws cloudwatch put-metric-alarm --region ${EC2_REGION} --alarm-name ${ALARM_NAME} \ 10 | --namespace AWS/EC2 --metric-name CPUUtilization \ 11 | --threshold 20 --comparison-operator LessThanThreshold \ 12 | --statistic Average --period 3600 \ 13 | --datapoints-to-alarm 12 --evaluation-periods 24 \ 14 | --treat-missing-data notBreaching \ 15 | --alarm-actions arn:aws:sns:us-west-2:286342508718:default \ 16 | --dimensions "Name=InstanceId,Value=${INSTANCE_ID}" 17 | -------------------------------------------------------------------------------- /scripts/aws/termination.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | from urllib.request import urlopen 3 | import logging 4 | import subprocess 5 | import time 6 | 7 | TERMINATION_URL = 'http://169.254.169.254/latest/meta-data/spot/termination-time' 8 | POLL_INTERVAL = 5 9 | 10 | def run(): 11 | logging.info('Starting.') 12 | not_terminated = True 13 | while not_terminated: 14 | try: 15 | time.sleep(POLL_INTERVAL) 16 | req = urlopen(TERMINATION_URL) 17 | not_terminated = False 18 | except urllib.error.HTTPError as e: 19 | if e.getcode() != 404: 20 | logging.error('Unexpected response code %s', e) 21 | except urllib.error.URLError as e: 22 | logging.error('Unexpected error %s', e) 23 | logging.info('Received termination notice!') 24 | logging.info('Scheduled to terminate at %s', req.read()) 25 | logging.info('Shutting down Ray cleanly.') 26 | subprocess.check_call(['ray', 'stop']) 27 | 28 | if
__name__ == '__main__': 29 | logging.basicConfig(level=logging.INFO) 30 | run() 31 | -------------------------------------------------------------------------------- /scripts/doubleblind.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | ROOT_DIR="$( dirname "${SCRIPT_DIR}" )" 5 | 6 | OPTIONS="-v -z -r -lpt" 7 | EXCLUDES="LICENSE README.md setup.py scripts/doubleblind.sh ci/local_tests.sh .travis.yml experiments/common.sh experiments/planning 8 | src/aprl/configs/ray/ .git supplementary.zip *.pkl requirements*.txt" 9 | 10 | # Refuse to build the archive if we find any of these words in non-excluded sources 11 | BLACKLISTED="Adam Gleave Michael Dennis Cody Neel Kant Sergey Levine Stuart Russell berkeley humancompatibleai humancompatible" 12 | 13 | TMPDIR=`mktemp --tmpdir -d doubleblinded.XXXXXXXX` 14 | 15 | SYNC_CMD="rsync ${OPTIONS} --exclude-from=.gitignore" 16 | for exclude in ${EXCLUDES}; do 17 | SYNC_CMD="${SYNC_CMD} --exclude=${exclude}" 18 | done 19 | 20 | ${SYNC_CMD} ${ROOT_DIR} ${TMPDIR} 21 | pushd ${TMPDIR} 22 | 23 | GREP_TERMS="" 24 | for pattern in ${BLACKLISTED}; do 25 | GREP_TERMS="${GREP_TERMS} -e ${pattern}" 26 | done 27 | grep -r . -i -F ${GREP_TERMS} 28 | if [[ $? -ne 1 ]]; then 29 | echo "Found blacklisted word. Dying." 30 | exit 1 31 | fi 32 | 33 | cp $HOME/dev/adversarial-policies-paper/supplementary.pdf . 34 | 35 | rm ${ROOT_DIR}/supplementary.zip 36 | zip -r ${ROOT_DIR}/supplementary.zip . 37 | popd 38 | -------------------------------------------------------------------------------- /scripts/grab_frame.py: -------------------------------------------------------------------------------- 1 | """Extract a frame from the initial state of an environment for illustration purposes.
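Example invocation (environment name, seed and output path are illustrative): python scripts/grab_frame.py --env multicomp/SumoHumansAutoContact-v0 --seed 0 --out initial_frame.png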
2 | 3 | Lets user interactively move the camera, then takes a screenshot when ready.""" 4 | 5 | import argparse 6 | import select 7 | import sys 8 | import time 9 | 10 | import imageio 11 | import mujoco_py 12 | import numpy as np 13 | 14 | from aprl.envs.wrappers import make_env 15 | from aprl.visualize.annotated_gym_compete import AnnotatedGymCompete 16 | 17 | 18 | def get_img(env_name, seed): 19 | env = make_env(env_name, int(seed), 0, None) 20 | env = AnnotatedGymCompete(env, env_name, 'zoo', '1', 'zoo', '1', None, 21 | resolution=(640, 480), font='times', font_size=24, 22 | draw=False) 23 | env.reset() 24 | 25 | env_scene = env.unwrapped.env_scene 26 | env_scene.viewer = mujoco_py.MjViewer(init_width=1000, init_height=750) 27 | env_scene.viewer.start() 28 | env_scene.viewer.set_model(env_scene.model) 29 | env_scene.viewer_setup() 30 | 31 | print("Type save to save the image, step to take one timestep.") 32 | 33 | running = True 34 | while running: 35 | img = None 36 | while sys.stdin not in select.select([sys.stdin], [], [], 0)[0]: 37 | env.render() 38 | img = env.render(mode='rgb_array') 39 | 40 | input = sys.stdin.readline().strip() 41 | if input == 'save': 42 | running = False 43 | elif input == 'step': 44 | action = tuple(np.zeros(space.shape) for space in env.action_space.spaces) 45 | env.step(action) 46 | else: 47 | print(f"Unrecognized command '{input}'") 48 | 49 | return img 50 | 51 | 52 | def main(): 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument('--env', type=str, help="environment name") 55 | parser.add_argument('--seed', type=int, default=time.time()) 56 | parser.add_argument('--out', type=str, help="path to save figure") 57 | args = parser.parse_args() 58 | 59 | img = get_img(args.env, args.seed) 60 | imageio.imwrite(args.out, img) 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /scripts/incomplete_experiments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import pandas as pd 6 | 7 | 8 | logger = logging.getLogger('scripts.incomplete_experiments') 9 | 10 | 11 | def directory_type(path): 12 | if not os.path.isdir(path): 13 | raise ValueError(f"'{path}' does not exist") 14 | return path 15 | 16 | 17 | def get_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('data_dir', type=directory_type) 20 | return parser.parse_args() 21 | 22 | 23 | def get_stats(data_dir): 24 | started = {} 25 | completed = {} 26 | data_dir = os.path.abspath(data_dir) 27 | for root, dirs, files in os.walk(data_dir, followlinks=True): 28 | # checkpoint directories are irrelevant and will slow down search 29 | logger.debug(f"Searching '{root}'") 30 | dirs[:] = list(filter(lambda x: x not in ['checkpoint', 'mon', 'tb'], dirs)) 31 | components = root.split(os.path.sep) 32 | 33 | if 'final_model' in dirs: 34 | # root is of format .../exp_name/timestamp/run_id/data/baselines/run_id 35 | assert components[-2] == 'baselines' 36 | logger.debug(f"Found final_model in '{root}'") 37 | exp_name = os.path.relpath(os.path.join('/', *components[:-4]), data_dir) 38 | completed[exp_name] = completed.get(exp_name, 0) + 1 39 | dirs[:] = [] # no need to search further in data/baselines/* 40 | elif 'sacred' in dirs: 41 | # root is of format ../exp_name/timestamp/run_id/data/sacred 42 | assert components[-1] == 'data' 43 | logger.debug(f"Found sacred at '{root}'") 44 | exp_name = 
os.path.relpath(os.path.join('/', *components[:-2]), data_dir) 45 | started[exp_name] = started.get(exp_name, 0) + 1 46 | dirs.remove('sacred') # don't need to search inside it 47 | 48 | return started, completed 49 | 50 | 51 | def compute_incompletes(started, completed): 52 | incomplete = {k: num_started - completed.get(k, 0) for k, num_started in started.items()} 53 | percent_incomplete = {k: num_incomplete / started[k] 54 | for k, num_incomplete in incomplete.items()} 55 | percent_incomplete = pd.Series(percent_incomplete) 56 | percent_incomplete = percent_incomplete.sort_values(ascending=False) 57 | percent_incomplete.index.name = 'path' 58 | percent_incomplete.name = 'percent_incomplete' 59 | return percent_incomplete 60 | 61 | 62 | def main(): 63 | logging.basicConfig(level=logging.INFO) 64 | args = get_args() 65 | started, completed = get_stats(args.data_dir) 66 | percent_incomplete = compute_incompletes(started, completed) 67 | print(percent_incomplete.to_csv(header=True)) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [coverage:report] 2 | exclude_lines = 3 | pragma: no cover 4 | omit = 5 | setup.py 6 | 7 | [coverage:run] 8 | include= 9 | src/* 10 | tests/* 11 | 12 | [coverage:paths] 13 | source = 14 | src/aprl 15 | *venv/lib/python*/site-packages/aprl 16 | 17 | [flake8] 18 | max-line-length=100 19 | ignore = W503,E203 20 | 21 | [isort] 22 | known_first_party=aprl 23 | known_third_party=baselines,gym_compete,ray,stable_baselines 24 | force_sort_within_sections=True 25 | force_grid_wrap=0 26 | include_trailing_comma=True 27 | line_length=100 28 | multi_line_output=3 29 | use_parentheses=True 30 | 31 | [tool:pytest] 32 | filterwarnings = 33 | ignore:the imp module is deprecated in favour of importlib:DeprecationWarning:distutils 34 | ignore:Using or importing the ABCs from 'collections':DeprecationWarning:(google|pkg_resources|tensorflow|theano) 35 | ignore:inspect.getargspec:DeprecationWarning:tensorflow 36 | ignore:Passing.* as a synonym of type is deprecated:FutureWarning:(tensorflow|tensorboard) 37 | ignore:inspect.getargspec:DeprecationWarning:ray 38 | ignore:Importing from numpy.testing:DeprecationWarning:theano 39 | ignore:Parameters to load are deprecated:Warning:gym 40 | ignore:The binary mode of fromstring is deprecated:DeprecationWarning:gym 41 | ignore:.*TF Lite has moved from tf.contrib.lite to tf.lite:PendingDeprecationWarning 42 | ignore:It appears you are loading from a file with old format. Older cloudpickle format has been replaced with zip-archived models. Consider saving the model with new format.:DeprecationWarning:stable_baselines 43 | ignore:Loading model parameters from a list. This has been replaced with parameter dictionaries with variable names and parameters. If you are loading from a file, consider re-saving the file.:DeprecationWarning:stable_baselines 44 | ignore:Usage of `load_running_average` is deprecated. 
Please use `load` or pickle instead.:DeprecationWarning:stable_baselines 45 | 46 | [pytype] 47 | inputs = aprl 48 | python_version = 3.7 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import find_packages, setup 4 | 5 | setup( 6 | name="aprl", 7 | version="0.2", 8 | description="Adversarial Policies for Reinforcement Learning", 9 | author="Adam Gleave, Michael Dennis, et al", 10 | author_email="adam@gleave.me", 11 | python_requires=">=3.7.0", 12 | url="https://github.com/HumanCompatibleAI/adversarial-policies", 13 | packages=find_packages("src"), 14 | package_dir={"": "src"}, 15 | package_data={"aprl": ["configs/multi/*.json", "configs/noise/*.json", "configs/rew/*.json"]}, 16 | # We have some non-pip packages as requirements, 17 | # see requirements-build.txt and requirements.txt. 18 | install_requires=[], 19 | include_package_data=True, 20 | license="MIT", 21 | classifiers=[ 22 | # Trove classifiers 23 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 24 | "License :: OSI Approved :: MIT License", 25 | "Programming Language :: Python", 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 3.7", 28 | "Programming Language :: Python :: Implementation :: CPython", 29 | "Programming Language :: Python :: Implementation :: PyPy", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /src/aprl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/__init__.py -------------------------------------------------------------------------------- /src/aprl/activations/__init__.py: -------------------------------------------------------------------------------- 1 | """Generating and analysing activations of victim policy network.""" 2 | -------------------------------------------------------------------------------- /src/aprl/activations/density/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/activations/density/__init__.py -------------------------------------------------------------------------------- /src/aprl/activations/density/pipeline.py: -------------------------------------------------------------------------------- 1 | """Records activations from victim's policy network and then fits a density model.""" 2 | 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | import sacred 8 | from sacred.observers import FileStorageObserver 9 | 10 | from aprl.activations import generate_activations 11 | from aprl.activations.density.fit_density import fit_model, fit_model_ex 12 | from aprl.common import utils 13 | 14 | density_ex = sacred.Experiment( 15 | "density", ingredients=[generate_activations.generate_activations_ex, fit_model_ex] 16 | ) 17 | logger = logging.getLogger("aprl.density.pipeline") 18 | 19 | 20 | @density_ex.config 21 | def main_config(generate_activations, fit_density_model): 22 | generate_activations = dict(generate_activations) 23 | generate_activations["score_update"] = {"score": {"timesteps": 40000}} 24 | 25 | output_root = osp.join("data", "density") # where to produce 
output 26 | _ = locals() # quieten flake8 unused variable warning 27 | del _ 28 | 29 | 30 | @density_ex.named_config 31 | def debug_config(generate_activations, fit_density_model): 32 | # Is this the name of an ingredient? Is it being auto-added to config somehow? 33 | output_root = "/tmp/density-debug" 34 | generate_activations = dict(generate_activations) 35 | fit_density_model = dict(fit_density_model) 36 | 37 | generate_activations["score_configs"] = [("debug_two_agents",)] 38 | generate_activations["score_update"] = {"score": {"timesteps": 100}} 39 | fit_density_model["max_timesteps"] = 100 40 | fit_density_model["model_kwargs"] = {"n_components": 2} 41 | 42 | _ = locals() # quieten flake8 unused variable warning 43 | del _ 44 | 45 | 46 | @density_ex.main 47 | def pipeline(_run, output_root, fit_density_model): 48 | out_dir = osp.join(output_root, utils.make_timestamp()) 49 | os.makedirs(out_dir) 50 | 51 | activation_glob = fit_density_model["activation_glob"] 52 | if activation_glob is None: 53 | activation_dir = osp.join(out_dir, "activations") 54 | generate_activations.generate_activations(out_dir=activation_dir) 55 | activation_glob = osp.join(activation_dir, "*") 56 | 57 | # This is unsuitable for hyperparameter sweeps, as can only run one model fitting step. 58 | # See experiments/modelfree/density.sh for a bash script hyperparameter sweep, that 59 | # re-uses activations. 60 | # SOMEDAY: Add support for running multiple fitting configs? 61 | # (Does not neatly fit into Sacred model.) 62 | model_dir = osp.join(out_dir, "fitted") 63 | fit_model(activation_glob=activation_glob, output_root=model_dir) 64 | 65 | return out_dir 66 | 67 | 68 | def main(): 69 | observer = FileStorageObserver(osp.join("data", "sacred", "density")) 70 | density_ex.observers.append(observer) 71 | density_ex.run_commandline() 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /src/aprl/activations/generate_activations.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import os.path as osp 5 | 6 | import sacred 7 | from sacred.observers import FileStorageObserver 8 | 9 | from aprl.common import utils 10 | from aprl.multi.score import extract_data, run_external 11 | 12 | generate_activations_ex = sacred.Experiment("generate_activations") 13 | logger = logging.getLogger("aprl.activations.generate_activations") 14 | 15 | 16 | @generate_activations_ex.config 17 | def activation_storing_config(): 18 | adversary_path = osp.join( 19 | "data", 20 | "aws", 21 | "score_agents", 22 | "normal", 23 | "2019-05-05T18:12:24+00:00", 24 | "best_adversaries.json", 25 | ) 26 | ray_upload_dir = "data" # where Ray will upload multi.score outputs. 
'data' works on local 27 | out_dir = None 28 | 29 | # Configs for the multi-score experiments 30 | score_configs = [(x,) for x in ["zoo_baseline", "random_baseline", "adversary_trained"]] 31 | score_update = {} 32 | 33 | _ = locals() # quieten flake8 unused variable warning 34 | del _ 35 | 36 | 37 | def _activations_path_generator( 38 | trial_root, 39 | cfg, 40 | env_sanitized, 41 | victim_index, 42 | victim_type, 43 | victim_path, 44 | opponent_type, 45 | opponent_path, 46 | ): 47 | del cfg 48 | src_path = osp.join(trial_root, "data", "trajectories", f"agent_{victim_index}.npz") 49 | 50 | if opponent_path.startswith("/"): # is path name 51 | opponent_root = osp.sep.join(opponent_path.split(osp.sep)[:-3]) 52 | opponent_sacred = osp.join(opponent_root, "sacred", "train", "1", "config.json") 53 | 54 | with open(opponent_sacred, "r") as f: 55 | opponent_cfg = json.load(f) 56 | 57 | if "embed_path" in opponent_cfg: 58 | opponent_path = opponent_cfg["embed_path"] 59 | elif "victim_path" in opponent_cfg: 60 | # TODO(adam): remove backwards compatibility when all policies retrained 61 | opponent_path = opponent_cfg["victim_path"] 62 | else: 63 | raise KeyError("'embed_path' and 'victim_path' not present in 'opponent_cfg'") 64 | 65 | new_name = ( 66 | f"{env_sanitized}_victim_{victim_type}_{victim_path}" 67 | f"_opponent_{opponent_type}_{opponent_path}" 68 | ) 69 | return src_path, new_name, "npz" 70 | 71 | 72 | @generate_activations_ex.main 73 | def generate_activations( 74 | _run, out_dir, score_configs, score_update, adversary_path, ray_upload_dir 75 | ): 76 | """Uses multi.score to generate activations, then extracts them into a convenient 77 | directory structure.""" 78 | logger.info("Generating activations") 79 | activation_dirs = run_external( 80 | score_configs, 81 | post_named_configs=["save_activations"], 82 | config_updates=score_update, 83 | adversary_path=adversary_path, 84 | ) 85 | 86 | os.makedirs(out_dir) 87 | extract_data(_activations_path_generator, out_dir, activation_dirs, ray_upload_dir) 88 | logger.info("Activations saved") 89 | 90 | utils.add_artifacts(_run, out_dir) 91 | 92 | 93 | def main(): 94 | observer = FileStorageObserver(osp.join("data", "sacred", "generate_activations")) 95 | generate_activations_ex.observers.append(observer) 96 | generate_activations_ex.run_commandline() 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /src/aprl/activations/tsne/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/activations/tsne/__init__.py -------------------------------------------------------------------------------- /src/aprl/activations/tsne/fit_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import os.path as osp 4 | import pickle 5 | import re 6 | import tempfile 7 | from typing import Any, Dict 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import ray 12 | import sacred 13 | from sacred.observers import FileStorageObserver 14 | from sklearn.manifold import TSNE 15 | 16 | from aprl.common import utils 17 | 18 | fit_model_ex = sacred.Experiment("tsne_fit_model") 19 | logger = logging.getLogger("aprl.activations.tsne.fit_model") 20 | 21 | 22 | @fit_model_ex.config 23 | def base_config(): 24 | ray_server = None # by default will launch 
a server 25 | init_kwargs = {} # passed to ray.init() 26 | activation_dir = None 27 | output_root = None 28 | data_type = "ff_policy" 29 | num_components = 2 30 | num_observations = None 31 | seed = 0 32 | perplexity = 250 33 | _ = locals() # quieten flake8 unused variable warning 34 | del _ 35 | 36 | 37 | @fit_model_ex.named_config 38 | def debug_config(): 39 | num_observations = 1000 40 | _ = locals() # quieten flake8 unused variable warning 41 | del _ 42 | 43 | 44 | def _load_and_reshape_single_file(np_path, opponent_type, data_type): 45 | traj_data = np.load(np_path, allow_pickle=True) 46 | episode_list = traj_data[data_type].tolist() 47 | episode_lengths = [len(episode) for episode in episode_list] 48 | episode_id = [] 49 | observation_index = [] 50 | relative_observation_index = [] 51 | for i, episode_length in enumerate(episode_lengths): 52 | episode_id += [i] * episode_length 53 | episode_observation_ids = list(range(episode_length)) 54 | observation_index += episode_observation_ids 55 | relative_observation_index += [el / episode_length for el in episode_observation_ids] 56 | 57 | concatenated_data = np.concatenate(episode_list) 58 | opponent_type = [opponent_type] * len(concatenated_data) 59 | 60 | metadata_df = pd.DataFrame( 61 | { 62 | "episode_id": episode_id, 63 | "observation_index": observation_index, 64 | "relative_observation_index": relative_observation_index, 65 | "opponent_id": opponent_type, 66 | } 67 | ) 68 | return concatenated_data, metadata_df 69 | 70 | 71 | @ray.remote 72 | def fit_tsne_helper( 73 | activation_paths, output_dir, num_components, num_observations, perplexity, data_type 74 | ): 75 | logger.info(f"Starting T-SNE fitting, saving to {output_dir}") 76 | 77 | all_file_data = [] 78 | all_metadata = [] 79 | for opponent_type, path in activation_paths.items(): 80 | logger.debug(f"Loaded data for {opponent_type} from {path}") 81 | file_data, metadata = _load_and_reshape_single_file(path, opponent_type, data_type) 82 | all_file_data.append(file_data) 83 | all_metadata.append(metadata) 84 | 85 | merged_file_data = np.concatenate(all_file_data) 86 | merged_metadata = pd.concat(all_metadata) 87 | 88 | # Optionally, sub-sample 89 | if num_observations is None: 90 | num_observations = len(merged_metadata) 91 | sub_data = merged_file_data[0:num_observations].reshape(num_observations, 128) 92 | 93 | # Save metadata 94 | metadata_path = os.path.join(output_dir, "metadata.csv") 95 | merged_metadata[0:num_observations].to_csv(metadata_path) 96 | 97 | # Fit t-SNE 98 | tsne_obj = TSNE(n_components=num_components, verbose=1, perplexity=perplexity) 99 | tsne_ids = tsne_obj.fit_transform(sub_data) 100 | 101 | # Save weights 102 | tsne_weights_path = os.path.join(output_dir, "tsne_weights.pkl") 103 | with open(tsne_weights_path, "wb") as fp: 104 | pickle.dump(tsne_obj, fp) 105 | 106 | # Save cluster IDs 107 | cluster_ids_path = os.path.join(output_dir, "cluster_ids.npy") 108 | np.save(cluster_ids_path, tsne_ids) 109 | 110 | logger.info(f"Completed T-SNE fitting, saved to {output_dir}") 111 | 112 | 113 | @fit_model_ex.main 114 | def fit_model( 115 | _run, 116 | ray_server: str, 117 | init_kwargs: Dict[str, Any], 118 | activation_dir: str, 119 | output_root: str, 120 | num_components: int, 121 | num_observations: int, 122 | perplexity: int, 123 | data_type, 124 | ): 125 | try: 126 | ray.init(address=ray_server, **init_kwargs) 127 | 128 | # Find activation paths for each environment & victim-path tuple 129 | stem_pattern = re.compile(r"(.*)_opponent_.*\.npz") 130 | 
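# Hedged illustration of the filename convention the stem/opponent patterns here assume
# (values are made up; real names come from aprl.activations.generate_activations):
#     >>> import re
#     >>> fname = "SumoHumans_victim_zoo_1_opponent_zoo_2.npz"
#     >>> re.match(r"(.*)_opponent_.*\.npz", fname).group(1)
#     'SumoHumans_victim_zoo_1'
#     >>> re.match(r".*_opponent_([^\s]+)_[^\s]+\.npz", fname).group(1)
#     'zoo'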
opponent_pattern = re.compile(r".*_opponent_([^\s]+)_[^\s]+\.npz") 131 | activation_paths = {} 132 | for fname in os.listdir(activation_dir): 133 | stem_match = stem_pattern.match(fname) 134 | if stem_match is None: 135 | logger.debug(f"Skipping {fname}") 136 | continue 137 | stem = stem_match.groups()[0] 138 | 139 | opponent_match = opponent_pattern.match(fname) 140 | opponent_type = opponent_match.groups()[0] 141 | 142 | path = osp.join(activation_dir, fname) 143 | activation_paths.setdefault(stem, {})[opponent_type] = path 144 | 145 | # Create temporary output directory (if needed) 146 | tmp_dir = None 147 | if output_root is None: 148 | tmp_dir = tempfile.TemporaryDirectory() 149 | output_root = tmp_dir.name 150 | 151 | # Fit t-SNE and save model weights 152 | results = [] 153 | for stem, paths in activation_paths.items(): 154 | output_dir = osp.join(output_root, stem) 155 | os.makedirs(output_dir) 156 | future = fit_tsne_helper.remote( 157 | paths, output_dir, num_components, num_observations, perplexity, data_type 158 | ) 159 | results.append(future) 160 | 161 | ray.get(results) # block until all jobs have finished 162 | utils.add_artifacts(_run, output_root, ingredient=fit_model_ex) 163 | finally: 164 | # Clean up temporary directory (if needed) 165 | if tmp_dir is not None: 166 | tmp_dir.cleanup() 167 | ray.shutdown() 168 | 169 | 170 | def main(): 171 | observer = FileStorageObserver(osp.join("data", "sacred", "tsne_fit")) 172 | fit_model_ex.observers.append(observer) 173 | fit_model_ex.run_commandline() 174 | 175 | 176 | if __name__ == "__main__": 177 | main() 178 | -------------------------------------------------------------------------------- /src/aprl/activations/tsne/pipeline.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import os.path as osp 4 | 5 | import sacred 6 | from sacred.observers import FileStorageObserver 7 | 8 | from aprl.activations import generate_activations 9 | from aprl.activations.tsne import visualize 10 | from aprl.activations.tsne.fit_model import fit_model, fit_model_ex 11 | from aprl.common import utils 12 | 13 | tsne_ex = sacred.Experiment( 14 | "tsne", 15 | ingredients=[ 16 | generate_activations.generate_activations_ex, 17 | fit_model_ex, 18 | visualize.visualize_ex, 19 | ], 20 | ) 21 | logger = logging.getLogger("aprl.activations.tsne.pipeline") 22 | 23 | 24 | @tsne_ex.config 25 | def activation_storing_config(): 26 | output_root = "data/tsne" # where to produce output 27 | exp_name = "default" # experiment name 28 | 29 | _ = locals() # quieten flake8 unused variable warning 30 | del _ 31 | 32 | 33 | @tsne_ex.named_config 34 | def debug_config(generate_activations, tsne_visualize): 35 | generate_activations = dict(generate_activations) 36 | generate_activations["score_configs"] = [("debug_two_agents",)] 37 | generate_activations["score_update"] = {"score": {"timesteps": 100}} 38 | 39 | tsne_visualize = dict(tsne_visualize) 40 | tsne_visualize["ordering"] = ["Zoo", "Rand"] 41 | 42 | exp_name = "debug" 43 | 44 | _ = locals() # quieten flake8 unused variable warning 45 | del _ 46 | 47 | 48 | @tsne_ex.main 49 | def pipeline(_run, output_root, exp_name): 50 | out_dir = osp.join(output_root, exp_name, utils.make_timestamp()) 51 | os.makedirs(out_dir) 52 | 53 | activation_dst_dir = osp.join(out_dir, "activations") 54 | generate_activations.generate_activations(out_dir=activation_dst_dir) 55 | 56 | model_dir = osp.join(out_dir, "fitted") 57 | 
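# Rough output layout under out_dir once the full pipeline has run (paths illustrative;
# the timestamped directory comes from utils.make_timestamp()):
#   data/tsne/default/<timestamp>/activations/    raw .npz activation files
#   data/tsne/default/<timestamp>/fitted/<stem>/  tsne_weights.pkl, cluster_ids.npy, metadata.csv
#   data/tsne/default/<timestamp>/figures/        plots produced by visualize.visualize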
fit_model(activation_dir=activation_dst_dir, output_root=model_dir) 58 | 59 | figure_dst_dir = osp.join(out_dir, "figures") 60 | visualize.visualize(model_glob=osp.join(model_dir, "*"), output_root=figure_dst_dir) 61 | 62 | return out_dir 63 | 64 | 65 | def main(): 66 | observer = FileStorageObserver(osp.join("data", "sacred", "tsne")) 67 | tsne_ex.observers.append(observer) 68 | tsne_ex.run_commandline() 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /src/aprl/agents/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F401 2 | 3 | from aprl.agents.monte_carlo import MonteCarloParallel, MonteCarloSingle, MujocoResettableWrapper 4 | from aprl.agents.mujoco_lqr import ( 5 | MujocoFiniteDiffCost, 6 | MujocoFiniteDiffDynamicsBasic, 7 | MujocoFiniteDiffDynamicsPerformance, 8 | ) 9 | -------------------------------------------------------------------------------- /src/aprl/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/common/__init__.py -------------------------------------------------------------------------------- /src/aprl/common/mujoco.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from collections import namedtuple 3 | 4 | import gym 5 | import numpy as np 6 | 7 | 8 | # TODO: Cythonize 9 | class MujocoState(namedtuple("MujocoStateBase", "qpos qvel")): 10 | """Represents state from the MuJoCo simulator needed for planning, 11 | namely position and velocity.""" 12 | 13 | @staticmethod 14 | def from_mjdata(data): 15 | return MujocoState(data.qpos, data.qvel) 16 | 17 | @staticmethod 18 | def from_flattened(flattened, sim): 19 | qpos = flattened[0 : sim.model.nq] 20 | qvel = flattened[sim.model.nq : sim.model.nq + sim.model.nv] 21 | return MujocoState(qpos, qvel) 22 | 23 | def set_mjdata(self, data): 24 | try: 25 | data.qpos[:] = self.qpos 26 | data.qvel[:] = self.qvel 27 | except ValueError: # older mujoco version 28 | data.qpos = self.qpos 29 | data.qvel = self.qvel 30 | 31 | def flatten(self): 32 | return np.concatenate((self.qpos, self.qvel)) 33 | 34 | 35 | class ResettableEnv(gym.Env, abc.ABC): 36 | """A Gym environment that can be reset to an arbitrary state.""" 37 | 38 | @abc.abstractmethod 39 | def get_state(self): 40 | """Returns a serialized representation of the current state.""" 41 | pass 42 | 43 | @abc.abstractmethod 44 | def set_state(self, x): 45 | """Restores the environment to a previously saved state. 
46 | :param x: return value of a previous call to get_state().""" 47 | pass 48 | -------------------------------------------------------------------------------- /src/aprl/common/multi_monitor.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from stable_baselines.bench import Monitor 5 | 6 | from aprl.common.utils import getattr_unwrapped 7 | 8 | 9 | class MultiMonitor(Monitor): 10 | def __init__( 11 | self, 12 | env, 13 | filename, 14 | our_idx=None, 15 | allow_early_resets=False, 16 | reset_keywords=(), 17 | info_keywords=(), 18 | ): 19 | num_agents = getattr_unwrapped(env, "num_agents") 20 | extra_rks = tuple("r{:d}".format(i) for i in range(num_agents)) 21 | super().__init__( 22 | env, 23 | filename, 24 | allow_early_resets=allow_early_resets, 25 | reset_keywords=reset_keywords, 26 | info_keywords=extra_rks + info_keywords, 27 | ) 28 | self.our_idx = our_idx 29 | self.info_keywords = info_keywords 30 | 31 | def step(self, action): 32 | """ 33 | Step the environment with the given action 34 | 35 | :param action: ([int] or [float]) the action 36 | :return: ([int] or [float], [float], [bool], dict) observation, reward, done, information 37 | """ 38 | if self.needs_reset: 39 | raise RuntimeError("Tried to step environment that needs reset") 40 | observation, reward, done, info = self.env.step(action) 41 | self.rewards.append(reward) 42 | if done: 43 | self.needs_reset = True 44 | eplen = len(self.rewards) 45 | ep_rew = np.asarray(self.rewards).sum(axis=0).round(6) 46 | our_rew = float("nan") if self.our_idx is None else ep_rew[self.our_idx] 47 | ep_info = {"r": our_rew, "l": eplen, "t": round(time.time() - self.t_start, 6)} 48 | for i, rew in enumerate(ep_rew): 49 | ep_info["r{:d}".format(i)] = rew 50 | for key in self.info_keywords: 51 | ep_info[key] = info[key] 52 | self.episode_rewards.append(ep_rew) 53 | self.episode_lengths.append(eplen) 54 | self.episode_times.append(time.time() - self.t_start) 55 | ep_info.update(self.current_reset_info) 56 | if self.logger: 57 | self.logger.writerow(ep_info) 58 | self.file_handler.flush() 59 | info["episode"] = ep_info 60 | self.total_steps += 1 61 | return observation, reward, done, info 62 | 63 | def __getattr__(self, name): 64 | return getattr(self.env, name) 65 | -------------------------------------------------------------------------------- /src/aprl/common/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import datetime 3 | import os 4 | 5 | import tensorflow as tf 6 | 7 | 8 | def make_session(graph=None): 9 | tf_config = tf.ConfigProto() 10 | tf_config.gpu_options.allow_growth = True 11 | sess = tf.Session(graph=graph, config=tf_config) 12 | return sess 13 | 14 | 15 | def make_timestamp(): 16 | ISO_TIMESTAMP = "%Y%m%d_%H%M%S" 17 | return datetime.datetime.now().strftime(ISO_TIMESTAMP) 18 | 19 | 20 | def add_artifacts(run, dirname, ingredient=None): 21 | """Convenience function for Sacred to add artifacts inside directory dirname to current run. 22 | 23 | :param run: (sacred.Run) object representing current experiment. Can be captured as `_run`. 24 | :param dirname: (str) root of directory to save. 25 | :param ingredient: (sacred.Ingredient or None) optional, ingredient that generated the 26 | artifacts. Will be used to tag saved files. This is ignored if ingredient 27 | is equal to the currently running experiment. 
28 | :return None""" 29 | prefix = "" 30 | if ingredient is not None: 31 | exp_name = run.experiment_info["name"] 32 | ingredient_name = ingredient.path 33 | if exp_name != ingredient_name: 34 | prefix = ingredient_name + "_" 35 | 36 | for root, dirs, files in os.walk(dirname): 37 | for file in files: 38 | path = os.path.join(root, file) 39 | relroot = os.path.relpath(path, dirname) 40 | name = prefix + relroot.replace("/", "_") + "_" + file 41 | run.add_artifact(path, name=name) 42 | 43 | 44 | # TODO(adam): delete this once Sacred issue #498 & #499 are resolved 45 | def sacred_copy(o): 46 | """Perform a deep copy on nested dictionaries and lists. 47 | 48 | If `d` is an instance of dict or list, copies `d` to a dict or list 49 | where the values are recursively copied using `sacred_copy`. Otherwise, `d` 50 | is copied using `copy.deepcopy`. Note this intentionally loses subclasses. 51 | This is useful if e.g. `d` is a Sacred read-only dict. However, it can be 52 | undesirable if e.g. `d` is an OrderedDict. 53 | 54 | :param o: (object) if dict, copy recursively; otherwise, use `copy.deepcopy`. 55 | :return A deep copy of d.""" 56 | if isinstance(o, dict): 57 | return {k: sacred_copy(v) for k, v in o.items()} 58 | elif isinstance(o, list): 59 | return [sacred_copy(v) for v in o] 60 | else: 61 | return copy.deepcopy(o) 62 | 63 | 64 | def getattr_unwrapped(env, attr): 65 | """Get attribute attr from env, or one of the nested environments. 66 | Args: 67 | - env(gym.Wrapper or gym.Env): a (possibly wrapped) environment. 68 | - attr: name of the attribute 69 | Returns: 70 | env.attr, if present, otherwise env.unwrapped.attr and so on recursively. 71 | """ 72 | try: 73 | return getattr(env, attr) 74 | except AttributeError: 75 | if env.env == env: 76 | raise 77 | else: 78 | return getattr_unwrapped(env.env, attr) 79 | -------------------------------------------------------------------------------- /src/aprl/configs/.gitignore: -------------------------------------------------------------------------------- 1 | *-localcfg* 2 | -------------------------------------------------------------------------------- /src/aprl/configs/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | DATA_LOCATION = os.path.abspath(os.environ.get("DATA_LOC", "data")) 4 | -------------------------------------------------------------------------------- /src/aprl/configs/multi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/configs/multi/__init__.py -------------------------------------------------------------------------------- /src/aprl/configs/multi/common.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from aprl.envs import gym_compete 5 | 6 | BANSAL_ENVS = ["multicomp/" + env for env in gym_compete.POLICY_STATEFUL.keys()] 7 | BANSAL_ENVS += ["multicomp/SumoHumansAutoContact-v0", "multicomp/SumoAntsAutoContact-v0"] 8 | BANSAL_GOOD_ENVS = [ # Environments well-suited to adversarial attacks 9 | "multicomp/KickAndDefend-v0", 10 | "multicomp/SumoHumansAutoContact-v0", 11 | "multicomp/SumoAntsAutoContact-v0", 12 | "multicomp/YouShallNotPassHumans-v0", 13 | ] 14 | 15 | 16 | def get_adversary_paths(): 17 | """Load adversary paths from ADVERSARY_PATHS environment variable. 
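The file must be JSON with a top-level "policies" key; only that key is read here, e.g. (the structure of the nested value is illustrative): {"policies": {...}}.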
18 | 19 | We can't make this a Sacred config param since Sacred named_configs execute before configs. 20 | """ 21 | path = os.getenv("ADVERSARY_PATHS") 22 | if path is None: 23 | raise ValueError( 24 | "Specify path to JSON file containing adversaries in ADVERSARY_PATHS " 25 | "environment variable. (Run 'experiments/modelfree/highest_win_rate.py'" 26 | "to generate this.)" 27 | ) 28 | with open(path, "r") as f: 29 | return json.load(f)["policies"] 30 | -------------------------------------------------------------------------------- /src/aprl/configs/noise/SumoHumans-cond.json: -------------------------------------------------------------------------------- 1 | { 2 | "metric": "sparse", 3 | "min_wait": 3000, 4 | "window_size": 1000, 5 | "start_val": 2 6 | } 7 | -------------------------------------------------------------------------------- /src/aprl/configs/noise/SumoHumans.json: -------------------------------------------------------------------------------- 1 | { 2 | "anneal_frac": 0.5, 3 | "param": 0.5 4 | } 5 | -------------------------------------------------------------------------------- /src/aprl/configs/noise/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "anneal_frac": 0.5, 3 | "param": 0.5 4 | } 5 | -------------------------------------------------------------------------------- /src/aprl/configs/ray/aws.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: aprl 2 | min_workers: 0 3 | max_workers: 10 4 | initial_workers: 0 5 | target_utilization_fraction: 0.8 6 | idle_timeout_minutes: 5 7 | 8 | docker: 9 | image: humancompatibleai/adversarial_policies:latest 10 | container_name: ray 11 | run_options: 12 | # NOTE: MUJOCO_KEY must be set in environment (it is in our AMI) 13 | - "--env MUJOCO_KEY=${MUJOCO_KEY}" 14 | # Open port 6006 for TensorBoard 15 | - "-p 6006:6006" 16 | # These are to work around Ray bug #4403. 17 | - "-v /home/ubuntu/ray_results:/home/ubuntu/ray_results" 18 | - "-v /home/ubuntu/aws_private:/home/ubuntu/aws_private" 19 | - "--env LOGNAME=ubuntu" 20 | - "--env HOME=/home/ubuntu" 21 | - "--env DATA_LOC=/home/ubuntu/aws_private" 22 | - "--env ADVERSARY_PATHS=/home/ubuntu/aws_private/multi_train/paper/highest_win_policies_and_rates.json" 23 | 24 | provider: 25 | type: aws 26 | region: us-west-2 27 | availability_zone: us-west-2a,us-west-2b,us-west-2c,us-west-2d 28 | cache_stopped_nodes: False # TODO(adam): remove when Ray issue #6128 is closed 29 | 30 | # How Ray will authenticate with newly launched nodes. 31 | auth: 32 | ssh_user: ubuntu 33 | ssh_private_key: ~/.ssh/adversarial-policies 34 | 35 | head_node: 36 | InstanceType: c5.4xlarge 37 | ImageId: ami-03cb2176bb0ac9ec7 # CHAI Ubuntu 18.04 38 | KeyName: Adversarial Policies 39 | BlockDeviceMappings: 40 | - DeviceName: /dev/sda1 41 | Ebs: 42 | VolumeSize: 50 43 | VolumeType: gp2 44 | 45 | worker_nodes: 46 | InstanceType: c5.9xlarge # half a machine, less likely to get preempted than full c5.18xlarge 47 | ImageId: ami-03cb2176bb0ac9ec7 # CHAI Ubuntu 18.04 48 | KeyName: Adversarial Policies 49 | BlockDeviceMappings: 50 | - DeviceName: /dev/sda1 51 | Ebs: 52 | VolumeSize: 50 53 | VolumeType: gp2 54 | InstanceMarketOptions: 55 | MarketType: spot 56 | IamInstanceProfile: 57 | Name: EC2AccessS3 58 | 59 | # List of shell commands to run to set up nodes. 
60 | initialization_commands: # before entering Docker 61 | - "curl -L -o $HOME/goofys https://github.com/kahing/goofys/releases/latest/download/goofys && chmod a+x $HOME/goofys" 62 | - "[[ -d /home/ubuntu/aws_private ]] || sudo mkdir /home/ubuntu/aws_private" 63 | - sudo $HOME/goofys -o allow_other --file-mode=0666 --dir-mode=0777 adversarial-policies /home/ubuntu/aws_private 64 | # TODO(adam): remove once Ray #6111 merged 65 | - docker pull humancompatibleai/adversarial_policies:latest 66 | setup_commands: 67 | # Part of Ray bug #4403 workaround. 68 | - ln -sf /root/.mujoco /home/ubuntu/.mujoco 69 | - ln -sf /home/ubuntu/ray_bootstrap_key.pem /root/ray_bootstrap_key.pem 70 | head_setup_commands: [] 71 | worker_setup_commands: [] 72 | 73 | head_start_ray_commands: 74 | # Set up alarm for if node is left running by mistake 75 | # Then (re)start Ray. 76 | - > 77 | DEBIAN_FRONTEND=noninteractive apt-get install -y cloud-guest-utils && 78 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 79 | git pull && 80 | . /adversarial-policies/ci/prepare_env.sh && 81 | python3 setup.py sdist bdist_wheel && 82 | pip install --force dist/aprl-*.whl && 83 | /adversarial-policies/scripts/aws/cloudwatch.sh && 84 | ray stop && 85 | ulimit -n 65536 && 86 | ray start --head --redis-port=6379 --object-manager-port=8076 \ 87 | --autoscaling-config=~/ray_bootstrap_config.yaml --num-cpus=8 88 | 89 | worker_start_ray_commands: 90 | # (Re)start spot termination monitor. 91 | # Then (re)start Ray. 92 | - > 93 | (pkill -x -f "python /adversarial-policies/scripts/aws/termination.py" || true) && 94 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 95 | git pull && 96 | . /adversarial-policies/ci/prepare_env.sh && 97 | python3 setup.py sdist bdist_wheel && 98 | pip install --force dist/aprl-*.whl && 99 | ray stop && 100 | (python /adversarial-policies/scripts/aws/termination.py >> /tmp/aws_termination.log 2>&1 &) && 101 | ulimit -n 65536 && 102 | ray start --redis-address=$RAY_HEAD_IP:6379 --object-manager-port=8076 103 | -------------------------------------------------------------------------------- /src/aprl/configs/ray/baremetal.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: adversarial-policies 2 | min_workers: 0 3 | max_workers: 0 4 | initial_workers: 0 5 | target_utilization_fraction: 0.8 6 | idle_timeout_minutes: 5 7 | 8 | docker: 9 | image: humancompatibleai/adversarial_policies:latest 10 | container_name: ray 11 | run_options: 12 | # NOTE: MUJOCO_KEY must be set in environment (it is in our AMI) 13 | - "--env MUJOCO_KEY=${MUJOCO_KEY}" 14 | # Open port 6006 for TensorBoard 15 | - "-p 6006:6006" 16 | 17 | provider: 18 | type: local 19 | head_ip: YOUR HOSTNAME 20 | worker_ips: 21 | - HOSTNAME 1 22 | - HOSTNAME 2 23 | 24 | auth: 25 | ssh_user: YOUR USERNAME 26 | ssh_private_key: ~/.ssh/id_rsa 27 | 28 | head_node: {} 29 | worker_nodes: {} 30 | 31 | # List of shell commands to run to set up nodes. 32 | initialization_commands: # before entering Docker 33 | # TODO(adam): remove once Ray #6111 merged 34 | - docker pull humancompatibleai/adversarial_policies:latest 35 | setup_commands: [] 36 | head_setup_commands: [] 37 | worker_setup_commands: [] 38 | head_start_ray_commands: 39 | - > 40 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 41 | git pull && 42 | . 
/adversarial-policies/ci/prepare_env.sh && 43 | python3 setup.py sdist bdist_wheel && 44 | pip install dist/aprl-*.whl && 45 | ray stop && 46 | ulimit -n 65536 && 47 | ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml 48 | worker_start_ray_commands: 49 | - > 50 | git remote set-url origin https://github.com/HumanCompatibleAI/adversarial-policies.git && 51 | git pull && 52 | . /adversarial-policies/ci/prepare_env.sh && 53 | python3 setup.py sdist bdist_wheel && 54 | pip install dist/aprl-*.whl && 55 | ray stop && 56 | ulimit -n 65536 && 57 | ray start --redis-address=$RAY_HEAD_IP:6379 --object-manager-port=8076 58 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/Humanoid.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights": { 3 | "dense": { 4 | "reward_linvel": 0.1, 5 | "reward_quadctrl": 0.1, 6 | "reward_alive": 0.1, 7 | "reward_impact": 0.1 8 | }, 9 | "sparse": { 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/HumanoidStand.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights": { 3 | "dense": { 4 | "reward_linvel": 0, 5 | "reward_quadctrl": 0.1, 6 | "reward_alive": 0.1, 7 | "reward_impact": 0.1 8 | }, 9 | "sparse": { 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/SumoHumans-cond.json: -------------------------------------------------------------------------------- 1 | { 2 | "metric": "length", 3 | "window_size": 100, 4 | "min_wait": 100, 5 | "thresh": 100 6 | } 7 | -------------------------------------------------------------------------------- /src/aprl/configs/rew/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "weights": { 3 | "dense": { 4 | "reward_move": 0.1 5 | }, 6 | "sparse": { 7 | "reward_remaining": 0.01 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /src/aprl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa: F401 2 | import collections 3 | 4 | import gym 5 | from gym.envs import registration 6 | from pkg_resources import resource_filename 7 | 8 | 9 | def register(id, **kwargs): 10 | """Idempotent version of gym.envs.registration.registry. 11 | 12 | Needed since aprl.envs can get imported multiple times, e.g. when deserializing policies. 
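Re-registering an id with identical kwargs is a no-op; re-registering with different kwargs trips the assertion below.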
13 | """ 14 | try: 15 | existing_spec = registration.spec(id) 16 | new_spec = registration.EnvSpec(id, **kwargs) 17 | assert existing_spec.__dict__ == new_spec.__dict__ 18 | except gym.error.UnregisteredEnv: # not previously registered 19 | registration.register(id, **kwargs) 20 | 21 | 22 | # Low-dimensional multi-agent environments 23 | 24 | register( 25 | id="aprl/CrowdedLine-v0", 26 | entry_point="aprl.envs.crowded_line:CrowdedLineEnv", 27 | max_episode_steps=200, 28 | reward_threshold=0, 29 | kwargs={"num_agents": 3}, 30 | ) 31 | 32 | register( 33 | id="aprl/IteratedMatchingPennies-v0", 34 | entry_point="aprl.envs.matrix_game:IteratedMatchingPenniesEnv", 35 | max_episode_steps=200, 36 | reward_threshold=100, 37 | ) 38 | 39 | register( 40 | id="aprl/RockPaperScissors-v0", 41 | entry_point="aprl.envs.matrix_game:RockPaperScissorsEnv", 42 | max_episode_steps=200, 43 | reward_threshold=100, 44 | ) 45 | 46 | # Our variants of environments from gym_compete 47 | 48 | register( 49 | id="multicomp/SumoHumansAutoContact-v0", 50 | entry_point="aprl.envs.sumo_auto_contact:SumoAutoContactEnv", 51 | kwargs={ 52 | "agent_names": ["humanoid_fighter", "humanoid_fighter"], 53 | "scene_xml_path": resource_filename( 54 | "gym_compete", "new_envs/assets/world_body_arena.humanoid_body.humanoid_body.xml" 55 | ), 56 | "init_pos": [(-1, 0, 1.4), (1, 0, 1.4)], 57 | "max_episode_steps": 500, 58 | "min_radius": 1.5, 59 | "max_radius": 3.5, 60 | }, 61 | ) 62 | 63 | register( 64 | id="multicomp/SumoAntsAutoContact-v0", 65 | entry_point="aprl.envs.sumo_auto_contact:SumoAutoContactEnv", 66 | kwargs={ 67 | "agent_names": ["ant_fighter", "ant_fighter"], 68 | "scene_xml_path": resource_filename( 69 | "gym_compete", "new_envs/assets/world_body_arena.ant_body.ant_body.xml" 70 | ), 71 | "world_xml_path": resource_filename("gym_compete", "new_envs/assets/world_body_arena.xml"), 72 | "init_pos": [(-1, 0, 2.5), (1, 0, 2.5)], 73 | "max_episode_steps": 500, 74 | "min_radius": 2.5, 75 | "max_radius": 4.5, 76 | }, 77 | ) 78 | 79 | 80 | # Which index does the victim play in? 81 | # This is really an experiment parameter rather than an environment parameter. 82 | # However, it's used so widely (training, evaluation, figure and video generation) and is 83 | # held fixed across all experiments it's convenient to treat it as if it is static. 84 | VICTIM_INDEX = collections.defaultdict(lambda: 0) 85 | VICTIM_INDEX.update( 86 | { 87 | # YouShallNotPass: 1 is the walker, 0 is the blocker agent. 88 | # An adversarial walker makes little sense, but a blocker can be adversarial. 89 | "multicomp/YouShallNotPassHumans-v0": 1, 90 | } 91 | ) 92 | -------------------------------------------------------------------------------- /src/aprl/envs/crowded_line.py: -------------------------------------------------------------------------------- 1 | """Agents want to be close to 'food' but not be too crowded on a 1D line.""" 2 | 3 | from gym.spaces import Box, Tuple 4 | import numpy as np 5 | 6 | from aprl.envs.multi_agent import MultiAgentEnv 7 | 8 | 9 | class CrowdedLineEnv(MultiAgentEnv): 10 | dt = 1e-1 11 | 12 | """Agents live on a line in [-1,1]. 
States consist of a position and velocity 13 | for each agent, with actions consisting of acceleration.""" 14 | 15 | def __init__(self, num_agents): 16 | agent_action_space = Box(-1.0, 1.0, shape=(1,), dtype=np.float32) 17 | agent_observation_space = Box(-1.0, 1.0, shape=(2,), dtype=np.float32) 18 | self.action_space = Tuple(tuple(agent_action_space for _ in range(num_agents))) 19 | self.observation_space = Tuple(tuple(agent_observation_space for _ in range(num_agents))) 20 | super().__init__(num_agents=num_agents) 21 | self.np_random = np.random.RandomState() 22 | 23 | def _get_obs(self): 24 | return tuple((np.array(row) for row in self.state)) 25 | 26 | def reset(self): 27 | self.state = self.np_random.rand(self.num_agents, 2) * 2 - 1 28 | return self._get_obs() 29 | 30 | def step(self, action_n): 31 | # Dynamics 32 | positions = self.state[:, 0] 33 | velocities = self.state[:, 1] 34 | positions += velocities * self.dt 35 | velocities += np.array(action_n).flatten() 36 | self.state = np.clip(self.state, -1, 1) 37 | 38 | # Reward: zero-sum game, agents want to be close to food items that other 39 | # agents are not close to. They should end up spreading out to cover the line. 40 | # One food item per agent, equally spaced: 41 | # at [-1, -1 + 2/(N-1), ..., 0, 1 - 2/(N-1), 1] 42 | # Each agent induces a quasi-Gaussian around its current position, 43 | # and gets a weighted average of the value of each of the food items. 44 | # The value of the food item is inversely proportional to the weights 45 | # induced by the agents. 46 | foods = np.arange(self.num_agents) * 2 / (self.num_agents - 1) - 1 47 | positions = positions.reshape(self.num_agents, 1) 48 | foods = foods.reshape(1, self.num_agents) 49 | # (num_agents, num_agents) matrix where rows are agents and columns food 50 | distance = positions - foods 51 | weights = np.exp(-np.square(distance)) 52 | food_values = 1 / weights.sum(axis=0) 53 | rewards = tuple(weights.dot(food_values) - 1) 54 | 55 | obs = self._get_obs() 56 | done = False 57 | info = {} 58 | return obs, rewards, done, info 59 | 60 | def seed(self, seed): 61 | self.np_random.seed(seed) 62 | 63 | def render(self, mode="human"): 64 | return ", ".join(["{:3f} @ {:3f}".format(pos, vel) for pos, vel in self.state]) 65 | -------------------------------------------------------------------------------- /src/aprl/envs/matrix_game.py: -------------------------------------------------------------------------------- 1 | """Two-player, normal-form games with symmetric action spaces.""" 2 | 3 | from gym.spaces import Discrete, Tuple 4 | import numpy as np 5 | 6 | from aprl.envs.multi_agent import MultiAgentEnv 7 | 8 | 9 | class MatrixGameEnv(MultiAgentEnv): 10 | """Models two-player, normal-form games with symmetrically sized action space.""" 11 | 12 | metadata = {"render.modes": ["human"]} 13 | ACTION_TO_SYM = None 14 | 15 | def __init__(self, num_actions, payoff): 16 | """payoff_matrices must be a pair of num_actions*num_actions payoff matrices.""" 17 | agent_space = Discrete(num_actions) 18 | overall_space = Tuple((agent_space, agent_space)) 19 | self.action_space = overall_space 20 | self.observation_space = overall_space 21 | super().__init__(num_agents=2) 22 | 23 | payoff = np.array(payoff) 24 | assert payoff.shape == (2, num_actions, num_actions) 25 | self.payoff = payoff 26 | 27 | def step(self, action_n): 28 | assert len(action_n) == 2 29 | i, j = action_n 30 | # observation is the other players move 31 | self.obs_n = (j, i) 32 | rew_n = self.payoff[:, i, j] 33 | done = False 34 
| return self.obs_n, rew_n, done, dict() 35 | 36 | def reset(self): 37 | # State is the previous players' actions, so this doesn't make much sense; 38 | # just assume (0, 0) is start. 39 | self.obs_n = (0, 0) 40 | return self.obs_n 41 | 42 | def seed(self, seed=None): 43 | # No-op, there is no randomness in this environment. 44 | return 45 | 46 | def render(self, mode="human"): 47 | # note observations are flipped -- each player observes the other agent's action 48 | p2, p1 = self.obs_n 49 | if self.ACTION_TO_SYM is not None: 50 | p1, p2 = tuple(map(self.ACTION_TO_SYM.get, (p1, p2))) 51 | return f"P1: {p1}, P2: {p2}" 52 | 53 | 54 | class IteratedMatchingPenniesEnv(MatrixGameEnv): 55 | ACTION_TO_SYM = {0: "H", 1: "T"} 56 | 57 | def __init__(self): 58 | p1_payoff = np.array([[1, -1], [-1, 1]]) 59 | payoff = [p1_payoff, -p1_payoff] 60 | return super().__init__(num_actions=2, payoff=payoff) 61 | 62 | 63 | class RockPaperScissorsEnv(MatrixGameEnv): 64 | ACTION_TO_SYM = {0: "R", 1: "P", 2: "S"} 65 | 66 | def __init__(self): 67 | p1_payoff = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]]) 68 | payoff = [p1_payoff, -p1_payoff] 69 | return super().__init__(num_actions=3, payoff=payoff) 70 | -------------------------------------------------------------------------------- /src/aprl/envs/observation_masking.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from gym.envs import registration 4 | from gym_compete.new_envs.agents.agent import Agent 5 | from gym_compete.new_envs.multi_agent_env import MultiAgentEnv 6 | import numpy as np 7 | 8 | 9 | def make_mask_from_class(cls): 10 | if not issubclass(cls, Agent): 11 | raise TypeError(f"You have passed in '{cls}', expected subclass of 'Agent'") 12 | 13 | class AdversaryMaskedGymCompeteAgent(cls): 14 | def __init__( 15 | self, 16 | agent_to_mask, 17 | agents_to_hide=None, 18 | masking_type="initialization", 19 | noise_magnitude=None, 20 | ): 21 | if not isinstance(agent_to_mask, cls): 22 | raise TypeError( 23 | f"You have passed in '{type(agent_to_mask)}', " f"requires instance of '{cls}'" 24 | ) 25 | 26 | self.agent_to_mask = agent_to_mask 27 | self.agents_to_hide = agents_to_hide 28 | self.noise_magnitude = noise_magnitude 29 | self.masking_type = masking_type 30 | if self.masking_type == "additive_noise" and self.noise_magnitude is None: 31 | raise ValueError( 32 | "To create a noisy observation masker, you must specify magnitude " 33 | "of desired Gaussian noise" 34 | ) 35 | 36 | other_agent_qpos = super(AdversaryMaskedGymCompeteAgent, self).get_other_agent_qpos() 37 | self.initial_values = {} 38 | for other_agent_id in other_agent_qpos: 39 | self.initial_values[other_agent_id] = other_agent_qpos[other_agent_id] 40 | self.initial_other_qpos = super(AdversaryMaskedGymCompeteAgent, self).get_other_qpos() 41 | 42 | def _get_masking_given_initial(self, initial_position_value, true_current_position): 43 | if self.masking_type == "zeros": 44 | return np.zeros_like(initial_position_value) 45 | elif self.masking_type == "debug": 46 | return np.full_like(initial_position_value, fill_value=-4.2) 47 | elif self.masking_type == "initialization": 48 | return initial_position_value 49 | elif self.masking_type == "additive_noise": 50 | noise = np.random.normal( 51 | scale=self.noise_magnitude, size=initial_position_value.shape 52 | ) 53 | return true_current_position + noise 54 | else: 55 | raise ValueError(f"Unsupported masking type '{self.masking_type}'") 56 | 57 | def get_other_agent_qpos(self): 58 | outp = {} 59 | for
other_agent_id in self.initial_values: 60 | if self.agents_to_hide is None or other_agent_id in self.agents_to_hide: 61 | true_current_pos = self.agent_to_mask.get_other_agent_qpos()[other_agent_id] 62 | outp[other_agent_id] = self._get_masking_given_initial( 63 | initial_position_value=self.initial_values[other_agent_id], 64 | true_current_position=true_current_pos, 65 | ) 66 | return outp 67 | 68 | def get_other_qpos(self): 69 | true_current_pos = self.agent_to_mask.get_other_qpos() 70 | return self._get_masking_given_initial( 71 | initial_position_value=self.initial_other_qpos, 72 | true_current_position=true_current_pos, 73 | ) 74 | 75 | def __getattr__(self, item): 76 | return getattr(self.agent_to_mask, item) 77 | 78 | return AdversaryMaskedGymCompeteAgent 79 | 80 | 81 | def make_mask_for_env(env_name, agent_index): 82 | spec = registration.registry.spec(env_name) 83 | agent_names = spec._kwargs["agent_names"] 84 | agent_name = agent_names[agent_index] 85 | agent_cls = MultiAgentEnv.AGENT_MAP[agent_name][1] 86 | return make_mask_from_class(agent_cls) 87 | 88 | 89 | def make_mask_agent_wrappers(env_name, agent_index, **kwargs): 90 | masker = make_mask_for_env(env_name, agent_index) 91 | masker = functools.partial(masker, **kwargs) 92 | return {agent_index: masker} 93 | -------------------------------------------------------------------------------- /src/aprl/envs/sumo_auto_contact.py: -------------------------------------------------------------------------------- 1 | from gym_compete.new_envs import SumoEnv 2 | 3 | 4 | class SumoAutoContactEnv(SumoEnv): 5 | """ 6 | Same as SumoEnv but agents automatically contact one another. 7 | This is so that falling or exiting the stage without touching 8 | the opponent counts as a loss and not a tie. 9 | """ 10 | 11 | def reset(self, margins=None, version=None): 12 | ob = super(SumoAutoContactEnv, self).reset(margins, version) 13 | self.agent_contacts = True 14 | return ob 15 | -------------------------------------------------------------------------------- /src/aprl/multi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/multi/__init__.py -------------------------------------------------------------------------------- /src/aprl/multi/common.py: -------------------------------------------------------------------------------- 1 | """Configuration that is common between multi.train and multi.score. 2 | 3 | In particular, configures sensible defaults for upload directory and Ray server 4 | depending on if running on EC2 or baremetal. 
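When platform is not set explicitly, it is auto-detected: 'ec2' if the EC2 instance-metadata endpoint responds, 'baremetal' if ~/ray_bootstrap_config.yaml exists on the machine, and 'local' otherwise.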
5 | """ 6 | 7 | import functools 8 | import getpass 9 | import hashlib 10 | import json 11 | import os 12 | import os.path as osp 13 | import shlex 14 | import socket 15 | import subprocess 16 | from typing import Any, Dict 17 | import urllib 18 | import uuid 19 | 20 | import ray 21 | from ray import tune 22 | 23 | from aprl.common import utils 24 | 25 | 26 | def _detect_ec2(): 27 | """Auto-detect if we are running on EC2.""" 28 | try: 29 | EC2_ID_URL = "http://169.254.169.254/latest/dynamic/instance-identity/document" 30 | with urllib.request.urlopen(EC2_ID_URL, timeout=3) as f: 31 | response = f.read().decode() 32 | if "availabilityZone" in response: 33 | return True 34 | else: 35 | raise ValueError(f"Received unexpected response from '{EC2_ID_URL}'") 36 | except urllib.error.URLError: 37 | return False 38 | 39 | 40 | def _rsync_func(local_dir, remote_uri): 41 | """rsync data from worker to a remote location (by default the driver).""" 42 | # SOMEDAY: This function blocks until syncing completes, which is unfortunate. 43 | # If we instead specified a shell command, ray.tune._LogSyncer would run it asynchronously. 44 | # But we need to do a two-stage command, creating the directories first, because rsync will 45 | # balk if destination directory does not exist; so no easy way to do that. 46 | remote_host, ssh_key, *remainder = remote_uri.split(":") 47 | remote_dir = ":".join(remainder) # remote directory may contain : 48 | remote_dir = shlex.quote(remote_dir) # make safe for SSH/rsync call 49 | 50 | ssh_command = ["ssh", "-o", "StrictHostKeyChecking=no", "-i", ssh_key] 51 | ssh_mkdir = ssh_command + [remote_host, "mkdir", "-p", remote_dir] 52 | subprocess.run(ssh_mkdir, check=True) 53 | 54 | rsync = [ 55 | "rsync", 56 | "-rlptv", 57 | "-e", 58 | " ".join(ssh_command), 59 | f"{local_dir}/", 60 | f"{remote_host}:{remote_dir}", 61 | ] 62 | subprocess.run(rsync) 63 | 64 | 65 | def make_sacred(ex, worker_name, worker_fn): 66 | @ex.config 67 | def default_config(): 68 | spec = {} # Ray spec 69 | platform = None # hosting: 'baremetal' or 'ec2' 70 | s3_bucket = None # results storage on 'ec2' platform 71 | baremetal = {} # config options for 'baremetal' platform 72 | local_dir = None # results storage on 'local' platform 73 | ray_server = None # if None, start cluster on local machine 74 | upload_root = None # root of upload_dir 75 | init_kwargs = {} # options for ray.init 76 | exp_name = "default" # experiment name 77 | 78 | _ = locals() # quieten flake8 unused variable warning 79 | del _ 80 | 81 | @ex.config 82 | def ec2_config(platform, s3_bucket, spec): 83 | """When running on AWS EC2 cloud. 84 | 85 | If you are not the authors of this project, you will need to override s3_bucket.""" 86 | if platform is None: 87 | if _detect_ec2(): 88 | platform = "ec2" 89 | 90 | if platform == "ec2": 91 | # We're running on EC2 92 | if s3_bucket is None: 93 | s3_bucket = "adversarial-policies" 94 | 95 | spec["sync_config"] = {"upload_dir": f"s3://{s3_bucket}/"} 96 | ray_server = "localhost:6379" 97 | 98 | _ = locals() # quieten flake8 unused variable warning 99 | del _ 100 | 101 | @ex.config 102 | def baremetal_config(platform, baremetal, spec): 103 | """When running in bare-metal Ray cluster (i.e. not in cloud). 104 | 105 | Assumes we're running on the head node. Requires the worker have permission to rsync 106 | to the head node. 
The intended config is they run with an SSH key that allows login to 107 | the user from any machine in the cluster.""" 108 | if platform is None: 109 | if osp.exists(osp.expanduser("~/ray_bootstrap_config.yaml")): 110 | platform = "baremetal" 111 | 112 | if platform == "baremetal": 113 | baremetal = dict(baremetal) 114 | if "ssh_key" not in baremetal: 115 | baremetal["ssh_key"] = osp.expanduser("~/ray_bootstrap_key.pem") 116 | if "host" not in baremetal: 117 | baremetal["host"] = f"{getpass.getuser()}@{socket.getfqdn()}" 118 | if "dir" not in baremetal: 119 | baremetal["dir"] = osp.expanduser("~/adversarial-policies/data") 120 | 121 | spec["sync_config"] = { 122 | "upload_dir": ":".join([baremetal["host"], baremetal["ssh_key"], baremetal["dir"]]), 123 | "sync_to_cloud": tune.function(_rsync_func), 124 | } 125 | ray_server = "localhost:6379" 126 | 127 | _ = locals() # quieten flake8 unused variable warning 128 | del _ 129 | 130 | @ex.config 131 | def local_config(platform, local_dir, spec): 132 | if platform is None: 133 | # No platform specified; assume local if no previous config autodetected. 134 | platform = "local" 135 | 136 | if platform == "local": 137 | if local_dir is None: 138 | local_dir = osp.abspath(osp.join(os.getcwd(), "data")) 139 | spec["sync_config"] = { 140 | "sync_to_cloud": ("mkdir -p {target} && " "rsync -rlptv {source}/ {target}"), 141 | "upload_dir": local_dir, 142 | } 143 | 144 | @ex.capture 145 | def run( 146 | base_config: Dict[str, Any], 147 | ray_server: str, 148 | init_kwargs: Dict[str, Any], 149 | exp_name: str, 150 | spec: Dict[str, Any], 151 | ) -> ray.tune.ExperimentAnalysis: 152 | ray.init(address=ray_server, **init_kwargs) 153 | 154 | # We have to register the function we're going to call with Ray. 155 | # We partially apply worker_fn, so it's different for each experiment. 156 | # Compute a hash based on the config to make sure it has a unique name! 157 | # Note Ray does let you pass a worker_fn directly without registering, but then 158 | # it registers using the function name (which may not be unique). 159 | cfg = { 160 | # ReadOnlyDict's aren't serializable: see sacred issue #499 161 | "base_config": utils.sacred_copy(base_config), 162 | "exp_name": exp_name, 163 | } 164 | cfg_str = json.dumps(cfg) 165 | hasher = hashlib.md5() # we are not worried about security here 166 | hasher.update(cfg_str.encode("utf8")) 167 | cfg_hash = hasher.hexdigest() 168 | 169 | trainable_name = f"{worker_name}-{cfg_hash}" 170 | base_config = utils.sacred_copy(base_config) 171 | trainable_fn = functools.partial(worker_fn, base_config) 172 | tune.register_trainable(trainable_name, trainable_fn) 173 | 174 | exp_id = f"{ex.path}/{exp_name}/{utils.make_timestamp()}-{uuid.uuid4().hex}" 175 | spec = utils.sacred_copy(spec) 176 | 177 | # Disable TensorBoard logger: fails due to the spec containing string variables. 
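# For reference, the hashed trainable name computed above can be reproduced with
# nothing but the standard library; a minimal sketch (the worker name and config
# values here are illustrative placeholders, not taken from a real run):
#
#     import hashlib
#     import json
#
#     cfg = {"base_config": {"episodes": 2}, "exp_name": "debug"}
#     digest = hashlib.md5(json.dumps(cfg).encode("utf8")).hexdigest()
#     trainable_name = f"score-{digest}"  # "score-" followed by 32 hex characters
#
# Registering under this name means two runs with different configs never collide
# in Ray's trainable registry, while re-running an identical config reuses the name.
# With TensorBoard disabled (see the note above), only the JSON and CSV loggers are
# passed to tune.run below.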
178 | tune_loggers = [tune.logger.JsonLogger, tune.logger.CSVLogger] 179 | sync_config = None 180 | if "sync_config" in spec: 181 | sync_config = tune.SyncConfig(**spec["sync_config"]) 182 | try: 183 | result = tune.run( 184 | trainable_name, 185 | name=exp_id, 186 | config=spec["config"], 187 | sync_config=sync_config, 188 | loggers=tune_loggers, 189 | **spec["run_kwargs"], 190 | ) 191 | finally: 192 | ray.shutdown() 193 | 194 | return result, exp_id 195 | 196 | return run 197 | -------------------------------------------------------------------------------- /src/aprl/multi/common_worker.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | import sacred 4 | 5 | 6 | def flatten_config(config): 7 | """Take dict with ':'-separated keys and values or tuples of values, 8 | flattening to single key-value pairs. 9 | 10 | Example: _flatten_config({'a:b': (1, 2), 'c': 3}) -> {'a: 1, 'b': 2, 'c': 3}.""" 11 | new_config = {} 12 | for ks, vs in config.items(): 13 | ks = ks.split(":") 14 | if len(ks) == 1: 15 | vs = (vs,) 16 | 17 | for k, v in zip(ks, vs): 18 | assert k not in new_config, f"duplicate key '{k}'" 19 | new_config[k] = v 20 | 21 | return new_config 22 | 23 | 24 | def update(d, u): 25 | """Recursive dictionary update.""" 26 | for k, v in u.items(): 27 | if isinstance(v, collections.Mapping): 28 | d[k] = update(d.get(k, {}), v) 29 | else: 30 | d[k] = v 31 | return d 32 | 33 | 34 | def fix_sacred_capture(): 35 | """Workaround for Sacred stdout capture issue #195 and Ray issue #5718.""" 36 | # TODO(adam): remove once Sacred issue #195 is closed 37 | sacred.SETTINGS.CAPTURE_MODE = "sys" 38 | -------------------------------------------------------------------------------- /src/aprl/multi/score.py: -------------------------------------------------------------------------------- 1 | """Hyperparameter search for train.py using Ray Tune.""" 2 | 3 | import json 4 | import logging 5 | import math 6 | import os 7 | import os.path as osp 8 | import shutil 9 | import tempfile 10 | 11 | from ray import tune 12 | from sacred import Experiment 13 | from sacred.observers import FileStorageObserver 14 | 15 | from aprl.configs.multi.score import make_configs 16 | from aprl.envs import VICTIM_INDEX 17 | from aprl.envs.gym_compete import env_name_to_canonical 18 | from aprl.multi import common, score_worker 19 | from aprl.score_agent import score_ex 20 | 21 | multi_score_ex = Experiment("multi_score", ingredients=[score_ex]) 22 | pylog = logging.getLogger("aprl.multi.score") 23 | 24 | # Load common configs (e.g. upload directories) and define the run command 25 | run = common.make_sacred(multi_score_ex, "score", score_worker.score_worker) 26 | 27 | # Load named configs for individual experiments (these change a lot, so keep out of this file) 28 | make_configs(multi_score_ex) 29 | 30 | 31 | @multi_score_ex.config 32 | def default_config(score): 33 | spec = { # experiment specification 34 | "run_kwargs": {"resources_per_trial": {"cpu": math.ceil(score["num_env"] / 2)}}, 35 | "config": {}, 36 | } 37 | save_path = None # path to save JSON results. If None, do not save. 
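# How this spec is consumed: common.make_sacred's run() passes spec["config"] to
# tune.run as the search space and unpacks spec["run_kwargs"] as extra keyword
# arguments. A hypothetical sweep over opponent policies could therefore be
# expressed as the following sketch (values are placeholders; compare the
# debug_config named config below):
#
#     spec = {
#         "run_kwargs": {"resources_per_trial": {"cpu": 2}},
#         "config": {"agent_b_path": tune.grid_search(["1", "2", "3"])},
#     }
#
# Each grid_search value becomes one Ray Tune trial, scored independently.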
38 | 39 | _ = locals() # quieten flake8 unused variable warning 40 | del _ 41 | 42 | 43 | @score_ex.config 44 | def score_config(): 45 | render = False 46 | videos = False 47 | 48 | _ = locals() # quieten flake8 unused variable warning 49 | del _ 50 | 51 | 52 | @multi_score_ex.named_config 53 | def debug_config(score): 54 | """Try zero-agent and random-agent against pre-trained zoo policies.""" 55 | score = dict(score) 56 | score["episodes"] = 1 57 | score["agent_a_type"] = "zoo" 58 | score["agent_b_type"] = "zoo" 59 | spec = {"config": {"agent_a_path": tune.grid_search(["1", "2"])}} 60 | exp_suffix = "debug" 61 | _ = locals() # quieten flake8 unused variable warning 62 | del _ 63 | 64 | 65 | def _remap_keys(d): 66 | return [{"k": k, "v": v} for k, v in d.items()] 67 | 68 | 69 | @multi_score_ex.main 70 | def multi_score(score, save_path): 71 | f = None 72 | try: 73 | tmp_path = None 74 | if save_path is not None: 75 | f = open(save_path, "w") # open it now so we fail fast if file is unwriteable 76 | else: 77 | fd, tmp_path = tempfile.mkstemp(prefix="multi_score") 78 | f = os.fdopen(fd, mode="w") 79 | save_path = tmp_path 80 | 81 | analysis, exp_id = run(base_config=score) 82 | trials = analysis.trials 83 | additional_index_keys = score.get("index_keys", []) 84 | results = {} 85 | for trial in trials: 86 | idx = trial.last_result["idx"] 87 | cols = ["env_name", "agent_a_type", "agent_a_path", "agent_b_type", "agent_b_path"] 88 | cols += additional_index_keys 89 | key = tuple(idx[col] for col in cols) 90 | results[key] = trial.last_result["score"] 91 | 92 | json.dump(_remap_keys(results), f) 93 | finally: 94 | if f is not None: 95 | f.close() 96 | multi_score_ex.add_artifact(save_path, name="scores.json") 97 | if tmp_path is not None: 98 | os.unlink(tmp_path) 99 | 100 | return {"scores": results, "exp_id": exp_id} 101 | 102 | 103 | def run_external(named_configs, post_named_configs, config_updates, adversary_path=None): 104 | """Run multiple multi_score experiments. Intended for use by external scripts, 105 | not accessible from commandline. 106 | 107 | :param named_configs: (list) list of named configs, executed one by one 108 | :param post_named_configs: (list) list of base named configs, applied after the 109 | current config from `named_configs`. 110 | :param config_updates: (dict) a dict of config options, overriding the named config. 111 | :param adversary_path: (str or None) path to JSON, needed by adversary_transfer config. 112 | :return (dict) mapping from named configs to their output directory 113 | """ 114 | # Sad workaround for Sacred config limitation, 115 | # see aprl.configs.multi.score:_get_adversary_paths 116 | os.environ["ADVERSARY_PATHS"] = adversary_path 117 | 118 | output_dir = {} 119 | for trial_configs in named_configs: 120 | configs = list(trial_configs) + list(post_named_configs) 121 | run = multi_score_ex.run(named_configs=configs, config_updates=config_updates) 122 | assert run.status == "COMPLETED" 123 | exp_id = run.result["exp_id"] 124 | output_dir[tuple(trial_configs)] = exp_id 125 | 126 | return output_dir 127 | 128 | 129 | def extract_data(path_generator, out_dir, experiment_dirs, ray_upload_dir): 130 | """Helper method to extract data from multiple_score experiments.""" 131 | for experiment, experiment_dir in experiment_dirs.items(): 132 | experiment_root = osp.join(ray_upload_dir, experiment_dir) 133 | # video_root contains one directory for each score_agent trial. 134 | # These directories have names of form score-__... 
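# Sketch of the on-disk layout walked below (trial directory names are
# illustrative; the exact suffix Ray Tune appends varies by version and config):
#
#     <ray_upload_dir>/
#         <experiment_dir>/
#             score-<trial suffix>/
#                 data/sacred/score/1/config.json    # Sacred config, read below
#                 data/sacred/score/1/...            # other Sacred outputs (videos, etc.)
#
# The loop walks one level of trial directories, recovers victim/opponent metadata
# from each config.json, and copies out the artifact selected by path_generator.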
135 | for dir_entry in os.scandir(experiment_root): 136 | if not dir_entry.is_dir(): 137 | continue 138 | 139 | trial_name = dir_entry.name 140 | # Each trial contains the Sacred output from score_agent. 141 | # Note Ray Tune is running with a fresh working directory per trial, so Sacred 142 | # output will always be at score/1. 143 | trial_root = osp.join(experiment_root, trial_name) 144 | 145 | sacred_config = osp.join(trial_root, "data", "sacred", "score", "1", "config.json") 146 | with open(sacred_config, "r") as f: 147 | cfg = json.load(f) 148 | 149 | def agent_key(agent): 150 | return cfg[agent + "_type"], cfg[agent + "_path"] 151 | 152 | env_name = cfg["env_name"] 153 | victim_index = VICTIM_INDEX[env_name] 154 | if victim_index == 0: 155 | victim_type, victim_path = agent_key("agent_a") 156 | opponent_type, opponent_path = agent_key("agent_b") 157 | else: 158 | victim_type, victim_path = agent_key("agent_b") 159 | opponent_type, opponent_path = agent_key("agent_a") 160 | 161 | if "multicomp" in cfg["env_name"]: 162 | env_name = env_name_to_canonical(env_name) 163 | env_name = env_name.replace("/", "-") # sanitize 164 | 165 | src_path, new_name, suffix = path_generator( 166 | trial_root=trial_root, 167 | cfg=cfg, 168 | env_sanitized=env_name, 169 | victim_index=victim_index, 170 | victim_type=victim_type, 171 | victim_path=victim_path, 172 | opponent_type=opponent_type, 173 | opponent_path=opponent_path, 174 | ) 175 | dst_path = osp.join(out_dir, f"{new_name}.{suffix}") 176 | shutil.copy(src_path, dst_path) 177 | dst_config = osp.join(out_dir, f"{new_name}_sacred.json") 178 | shutil.copy(sacred_config, dst_config) 179 | 180 | 181 | def main(): 182 | observer = FileStorageObserver(osp.join("data", "sacred", "multi_score")) 183 | multi_score_ex.observers.append(observer) 184 | multi_score_ex.run_commandline() 185 | 186 | 187 | if __name__ == "__main__": 188 | main() 189 | -------------------------------------------------------------------------------- /src/aprl/multi/score_worker.py: -------------------------------------------------------------------------------- 1 | """Helper functions for training.py executed on worker nodes using Ray Tune. 2 | 3 | It's important these are all pickleable.""" 4 | 5 | import os.path as osp 6 | 7 | from sacred import observers 8 | 9 | from aprl.multi import common_worker 10 | 11 | 12 | def score_worker(base_config, tune_config, reporter): 13 | """Run a aprl.score experiment with specified config, logging to reporter. 14 | 15 | :param base_config: (dict) default config 16 | :param tune_config: (dict) overrides values in base_config 17 | :param reporter: (ray.tune.StatusReporter) Ray Tune internal logger.""" 18 | common_worker.fix_sacred_capture() 19 | 20 | # score_ex is not pickleable, so we cannot close on it. 21 | # Instead, import inside the function. 22 | from aprl.score_agent import score_ex 23 | 24 | config = dict(base_config) 25 | tune_config = common_worker.flatten_config(tune_config) 26 | common_worker.update(config, tune_config) 27 | 28 | # We're breaking the Sacred interface by running an experiment from within another experiment. 29 | # This is the best thing we can do, since we need to run the experiment with varying configs. 30 | # Just be careful: this could easily break things. 
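# The pattern is: attach a FileStorageObserver so the nested run still records its
# own artifacts, call ex.run() with config_updates, then read run.result. A
# stripped-down sketch of the same pattern with a toy experiment (names and values
# are illustrative only):
#
#     from sacred import Experiment
#     from sacred.observers import FileStorageObserver
#
#     toy_ex = Experiment("toy")
#
#     @toy_ex.config
#     def _config():
#         x = 1  # overridden via config_updates below
#
#     @toy_ex.main
#     def _main(x):
#         return x * 2
#
#     toy_ex.observers.append(FileStorageObserver("data/sacred/toy"))
#     run = toy_ex.run(config_updates={"x": 21})
#     assert run.result == 42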
31 | observer = observers.FileStorageObserver(osp.join("data", "sacred", "score")) 32 | score_ex.observers.append(observer) 33 | run = score_ex.run(config_updates=config) 34 | index_keys = config.get("index_keys", []) 35 | 36 | idx = { 37 | k: v 38 | for k, v in config.items() 39 | if k.startswith("agent") or k == "env_name" or k in index_keys 40 | } 41 | 42 | reporter(done=True, score=run.result, idx=idx) 43 | -------------------------------------------------------------------------------- /src/aprl/multi/train.py: -------------------------------------------------------------------------------- 1 | """Hyperparameter search for train.py using Ray Tune.""" 2 | 3 | import logging 4 | import math 5 | import os.path as osp 6 | 7 | from ray import tune 8 | from sacred import Experiment 9 | from sacred.observers import FileStorageObserver 10 | 11 | from aprl.configs.multi.train import make_configs 12 | from aprl.multi import common, train_worker 13 | from aprl.train import train_ex 14 | 15 | multi_train_ex = Experiment("multi_train", ingredients=[train_ex]) 16 | pylog = logging.getLogger("aprl.multi.train") 17 | 18 | # Load common configs (e.g. upload directories) and define the run command 19 | run = common.make_sacred(multi_train_ex, "train_rl", train_worker.train_rl) 20 | 21 | # Load named configs for individual experiments (these change a lot, so keep out of this file) 22 | make_configs(multi_train_ex) 23 | 24 | 25 | @multi_train_ex.config 26 | def default_config(train): 27 | spec = { # experiment specification 28 | "run_kwargs": {"resources_per_trial": {"cpu": math.ceil(train["num_env"] / 2)}}, 29 | "config": {}, 30 | } 31 | 32 | _ = locals() # quieten flake8 unused variable warning 33 | del _ 34 | 35 | 36 | @multi_train_ex.named_config 37 | def debug_config(): 38 | spec = { 39 | "config": {"seed": tune.grid_search([0, 1])}, 40 | } 41 | exp_name = "debug" 42 | _ = locals() # quieten flake8 unused variable warning 43 | del _ 44 | 45 | 46 | @multi_train_ex.main 47 | def multi_train(train): 48 | return run(base_config=train) 49 | 50 | 51 | def main(): 52 | observer = FileStorageObserver(osp.join("data", "sacred", "multi_train")) 53 | multi_train_ex.observers.append(observer) 54 | multi_train_ex.run_commandline() 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /src/aprl/multi/train_worker.py: -------------------------------------------------------------------------------- 1 | """Helper functions for training.py executed on worker nodes using Ray Tune. 2 | 3 | It's important these are all pickleable.""" 4 | 5 | import os.path as osp 6 | 7 | from sacred import observers 8 | from stable_baselines import logger 9 | 10 | from aprl.multi import common_worker 11 | 12 | 13 | class ReporterOutputFormat(logger.KVWriter): 14 | """Key-value logging plugin for Stable Baselines that writes to a Ray Tune StatusReporter.""" 15 | 16 | def __init__(self, reporter): 17 | self.last_kvs = dict() 18 | self.reporter = reporter 19 | 20 | def writekvs(self, kvs): 21 | self.last_kvs = kvs 22 | self.reporter(**kvs) 23 | 24 | 25 | def train_rl(base_config, tune_config, reporter): 26 | """Run a aprl.train experiment with specified config, logging to reporter. 
27 | 28 | :param base_config: (dict) default config 29 | :param tune_config: (dict) overrides values in base_config 30 | :param reporter: (ray.tune.StatusReporter) Ray Tune internal logger.""" 31 | common_worker.fix_sacred_capture() 32 | 33 | # train_ex is not pickleable, so we cannot close on it. 34 | # Instead, import inside the function. 35 | from aprl.train import train_ex 36 | 37 | config = dict(base_config) 38 | tune_config = common_worker.flatten_config(tune_config) 39 | common_worker.update(config, tune_config) 40 | tune_kv_str = "-".join([f"{k}={v}" for k, v in tune_config.items()]) 41 | config["exp_name"] = config["exp_name"] + "-" + tune_kv_str 42 | 43 | output_format = ReporterOutputFormat(reporter) 44 | config["log_output_formats"] = [output_format] 45 | 46 | # We're breaking the Sacred interface by running an experiment from within another experiment. 47 | # This is the best thing we can do, since we need to run the experiment with varying configs. 48 | # Just be careful: this could easily break things. 49 | observer = observers.FileStorageObserver(osp.join("data", "sacred", "train")) 50 | 51 | train_ex.observers.append(observer) 52 | train_ex.run(config_updates=config) 53 | reporter(done=True, **output_format.last_kvs) 54 | -------------------------------------------------------------------------------- /src/aprl/policies/__init__.py: -------------------------------------------------------------------------------- 1 | """RL policies, models and related functionality.""" 2 | -------------------------------------------------------------------------------- /src/aprl/policies/base.py: -------------------------------------------------------------------------------- 1 | """RL policies, models and adaptor classes.""" 2 | 3 | from typing import Optional, Type 4 | 5 | import gym 6 | import numpy as np 7 | from stable_baselines.common import BaseRLModel 8 | from stable_baselines.common.policies import BasePolicy 9 | import tensorflow as tf 10 | 11 | 12 | class PredictOnlyModel(BaseRLModel): 13 | """Abstract class for policies pretending to be RL algorithms (models). 14 | 15 | Provides stub implementations that raise NotImplementedError. 16 | The predict method is left as abstract and must be implemented in base class.""" 17 | 18 | def __init__( 19 | self, 20 | policy: Type[BasePolicy], 21 | sess: Optional[tf.Session], 22 | observation_space: gym.Space, 23 | action_space: gym.Space, 24 | ): 25 | """Constructs a DummyModel with given policy and session. 26 | :param policy: (BasePolicy) a loaded policy. 27 | :param sess: (tf.Session or None) a TensorFlow session. 28 | :return an instance of BaseRLModel. 
29 | """ 30 | super().__init__(policy=policy, env=None, requires_vec_env=True, policy_base="Dummy") 31 | self.sess = sess 32 | self.observation_space = observation_space 33 | self.action_space = action_space 34 | 35 | def setup_model(self): 36 | raise NotImplementedError() 37 | 38 | def learn(self): 39 | raise NotImplementedError() 40 | 41 | def action_probability(self, observation, state=None, mask=None, actions=None): 42 | raise NotImplementedError() 43 | 44 | def save(self, save_path): 45 | raise NotImplementedError() 46 | 47 | def load(self): 48 | raise NotImplementedError() 49 | 50 | def _get_pretrain_placeholders(self): 51 | raise NotImplementedError() 52 | 53 | def get_parameter_list(self): 54 | raise NotImplementedError() 55 | 56 | 57 | class ModelWrapper(PredictOnlyModel): 58 | """Base class for wrapping RL algorithms (models).""" 59 | 60 | def __init__(self, model: BaseRLModel): 61 | super().__init__( 62 | policy=model.policy, 63 | sess=model.sess, 64 | observation_space=model.observation_space, 65 | action_space=model.action_space, 66 | ) 67 | self.model = model 68 | 69 | 70 | class PolicyToModel(PredictOnlyModel): 71 | """Converts BasePolicy to a BaseRLModel with only predict implemented.""" 72 | 73 | def __init__(self, policy_obj: BasePolicy): 74 | """Constructs a BaseRLModel using policy for predictions. 75 | :param policy: a loaded policy. 76 | :return an instance of BaseRLModel. 77 | """ 78 | super().__init__( 79 | policy=type(policy_obj), 80 | sess=policy_obj.sess, 81 | observation_space=policy_obj.ob_space, 82 | action_space=policy_obj.ac_space, 83 | ) 84 | self.policy_obj = policy_obj 85 | 86 | def _get_policy_out(self, observation, state, mask, transparent, deterministic=False): 87 | if state is None: 88 | state = self.policy_obj.initial_state 89 | if mask is None: 90 | mask = [False for _ in range(self.policy_obj.n_env)] 91 | 92 | step_fn = self.policy_obj.step_transparent if transparent else self.policy_obj.step 93 | return step_fn(observation, state, mask, deterministic=deterministic) 94 | 95 | def predict(self, observation, state=None, mask=None, deterministic=False): 96 | policy_out = self._get_policy_out( 97 | observation, state, mask, transparent=False, deterministic=deterministic 98 | ) 99 | actions, _val, states, _neglogp = policy_out 100 | return actions, states 101 | 102 | def predict_transparent(self, observation, state=None, mask=None, deterministic=False): 103 | """Returns same values as predict, as well as a dictionary with transparent data.""" 104 | policy_out = self._get_policy_out( 105 | observation, state, mask, transparent=True, deterministic=deterministic 106 | ) 107 | actions, _val, states, _neglogp, data = policy_out 108 | return actions, states, data 109 | 110 | 111 | class OpenAIToStablePolicy(BasePolicy): 112 | """Converts an OpenAI Baselines Policy to a Stable Baselines policy.""" 113 | 114 | def __init__(self, old_policy, ob_space: gym.Space, ac_space: gym.Space): 115 | super().__init__( 116 | sess=old_policy.sess, 117 | ob_space=ob_space, 118 | ac_space=ac_space, 119 | n_env=1, 120 | n_steps=1, 121 | n_batch=1, 122 | ) 123 | self.old = old_policy 124 | 125 | @property 126 | def initial_state(self): 127 | return self.old.initial_state 128 | 129 | def step(self, obs, state=None, mask=None, deterministic=False): 130 | stochastic = not deterministic 131 | return self.old.step(obs, S=state, M=mask, stochastic=stochastic) 132 | 133 | def proba_step(self, obs, state=None, mask=None): 134 | raise NotImplementedError() 135 | 136 | 137 | class 
ConstantPolicy(BasePolicy): 138 | """Policy that returns a constant action.""" 139 | 140 | def __init__(self, env, constant): 141 | assert env.action_space.contains(constant) 142 | super().__init__( 143 | sess=None, 144 | ob_space=env.observation_space, 145 | ac_space=env.action_space, 146 | n_env=env.num_envs, 147 | n_steps=1, 148 | n_batch=1, 149 | ) 150 | self.constant = constant 151 | 152 | def step(self, obs, state=None, mask=None, deterministic=False): 153 | actions = np.array([self.constant] * self.n_env) 154 | return actions, None, None, None 155 | 156 | def proba_step(self, obs, state=None, mask=None): 157 | return self.step(obs, state=state, mask=mask) 158 | 159 | 160 | class ZeroPolicy(ConstantPolicy): 161 | """Policy that returns a zero action.""" 162 | 163 | def __init__(self, env): 164 | super().__init__(env, np.zeros(env.action_space.shape)) 165 | 166 | 167 | class RandomPolicy(BasePolicy): 168 | def __init__(self, env): 169 | super().__init__( 170 | sess=None, 171 | ob_space=env.observation_space, 172 | ac_space=env.action_space, 173 | n_env=env.num_envs, 174 | n_steps=1, 175 | n_batch=1, 176 | ) 177 | 178 | def step(self, obs, state=None, mask=None, deterministic=False): 179 | actions = np.array([self.ac_space.sample() for _ in range(self.n_env)]) 180 | return actions, None, None, None 181 | 182 | def proba_step(self, obs, state=None, mask=None): 183 | raise NotImplementedError() 184 | -------------------------------------------------------------------------------- /src/aprl/policies/loader.py: -------------------------------------------------------------------------------- 1 | """Load serialized policies of different types.""" 2 | 3 | import logging 4 | import os 5 | import pickle 6 | import sys 7 | 8 | import stable_baselines 9 | from stable_baselines.common import vec_env 10 | import tensorflow as tf 11 | 12 | from aprl.envs.gym_compete import load_zoo_agent 13 | from aprl.envs.multi_agent import FakeSingleSpacesVec 14 | from aprl.policies.base import ( 15 | ModelWrapper, 16 | OpenAIToStablePolicy, 17 | PolicyToModel, 18 | RandomPolicy, 19 | ZeroPolicy, 20 | ) 21 | 22 | pylog = logging.getLogger("aprl.policy_loader") 23 | 24 | 25 | class NormalizeModel(ModelWrapper): 26 | def __init__( 27 | self, 28 | model: stable_baselines.common.base_class.BaseRLModel, 29 | vec_normalize: vec_env.VecNormalize, 30 | ): 31 | super().__init__(model=model) 32 | self.vec_normalize = vec_normalize 33 | 34 | def predict(self, observation, state=None, mask=None, deterministic=False): 35 | norm_obs = self.vec_normalize.normalize_obs(observation) 36 | return self.model.predict(norm_obs, state, mask, deterministic) 37 | 38 | def predict_transparent(self, observation, state=None, mask=None, deterministic=False): 39 | """Returns same values as predict, as well as a dictionary with transparent data.""" 40 | norm_obs = self.vec_normalize.normalize_obs(observation) 41 | return self.model.predict_transparent(norm_obs, state, mask, deterministic) 42 | 43 | 44 | def load_vec_normalize(root_dir: str, venv: vec_env.VecEnv) -> vec_env.VecNormalize: 45 | try: 46 | normalize_path = os.path.join(root_dir, "vec_normalize.pkl") 47 | vec_normalize = vec_env.VecNormalize.load(normalize_path, venv) 48 | vec_normalize.training = False 49 | pylog.info(f"Loaded normalization statistics from '{normalize_path}'") 50 | return vec_normalize 51 | except FileNotFoundError: 52 | pass 53 | 54 | # Could not find vec_normalize.pkl: try loading old-style vec normalize. 
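# The two on-disk formats this helper accepts, for reference:
#   * new style: a single '<root_dir>/vec_normalize.pkl', restored by
#     VecNormalize.load(path, venv) above;
#   * old style: separate running-average pickles (typically obs_rms.pkl and
#     ret_rms.pkl), restored by load_running_average(root_dir) below.
# Typical use mirrors load_stable_baselines further down (sketch only):
#
#     vec_normalize = load_vec_normalize(root_dir, denv)
#     model = NormalizeModel(model, vec_normalize)  # normalizes obs before predict()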
55 | vec_normalize = vec_env.VecNormalize(venv, training=False) 56 | vec_normalize.load_running_average(root_dir) 57 | pylog.info(f"Loaded normalization statistics from '{root_dir}'") 58 | return vec_normalize 59 | 60 | 61 | def load_stable_baselines(cls): 62 | def f(root_dir, env, env_name, index, transparent_params): 63 | denv = FakeSingleSpacesVec(env, agent_id=index) 64 | pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'") 65 | model = load_backward_compatible_model(cls, root_dir, denv) 66 | 67 | try: 68 | vec_normalize = load_vec_normalize(root_dir, denv) 69 | model = NormalizeModel(model, vec_normalize) 70 | except FileNotFoundError: 71 | # No saved VecNormalize, must have not trained with normalization. 72 | pass 73 | 74 | return model 75 | 76 | return f 77 | 78 | 79 | def load_old_ppo2(root_dir, env, env_name, index, transparent_params): 80 | try: 81 | from baselines.ppo2 import ppo2 as ppo2_old 82 | except ImportError as e: 83 | msg = "{}. HINT: you need to install (OpenAI) Baselines to use old_ppo2".format(e) 84 | raise ImportError(msg) 85 | 86 | denv = FakeSingleSpacesVec(env, agent_id=index) 87 | possible_fnames = ["model.pkl", "final_model.pkl"] 88 | model_path = None 89 | for fname in possible_fnames: 90 | candidate_path = os.path.join(root_dir, fname) 91 | if os.path.exists(candidate_path): 92 | model_path = candidate_path 93 | if model_path is None: 94 | raise FileNotFoundError( 95 | f"Could not find model at '{root_dir}' " f"under any filename '{possible_fnames}'" 96 | ) 97 | 98 | graph = tf.Graph() 99 | sess = tf.Session(graph=graph) 100 | with sess.as_default(): 101 | with graph.as_default(): 102 | pylog.info(f"Loading Baselines PPO2 policy from '{model_path}'") 103 | policy = ppo2_old.learn( 104 | network="mlp", 105 | env=denv, 106 | total_timesteps=1, 107 | seed=0, 108 | nminibatches=4, 109 | log_interval=1, 110 | save_interval=1, 111 | load_path=model_path, 112 | ) 113 | stable_policy = OpenAIToStablePolicy( 114 | policy, ob_space=denv.observation_space, ac_space=denv.action_space 115 | ) 116 | model = PolicyToModel(stable_policy) 117 | 118 | try: 119 | normalize_path = os.path.join(root_dir, "normalize.pkl") 120 | with open(normalize_path, "rb") as f: 121 | old_vec_normalize = pickle.load(f) 122 | vec_normalize = vec_env.VecNormalize(denv, training=False) 123 | vec_normalize.obs_rms = old_vec_normalize.ob_rms 124 | vec_normalize.ret_rms = old_vec_normalize.ret_rms 125 | model = NormalizeModel(model, vec_normalize) 126 | pylog.info(f"Loaded normalization statistics from '{normalize_path}'") 127 | except FileNotFoundError: 128 | # We did not use VecNormalize during training, skip 129 | pass 130 | 131 | return model 132 | 133 | 134 | def load_zero(path, env, env_name, index, transparent_params): 135 | denv = FakeSingleSpacesVec(env, agent_id=index) 136 | policy = ZeroPolicy(denv) 137 | return PolicyToModel(policy) 138 | 139 | 140 | def load_random(path, env, env_name, index, transparent_params): 141 | denv = FakeSingleSpacesVec(env, agent_id=index) 142 | policy = RandomPolicy(denv) 143 | return PolicyToModel(policy) 144 | 145 | 146 | def mpi_unavailable_error(*args, **kwargs): 147 | raise ImportError("This algorithm requires MPI, which is not available.") 148 | 149 | 150 | # Lazy import for PPO1 and SAC, which have optional mpi dependency 151 | AGENT_LOADERS = { 152 | "zoo": load_zoo_agent, 153 | "ppo2": load_stable_baselines(stable_baselines.PPO2), 154 | "old_ppo2": load_old_ppo2, 155 | "zero": load_zero, 156 | "random": load_random, 157 | } 
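# Every entry in this registry is a callable with the same signature:
#     loader(policy_path, env, env_name, index, transparent_params) -> BaseRLModel
# so new policy types can be added by registering another loader. A hypothetical
# example that always plays one fixed action, built from classes defined in
# aprl.policies.base (sketch only, not part of the shipped loaders):
#
#     from aprl.policies.base import ConstantPolicy, PolicyToModel
#
#     def load_constant(path, env, env_name, index, transparent_params):
#         denv = FakeSingleSpacesVec(env, agent_id=index)
#         constant = denv.action_space.sample()  # could instead be parsed from `path`
#         return PolicyToModel(ConstantPolicy(denv, constant))
#
#     AGENT_LOADERS["constant"] = load_constant
#
# load_policy("constant", ...) would then construct the agent like any other type.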
158 | 159 | try: 160 | # MPI algorithms -- only visible if mpi4py installed 161 | from stable_baselines import PPO1, SAC 162 | 163 | AGENT_LOADERS["ppo1"] = load_stable_baselines(PPO1) 164 | AGENT_LOADERS["sac"] = load_stable_baselines(SAC) 165 | except ImportError: 166 | AGENT_LOADERS["ppo1"] = mpi_unavailable_error 167 | AGENT_LOADERS["sac"] = mpi_unavailable_error 168 | 169 | 170 | def load_policy(policy_type, policy_path, env, env_name, index, transparent_params=None): 171 | agent_loader = AGENT_LOADERS.get(policy_type) 172 | if agent_loader is None: 173 | raise ValueError(f"Unrecognized agent type '{policy_type}'") 174 | return agent_loader(policy_path, env, env_name, index, transparent_params) 175 | 176 | 177 | def load_backward_compatible_model(cls, root_dir, denv=None, **kwargs): 178 | """Backwards compatibility hack to load old pickled policies 179 | which still expect modelfree.* to exist. 180 | """ 181 | import aprl.training.scheduling # noqa: F401 182 | 183 | mock_modules = { 184 | "modelfree": "aprl", 185 | "modelfree.scheduling": "aprl.training.scheduling", 186 | "modelfree.training.scheduling": "aprl.training.scheduling", 187 | } 188 | for old, new in mock_modules.items(): 189 | sys.modules[old] = sys.modules[new] 190 | if "env" in kwargs: 191 | denv = kwargs["env"] 192 | del kwargs["env"] 193 | model_path = os.path.join(root_dir, "model.pkl") 194 | model = cls.load(model_path, env=denv, **kwargs) 195 | for old in mock_modules: 196 | del sys.modules[old] 197 | return model 198 | -------------------------------------------------------------------------------- /src/aprl/policies/transparent.py: -------------------------------------------------------------------------------- 1 | """Policies """ 2 | 3 | from abc import ABC 4 | 5 | import numpy as np 6 | from stable_baselines.common.policies import FeedForwardPolicy, nature_cnn 7 | import tensorflow as tf 8 | 9 | from aprl.envs.wrappers import _filter_dict 10 | 11 | TRANSPARENCY_KEYS = set(["obs", "ff_policy", "ff_value", "hid"]) 12 | 13 | 14 | class TransparentPolicy(ABC): 15 | """Policy which returns its observations and/or activations in its call to self.predict 16 | 17 | :param transparent_params: (set) a subset of TRANSPARENCY_KEYS. 18 | If key is present, that data will be included in the transparency_dict 19 | returned in step_transparent. 20 | """ 21 | 22 | def __init__(self, transparent_params): 23 | if transparent_params is None: 24 | transparent_params = set() 25 | unexpected_keys = set(transparent_params).difference(TRANSPARENCY_KEYS) 26 | if unexpected_keys: 27 | raise KeyError(f"Unrecognized transparency keys: {unexpected_keys}") 28 | self.transparent_params = transparent_params 29 | 30 | def _get_default_transparency_dict(self, obs, ff, hid): 31 | """This structure is typical for subclasses of TransparentPolicy 32 | 33 | :param obs: ([float]) array of observations 34 | :param ff: (dict, [float]) dictionary of lists of feedforward activations. 35 | :param hid: ([float] or None) LSTM hidden state. 36 | """ 37 | # TODO: Do not consolidate -- have this happen later down the pipeline. 
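# Concretely, the helper below only concatenates and squeezes; with illustrative
# shapes (two activation arrays for a single environment):
#
#     acts = [np.ones((1, 3)), np.zeros((1, 3))]
#     np.squeeze(np.concatenate(acts)).shape                # -> (2, 3)
#     np.squeeze(np.concatenate([np.ones((1, 3))])).shape   # -> (3,); squeeze drops the length-1 batch axis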
38 | def consolidate(acts): 39 | """Turn a list of activations into one array with shape (num_env,) + action_space""" 40 | return np.squeeze(np.concatenate(acts)) 41 | 42 | transparency_dict = { 43 | "obs": obs, 44 | "hid": hid, 45 | "ff_policy": consolidate(ff["policy"]), 46 | "ff_value": consolidate(ff["value"]), 47 | } 48 | transparency_dict = _filter_dict(transparency_dict, self.transparent_params) 49 | return transparency_dict 50 | 51 | 52 | class TransparentFeedForwardPolicy(TransparentPolicy, FeedForwardPolicy): 53 | """stable_baselines FeedForwardPolicy which is also transparent.""" 54 | 55 | def __init__( 56 | self, 57 | sess, 58 | ob_space, 59 | ac_space, 60 | n_env, 61 | n_steps, 62 | n_batch, 63 | transparent_params, 64 | reuse=False, 65 | layers=None, 66 | net_arch=None, 67 | act_fun=tf.tanh, 68 | cnn_extractor=nature_cnn, 69 | feature_extraction="cnn", 70 | **kwargs, 71 | ): 72 | FeedForwardPolicy.__init__( 73 | self, 74 | sess, 75 | ob_space, 76 | ac_space, 77 | n_env, 78 | n_steps, 79 | n_batch, 80 | reuse, 81 | layers, 82 | net_arch, 83 | act_fun, 84 | cnn_extractor, 85 | feature_extraction, 86 | **kwargs, 87 | ) 88 | TransparentPolicy.__init__(self, transparent_params) 89 | 90 | def step_transparent(self, obs, state=None, mask=None, deterministic=False): 91 | action_op = self.deterministic_action if deterministic else self.action 92 | outputs = [action_op, self.value_flat, self.neglogp, self.ff_out] 93 | 94 | action, value, neglogp, ff = self.sess.run(outputs, {self.obs_ph: obs}) 95 | return action, value, self.initial_state, neglogp, ff 96 | 97 | 98 | class TransparentMlpPolicy(TransparentFeedForwardPolicy): 99 | def __init__( 100 | self, 101 | sess, 102 | ob_space, 103 | ac_space, 104 | n_env, 105 | n_steps, 106 | n_batch, 107 | transparent_params, 108 | reuse=False, 109 | **_kwargs, 110 | ): 111 | super(TransparentMlpPolicy, self).__init__( 112 | sess, 113 | ob_space, 114 | ac_space, 115 | n_env, 116 | n_steps, 117 | n_batch, 118 | transparent_params, 119 | reuse, 120 | feature_extraction="mlp", 121 | **_kwargs, 122 | ) 123 | -------------------------------------------------------------------------------- /src/aprl/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/training/__init__.py -------------------------------------------------------------------------------- /src/aprl/training/embedded_agents.py: -------------------------------------------------------------------------------- 1 | """Wrappers to embed a fixed agent in an environment.""" 2 | 3 | from aprl.envs.multi_agent import VecMultiWrapper, _tuple_pop, _tuple_space_filter 4 | 5 | 6 | class CurryVecEnv(VecMultiWrapper): 7 | """Substitutes in a fixed agent for one of the players in a VecMultiEnv. 8 | 9 | The agent's session will be closed, if it exists, when the environment is closed.""" 10 | 11 | def __init__(self, venv, policy, agent_idx=0, deterministic=False): 12 | """Fixes one of the players in a VecMultiEnv. 13 | :param venv(VecMultiEnv): the environments. 14 | :param policy(Policy): the policy to use for the agent at agent_idx. 15 | :param agent_idx(int): the index of the agent that should be fixed. 16 | :return: a new VecMultiEnv with num_agents decremented. 
It behaves like env but 17 | with all actions at index agent_idx set to those returned by agent.""" 18 | super().__init__(venv) 19 | 20 | assert venv.num_agents >= 1 # allow currying the last agent 21 | self.num_agents = venv.num_agents - 1 22 | self.observation_space = _tuple_space_filter(self.observation_space, agent_idx) 23 | self.action_space = _tuple_space_filter(self.action_space, agent_idx) 24 | 25 | self._agent_to_fix = agent_idx 26 | self._policy = policy 27 | self._state = None 28 | self._obs = None 29 | self._dones = [False] * venv.num_envs 30 | self.deterministic = deterministic 31 | 32 | def step_async(self, actions): 33 | action, self._state = self._policy.predict( 34 | self._obs, state=self._state, mask=self._dones, deterministic=self.deterministic 35 | ) 36 | actions.insert(self._agent_to_fix, action) 37 | self.venv.step_async(actions) 38 | 39 | def step_wait(self): 40 | observations, rewards, self._dones, infos = self.venv.step_wait() 41 | observations, self._obs = _tuple_pop(observations, self._agent_to_fix) 42 | rewards, _ = _tuple_pop(rewards, self._agent_to_fix) 43 | return observations, rewards, self._dones, infos 44 | 45 | def reset(self): 46 | observations = self.venv.reset() 47 | observations, self._obs = _tuple_pop(observations, self._agent_to_fix) 48 | return observations 49 | 50 | def get_policy(self): 51 | return self._policy 52 | 53 | def get_curry_venv(self): 54 | """Helper method to locate self in a stack of nested VecEnvWrappers""" 55 | return self 56 | 57 | def set_curry_obs(self, obs, env_idx=None): 58 | """Setter for observation of embedded agent 59 | 60 | :param obs ([float]) a vectorized observation from either one or all environments 61 | :param env_idx (int,None) indices of observations to set. None means all. 62 | """ 63 | if env_idx is None: 64 | self._obs = obs 65 | else: 66 | self._obs[env_idx] = obs 67 | 68 | def get_curry_obs(self, env_idx=None): 69 | """Getter for observation of embedded agent 70 | 71 | :param env_idx (int,None) indices of observations to get. None means all. 72 | :return: ([float]) observations from specified environments 73 | """ 74 | if env_idx is None: 75 | return self._obs 76 | else: 77 | return self._obs[env_idx] 78 | 79 | def close(self): 80 | if hasattr(self._policy, "sess") and self._policy.sess is not None: 81 | self._policy.sess.close() 82 | super().close() 83 | 84 | 85 | class TransparentCurryVecEnv(CurryVecEnv): 86 | """CurryVecEnv that provides transparency data about its policy by updating infos dicts.""" 87 | 88 | def __init__(self, venv, policy, agent_idx=0, deterministic=False): 89 | """ 90 | :param venv (VecMultiEnv): the environments 91 | :param policy (BaseRLModel): model which wraps a BasePolicy object 92 | :param agent_idx (int): the index of the agent that should be fixed. 93 | :return: a new VecMultiEnv with num_agents decremented. 
It behaves like env but 94 | with all actions at index agent_idx set to those returned by agent.""" 95 | super().__init__(venv, policy, agent_idx, deterministic) 96 | if not hasattr(self._policy.policy, "step_transparent"): 97 | raise TypeError("Error: policy must be transparent") 98 | self._action = None 99 | 100 | def step_async(self, actions): 101 | policy_out = self._policy.predict_transparent( 102 | self._obs, state=self._state, mask=self._dones, deterministic=self.deterministic 103 | ) 104 | self._action, self._state, self._data = policy_out 105 | actions.insert(self._agent_to_fix, self._action) 106 | self.venv.step_async(actions) 107 | 108 | def step_wait(self): 109 | observations, rewards, self._dones, infos = self.venv.step_wait() 110 | observations, self._obs = _tuple_pop(observations, self._agent_to_fix) 111 | for env_idx in range(self.num_envs): 112 | env_data = {k: v[env_idx] for k, v in self._data.items()} 113 | infos[env_idx][self._agent_to_fix].update(env_data) 114 | return observations, rewards, self._dones, infos 115 | -------------------------------------------------------------------------------- /src/aprl/training/gail_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from stable_baselines.gail.dataset.dataset import ExpertDataset 3 | 4 | 5 | class ExpertDatasetFromOurFormat(ExpertDataset): 6 | """GAIL Expert Dataset. Loads in our format, rather than the GAIL default. 7 | 8 | In particular, GAIL expects a dict of flattened arrays, with episodes concatenated together. 9 | The episode start is delineated by an `episode_starts` array. See `ExpertDataset` base class 10 | for more information. 11 | 12 | By contrast, our format consists of a list of NumPy arrays, one for each episode.""" 13 | 14 | def __init__(self, expert_path, **kwargs): 15 | traj_data = np.load(expert_path, allow_pickle=True) 16 | 17 | # Add in episode starts 18 | episode_starts = [] 19 | for reward_dict in traj_data["rewards"]: 20 | ep_len = len(reward_dict) 21 | # used to index episodes since they are flattened in GAIL format. 22 | ep_starts = [True] + [False] * (ep_len - 1) 23 | episode_starts.append(np.array(ep_starts)) 24 | 25 | # Flatten arrays 26 | traj_data = {k: np.concatenate(v) for k, v in traj_data.items()} 27 | traj_data["episode_starts"] = np.concatenate(episode_starts) 28 | 29 | # Rename observations->obs 30 | traj_data["obs"] = traj_data["observations"] 31 | del traj_data["observations"] 32 | 33 | super().__init__(traj_data=traj_data, **kwargs) 34 | -------------------------------------------------------------------------------- /src/aprl/training/logger.py: -------------------------------------------------------------------------------- 1 | """Logging for RL algorithms. 
2 | 3 | Configures Baseline's logger and TensorBoard appropriately.""" 4 | 5 | import os 6 | from os import path as osp 7 | 8 | from stable_baselines import logger 9 | from tensorboard.plugins.custom_scalar import layout_pb2 10 | import tensorboard.summary as summary_lib 11 | from tensorflow.core.util import event_pb2 12 | 13 | from aprl.common import utils 14 | 15 | 16 | def gen_multiline_charts(cfg): 17 | charts = [] 18 | for title, tags in cfg: 19 | charts.append( 20 | layout_pb2.Chart(title=title, multiline=layout_pb2.MultilineChartContent(tag=tags)) 21 | ) 22 | return charts 23 | 24 | 25 | def tb_layout(): 26 | episode_rewards = layout_pb2.Category( 27 | title="Episode Reward", 28 | chart=gen_multiline_charts( 29 | [ 30 | ("Shaped Reward", [r"shaping/eprewmean_true"]), 31 | ("Episode Length", [r"eplenmean"]), 32 | ("Sparse Reward", [r"shaping/epsparsemean"]), 33 | ("Dense Reward", [r"shaping/epdensemean"]), 34 | ("Dense Reward Annealing", [r"shaping/rew_anneal_c"]), 35 | ("Unshaped Reward", [r"ep_rewmean"]), 36 | ("Victim Action Noise", [r"shaping/victim_noise"]), 37 | ] 38 | ), 39 | ) 40 | 41 | game_outcome = layout_pb2.Category( 42 | title="Game Outcomes", 43 | chart=gen_multiline_charts( 44 | [ 45 | ("Agent 0 Win Proportion", [r"game_win0"]), 46 | ("Agent 1 Win Proportion", [r"game_win1"]), 47 | ("Tie Proportion", [r"game_tie"]), 48 | ("# of games", [r"game_total"]), 49 | ] 50 | ), 51 | ) 52 | 53 | training = layout_pb2.Category( 54 | title="Training", 55 | chart=gen_multiline_charts( 56 | [ 57 | ("Policy Loss", [r"policy_loss"]), 58 | ("Value Loss", [r"value_loss"]), 59 | ("Policy Entropy", [r"policy_entropy"]), 60 | ("Explained Variance", [r"explained_variance"]), 61 | ("Approx KL", [r"approxkl"]), 62 | ("Clip Fraction", [r"clipfrac"]), 63 | ] 64 | ), 65 | ) 66 | 67 | # Intentionally unused: 68 | # + serial_timesteps (just total_timesteps / num_envs) 69 | # + time_elapsed (TensorBoard already logs wall-clock time) 70 | # + nupdates (this is already logged as step) 71 | time = layout_pb2.Category( 72 | title="Time", 73 | chart=gen_multiline_charts([("Total Timesteps", [r"total_timesteps"]), ("FPS", [r"fps"])]), 74 | ) 75 | 76 | categories = [episode_rewards, game_outcome, training, time] 77 | return summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) 78 | 79 | 80 | def setup_logger(out_dir="results", exp_name="test", output_formats=None): 81 | timestamp = utils.make_timestamp() 82 | exp_name = exp_name.replace("/", "_") # environment names can contain /'s 83 | filename = "{}-{}".format(timestamp, exp_name)[0:255] # Linux has filename limit of 255 84 | out_dir = osp.join(out_dir, filename) 85 | os.makedirs(out_dir, exist_ok=True) 86 | 87 | logger.configure(folder=osp.join(out_dir, "rl"), format_strs=["tensorboard", "stdout"]) 88 | logger_instance = logger.Logger.CURRENT 89 | 90 | if output_formats is not None: 91 | logger_instance.output_formats += output_formats 92 | 93 | for fmt in logger_instance.output_formats: 94 | if isinstance(fmt, logger.TensorBoardOutputFormat): 95 | writer = fmt.writer 96 | layout = tb_layout() 97 | event = event_pb2.Event(summary=layout) 98 | writer.WriteEvent(event) 99 | writer.Flush() 100 | 101 | return out_dir, logger_instance 102 | -------------------------------------------------------------------------------- /src/aprl/training/shaping_wrappers.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from itertools import islice 3 | 4 | from 
stable_baselines.common.vec_env import VecEnvWrapper 5 | 6 | from aprl.policies.wrappers import NoisyAgentWrapper 7 | from aprl.training.scheduling import ConditionalAnnealer, ConstantAnnealer, LinearAnnealer 8 | 9 | REW_TYPES = set(("sparse", "dense")) 10 | 11 | 12 | class RewardShapingVecWrapper(VecEnvWrapper): 13 | """ 14 | A more direct interface for shaping the reward of the attacking agent. 15 | - shaping_params schema: {'sparse': {k: v}, 'dense': {k: v}, **kwargs} 16 | """ 17 | 18 | def __init__(self, venv, agent_idx, shaping_params, reward_annealer=None): 19 | super().__init__(venv) 20 | assert shaping_params.keys() == REW_TYPES 21 | self.shaping_params = {} 22 | for rew_type, params in shaping_params.items(): 23 | for rew_term, weight in params.items(): 24 | self.shaping_params[rew_term] = (rew_type, weight) 25 | 26 | self.reward_annealer = reward_annealer 27 | self.agent_idx = agent_idx 28 | queue_keys = REW_TYPES.union(["length"]) 29 | self.ep_logs = {k: deque([], maxlen=10000) for k in queue_keys} 30 | self.ep_logs["total_episodes"] = 0 31 | self.ep_logs["last_callback_episode"] = 0 32 | self.step_rew_dict = { 33 | rew_type: [[] for _ in range(self.num_envs)] for rew_type in REW_TYPES 34 | } 35 | 36 | def log_callback(self, logger): 37 | """Logs various metrics. This is given as a callback to PPO2.learn()""" 38 | num_episodes = self.ep_logs["total_episodes"] - self.ep_logs["last_callback_episode"] 39 | if num_episodes == 0: 40 | return 41 | 42 | means = {} 43 | for rew_type in REW_TYPES: 44 | if len(self.ep_logs[rew_type]) < num_episodes: 45 | raise AssertionError(f"Data missing in ep_logs for {rew_type}") 46 | rews = islice(self.ep_logs[rew_type], num_episodes) 47 | means[rew_type] = sum(rews) / num_episodes 48 | logger.logkv(f"shaping/ep{rew_type}mean", means[rew_type]) 49 | 50 | overall_mean = _anneal(means, self.reward_annealer) 51 | logger.logkv("shaping/eprewmean_true", overall_mean) 52 | c = self.reward_annealer() 53 | logger.logkv("shaping/rew_anneal_c", c) 54 | self.ep_logs["last_callback_episode"] = self.ep_logs["total_episodes"] 55 | 56 | def get_logs(self): 57 | """Interface to access self.ep_logs which contains data about episodes""" 58 | if self.ep_logs["total_episodes"] == 0: 59 | return None 60 | # keys: 'dense', 'sparse', 'length', 'total_episodes', 'last_callback_episode' 61 | return self.ep_logs 62 | 63 | def reset(self): 64 | return self.venv.reset() 65 | 66 | def step_wait(self): 67 | obs, rew, done, infos = self.venv.step_wait() 68 | for env_num in range(self.num_envs): 69 | # Compute shaped_reward for each rew_type 70 | shaped_reward = {k: 0 for k in REW_TYPES} 71 | for rew_term, rew_value in infos[env_num][self.agent_idx].items(): 72 | if rew_term not in self.shaping_params: 73 | continue 74 | rew_type, weight = self.shaping_params[rew_term] 75 | shaped_reward[rew_type] += weight * rew_value 76 | 77 | # Compute total shaped reward, optionally annealing 78 | rew[env_num] = _anneal(shaped_reward, self.reward_annealer) 79 | 80 | # Log the results of an episode into buffers and then pass on the shaped reward 81 | for rew_type, val in shaped_reward.items(): 82 | self.step_rew_dict[rew_type][env_num].append(val) 83 | 84 | if done[env_num]: 85 | ep_length = max(len(self.step_rew_dict[k]) for k in self.step_rew_dict.keys()) 86 | self.ep_logs["length"].appendleft(ep_length) # pytype:disable=attribute-error 87 | for rew_type in REW_TYPES: 88 | rew_type_total = sum(self.step_rew_dict[rew_type][env_num]) 89 | rew_type_logs = self.ep_logs[rew_type] 90 | 
rew_type_logs.appendleft(rew_type_total) # pytype:disable=attribute-error 91 | self.step_rew_dict[rew_type][env_num] = [] 92 | self.ep_logs["total_episodes"] += 1 93 | return obs, rew, done, infos 94 | 95 | 96 | def apply_reward_wrapper(single_env, shaping_params, agent_idx, scheduler): 97 | if "metric" in shaping_params: 98 | rew_shape_annealer = ConditionalAnnealer.from_dict(shaping_params, get_logs=None) 99 | scheduler.set_conditional("rew_shape") 100 | else: 101 | anneal_frac = shaping_params.get("anneal_frac") 102 | if anneal_frac is not None: 103 | rew_shape_annealer = LinearAnnealer(1, 0, anneal_frac) 104 | else: 105 | # In this case, we weight the reward terms as per shaping_params 106 | # but the ratio of sparse to dense reward remains constant. 107 | rew_shape_annealer = ConstantAnnealer(0.5) 108 | 109 | scheduler.set_annealer("rew_shape", rew_shape_annealer) 110 | return RewardShapingVecWrapper( 111 | single_env, 112 | agent_idx=agent_idx, 113 | shaping_params=shaping_params["weights"], 114 | reward_annealer=scheduler.get_annealer("rew_shape"), 115 | ) 116 | 117 | 118 | def apply_embedded_agent_wrapper(embedded, noise_params, scheduler): 119 | if "metric" in noise_params: 120 | noise_annealer = ConditionalAnnealer.from_dict(noise_params, get_logs=None) 121 | scheduler.set_conditional("noise") 122 | else: 123 | noise_anneal_frac = noise_params.get("anneal_frac", 0) 124 | noise_param = noise_params.get("param", 0) 125 | 126 | if noise_anneal_frac <= 0: 127 | msg = "victim_noise_params.anneal_frac must be >0 if using a NoisyAgentWrapper." 128 | raise ValueError(msg) 129 | noise_annealer = LinearAnnealer(noise_param, 0, noise_anneal_frac) 130 | scheduler.set_annealer("noise", noise_annealer) 131 | return NoisyAgentWrapper(embedded, noise_annealer=scheduler.get_annealer("noise")) 132 | 133 | 134 | def _anneal(reward_dict, reward_annealer): 135 | c = reward_annealer() 136 | assert 0 <= c <= 1 137 | sparse_weight = 1 - c 138 | dense_weight = c 139 | return reward_dict["sparse"] * sparse_weight + reward_dict["dense"] * dense_weight 140 | -------------------------------------------------------------------------------- /src/aprl/visualize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/src/aprl/visualize/__init__.py -------------------------------------------------------------------------------- /src/aprl/visualize/benchmark_ffmpeg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | for resolution in 1920x1080 1280x720 854x480; do 4 | for threads in 1 2 4 6 8 12; do 5 | echo "*** RESOLUTION ${resolution} with THREADS ${threads}" 6 | time (ffmpeg -y -i $1 -s ${resolution} -c:v libx264 -preset slow -crf 28 -threads ${threads} /tmp/ffmpeg_benchmark.mp4 >/dev/null 2>&1) 7 | done 8 | done 9 | -------------------------------------------------------------------------------- /src/aprl/visualize/compress_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ $# -ne 2 ]]; then 4 | echo "usage: $0 " 5 | exit -1 6 | fi 7 | 8 | IN_DIR=$1 9 | OUT_DIR=$2 10 | 11 | fnames="" 12 | for in_path in ${IN_DIR}/*.mp4; do 13 | fnames="${fnames} $(basename -s .mp4 ${in_path})" 14 | done 15 | 16 | FFMPEG_COMMAND="ffmpeg -i ${IN_DIR}/{prefix}.mp4 -c:v libx264 -preset slow -crf 28" 17 | 18 | # These were tuned for my 
machine. See benchmark_ffmpeg.sh to choose reasonable values. 19 | # Generally there are diminishing returns to using more threads per video. 20 | # Since we have a large number of videos, favor large job count and small thread count. 21 | parallel --header : -j 50% ${FFMPEG_COMMAND} -threads 2 \ 22 | ${OUT_DIR}/{prefix}_1080p.mp4 ::: prefix ${fnames} 23 | parallel --header : -j 100% ${FFMPEG_COMMAND} -threads 1 -s 1280x720 \ 24 | ${OUT_DIR}/{prefix}_720p.mp4 ::: prefix ${fnames} 25 | parallel --header : -j 100% ${FFMPEG_COMMAND} -threads 1 -s 854x480 \ 26 | ${OUT_DIR}/{prefix}_480p.mp4 ::: prefix ${fnames} 27 | -------------------------------------------------------------------------------- /src/aprl/visualize/generate_website_json.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import functools 3 | import json 4 | import logging 5 | import os 6 | import re 7 | import sys 8 | from typing import Any, Iterable, Sequence, Tuple 9 | 10 | import boto3 11 | 12 | from aprl.visualize import util 13 | 14 | logger = logging.getLogger("aprl.visualize.generate_website_json") 15 | 16 | ENV_NAME_LOOKUP = { 17 | "KickAndDefend-v0": "Kick and Defend", 18 | "SumoHumans-v0": "Sumo Humans", 19 | "YouShallNotPassHumans-v0": "You Shall Not Pass", 20 | "SumoAnts-v0": "Sumo Ants", 21 | } 22 | BUCKET_NAME = "adversarial-policies-public" 23 | PREFIX = "videos" 24 | 25 | EXCLUDE_ABBREV = [r"ZooM[SD].*"] 26 | 27 | 28 | class NestedDict(OrderedDict): 29 | """Implementation of perl's autovivification feature.""" 30 | 31 | def __getitem__(self, item): 32 | try: 33 | return super().__getitem__(item) # pytype:disable=unsupported-operands 34 | except KeyError: 35 | value = self[item] = type(self)() 36 | return value 37 | 38 | 39 | def get_s3_files() -> Iterable[str]: 40 | s3 = boto3.resource("s3") 41 | adv_policies_bucket = s3.Bucket(BUCKET_NAME) 42 | objs = adv_policies_bucket.objects.filter(Prefix=PREFIX).all() 43 | return [os.path.basename(o.key) for o in objs] 44 | 45 | 46 | def is_excluded(abbrev: str) -> bool: 47 | for exclude in EXCLUDE_ABBREV: 48 | if re.match(exclude, abbrev): 49 | return True 50 | return False 51 | 52 | 53 | def get_videos(video_files: Iterable[str]) -> NestedDict: 54 | video_files = [path for path in video_files if path.endswith(".mp4")] 55 | stem_pattern = re.compile(r"(.*)_[0-9]+p.mp4") 56 | agent_pattern = re.compile(r"(\w*-v\d)_victim_(.*)_opponent_(.*)") 57 | 58 | nested = NestedDict() 59 | for path in video_files: 60 | stem_match = stem_pattern.match(path) 61 | if stem_match is None: 62 | logger.info(f"Skipping path '{path}: malformed filename, cannot extract stem.") 63 | continue 64 | 65 | stem = stem_match.groups()[0] 66 | assert isinstance(stem, str) 67 | 68 | agent_match = agent_pattern.match(stem) 69 | if agent_match is None: 70 | logger.info(f"Skipping path '{path}: malformed filename, cannot extract agent.") 71 | continue 72 | 73 | env_name, victim_abbrev, opponent_abbrev = agent_match.groups() 74 | if is_excluded(victim_abbrev) or is_excluded(opponent_abbrev): 75 | logger.info(f"Skipping path '{path}': explicitly excluded.") 76 | continue 77 | 78 | env_name = ENV_NAME_LOOKUP.get(env_name) 79 | victim = f"{util.friendly_agent_label(victim_abbrev)} ({victim_abbrev})" 80 | opponent = f"{util.friendly_agent_label(opponent_abbrev)} ({opponent_abbrev})" 81 | nested[env_name][opponent][victim] = stem # pytype:disable=unsupported-operands 82 | 83 | return nested 84 | 85 | 86 | def sort_fn(item: Tuple[str, 
Any], groups: Sequence[str]) -> str: 87 | """Prepends index of key in groups: can sort in order of groups with alphabetical sort. 88 | 89 | :param item: key-value pair. 90 | :param groups: sequence of regexps.""" 91 | k, v = item 92 | match = re.match(r".* \((.*)\)", k) 93 | assert match 94 | abbrev = match.groups()[0] 95 | for i, grp in enumerate(groups): 96 | if re.match(grp, abbrev): 97 | break 98 | return f"{i}{abbrev}" 99 | 100 | 101 | def sort_nested(nested: NestedDict) -> NestedDict: 102 | new_nested = NestedDict() 103 | victim_sort = functools.partial(sort_fn, groups=util.GROUPS["rows"]) 104 | opponent_sort = functools.partial(sort_fn, groups=util.GROUPS["cols"]) 105 | 106 | for env, d1 in nested.items(): 107 | new_d1 = {} 108 | for opponent, d2 in d1.items(): 109 | new_d1[opponent] = OrderedDict(sorted(d2.items(), key=victim_sort)) 110 | new_nested[env] = OrderedDict(sorted(new_d1.items(), key=opponent_sort)) 111 | return new_nested 112 | 113 | 114 | def main(): 115 | logging.basicConfig(level=logging.INFO) 116 | paths = get_s3_files() 117 | nested = get_videos(paths) 118 | nested = sort_nested(nested) 119 | 120 | out_path = "file_list.json" 121 | if len(sys.argv) > 1: 122 | out_path = sys.argv[1] 123 | 124 | print(nested) 125 | with open(out_path, "w") as fp: 126 | json.dump(nested, fp, indent=4) 127 | logger.info(f"Saved files to '{out_path}'.") 128 | 129 | 130 | if __name__ == "__main__": 131 | main() 132 | -------------------------------------------------------------------------------- /src/aprl/visualize/make_videos.py: -------------------------------------------------------------------------------- 1 | """Generate videos for adversaries and standard baselines.""" 2 | 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | from sacred import Experiment 8 | from sacred.observers import FileStorageObserver 9 | 10 | from aprl.common.utils import make_timestamp 11 | from aprl.configs import DATA_LOCATION 12 | from aprl.multi.score import extract_data, run_external 13 | from aprl.visualize import util 14 | 15 | make_videos_ex = Experiment("make_videos") 16 | make_videos_logger = logging.getLogger("make_videos") 17 | 18 | 19 | @make_videos_ex.config 20 | def default_config(): 21 | adversary_path = osp.join( 22 | DATA_LOCATION, "multi_train", "paper", "highest_win_policies_and_rates.json" 23 | ) 24 | ray_upload_dir = "data" # where Ray will upload multi.score outputs. 'data' works on baremetal 25 | score_configs = [("normal",), ("normal", "mask_observations_of_victim")] 26 | multi_score = {} 27 | root_dir = "data/videos" 28 | exp_name = "default" 29 | _ = locals() # quieten flake8 unused variable warning 30 | del _ 31 | 32 | 33 | @make_videos_ex.named_config 34 | def defense_config(): 35 | score_configs = [("defenses",), ("defenses", "mask_observations_of_victim")] 36 | exp_name = "defense" 37 | _ = locals() # quieten flake8 unused variable warning 38 | del _ 39 | 40 | 41 | @make_videos_ex.named_config 42 | def slides_config(): 43 | """Generate a subset of videos, with tighter-cropped camera. 
44 | Intended for slideshows/demos.""" 45 | score_configs = [("summary",), ("summary", "mask_observations_of_victim")] 46 | multi_score = { 47 | "score": { 48 | "video_params": {"annotation_params": {"camera_config": "close", "short_labels": True}} 49 | } 50 | } 51 | exp_name = "slides" 52 | _ = locals() # quieten flake8 unused variable warning 53 | del _ 54 | 55 | 56 | LOW_RES = { 57 | "score": {"video_params": {"annotation_params": {"resolution": (640, 480), "font_size": 24}}} 58 | } 59 | 60 | 61 | @make_videos_ex.named_config 62 | def low_res(): 63 | multi_score = LOW_RES # noqa: F841 64 | 65 | 66 | @make_videos_ex.named_config 67 | def debug_config(): 68 | score_configs = [ 69 | ("debug_one_each_type",), 70 | ("debug_one_each_type", "mask_observations_of_victim"), 71 | ] 72 | multi_score = dict(LOW_RES) 73 | multi_score["score"]["episodes"] = 2 74 | exp_name = "debug" 75 | _ = locals() # quieten flake8 unused variable warning 76 | del _ 77 | 78 | 79 | @make_videos_ex.capture 80 | def generate_videos(score_configs, multi_score, adversary_path): 81 | """Uses multi.score to generate videos.""" 82 | return run_external( 83 | score_configs, 84 | post_named_configs=["video"], 85 | config_updates=multi_score, 86 | adversary_path=adversary_path, 87 | ) 88 | 89 | 90 | @make_videos_ex.capture 91 | def extract_videos(out_dir, video_dirs, ray_upload_dir): 92 | def path_generator( 93 | trial_root, 94 | env_sanitized, 95 | victim_index, 96 | victim_type, 97 | victim_path, 98 | opponent_type, 99 | opponent_path, 100 | cfg, 101 | ): 102 | src_path = osp.join( 103 | trial_root, "data", "sacred", "score", "1", "videos", "env_0_episode_0_recording.mp4" 104 | ) 105 | 106 | victim_suffix = "" 107 | opponent_suffix = "" 108 | mask_index = cfg["mask_agent_index"] 109 | if mask_index is not None: 110 | if mask_index == victim_index: 111 | victim_suffix = "M" 112 | else: 113 | opponent_suffix = "M" 114 | 115 | victim = util.abbreviate_agent_config( 116 | cfg["env_name"], victim_type, victim_path, victim_suffix, victim=True 117 | ) 118 | opponent = util.abbreviate_agent_config( 119 | cfg["env_name"], opponent_type, opponent_path, opponent_suffix, victim=False 120 | ) 121 | 122 | new_name = f"{env_sanitized}_victim_{victim}_opponent_{opponent}" 123 | return src_path, new_name, "mp4" 124 | 125 | return extract_data(path_generator, out_dir, video_dirs, ray_upload_dir) 126 | 127 | 128 | @make_videos_ex.main 129 | def make_videos(root_dir, exp_name): 130 | out_dir = osp.join(root_dir, exp_name, make_timestamp()) 131 | os.makedirs(out_dir) 132 | 133 | video_dirs = generate_videos() 134 | extract_videos(out_dir=out_dir, video_dirs=video_dirs) 135 | 136 | 137 | def main(): 138 | observer = FileStorageObserver(osp.join("data", "sacred", "make_videos")) 139 | make_videos_ex.observers.append(observer) 140 | make_videos_ex.run_commandline() 141 | make_videos_logger.info("Sacred run completed, files stored at {}".format(observer.dir)) 142 | 143 | 144 | if __name__ == "__main__": 145 | main() 146 | -------------------------------------------------------------------------------- /src/aprl/visualize/noisy_observations.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | import os 4 | import os.path as osp 5 | 6 | from matplotlib import pyplot as plt 7 | import pandas as pd 8 | from sacred import Experiment 9 | from sacred.observers import FileStorageObserver 10 | import seaborn as sns 11 | 12 | from aprl.envs.gym_compete import NUM_ZOO_POLICIES 13 | from
aprl.visualize import styles 14 | 15 | plot_noisy_obs_exp = Experiment("plot_noisy_observations") 16 | 17 | 18 | @plot_noisy_obs_exp.config 19 | def base_config(): 20 | root_dir = "data/aws/score_agents/victim_masked_noise/" 21 | out_dir = "data/aws/score_agents/masked_obs_visualization/" 22 | input_run = "ep_500_5-22_all_zoo" 23 | # Runs known to work: ["ep_500_5-22_single_zoo", "ep_100_5-21", "ep_500_5-22_all_zoo"] 24 | _ = locals() # quieten flake8 unused variable warning 25 | del _ 26 | 27 | 28 | def transform(df, transform_list): 29 | new_df = df.copy() 30 | for trans_dict in transform_list: 31 | new_df[trans_dict["new_col"]] = new_df[trans_dict["old_col"]].apply(trans_dict["func"]) 32 | return new_df 33 | 34 | 35 | def subset(df, spec): 36 | ret = df.copy() 37 | for constraint, constraint_value in spec.items(): 38 | ret = ret[ret[constraint] == constraint_value] 39 | return ret 40 | 41 | 42 | def process_element_into_flat_dict(el, key_order): 43 | outp = {} 44 | for i, k in enumerate(key_order): 45 | outp[k] = el["k"][i] 46 | outp["agent0_wins"] = el["v"]["win0"] 47 | outp["agent1_wins"] = el["v"]["win1"] 48 | outp["ties"] = el["v"]["ties"] 49 | return outp 50 | 51 | 52 | def noisy_adversary_opponent_subset_plot( 53 | original_df, subset_specs, transform_specs, logistic=True, plot_line=True, savefile=None 54 | ): 55 | subset_df = subset(original_df, subset_specs) 56 | if len(subset_df) == 0: 57 | return 58 | transformed_df = transform(subset_df, transform_specs) 59 | plt.figure(figsize=(10, 7)) 60 | if plot_line: 61 | sns.lmplot(data=transformed_df, x="log_noise", y="agent0_win_perc", logistic=logistic) 62 | else: 63 | sns.scatterplot(data=transformed_df, x="log_noise", y="agent0_win_perc") 64 | plt.title( 65 | "{}: Noisy Zoo{} Observations vs Adversary".format( 66 | subset_specs["env"], subset_specs["agent0_path"] 67 | ) 68 | ) 69 | if savefile is not None: 70 | plt.savefig(savefile) 71 | else: 72 | plt.show() 73 | plt.close() 74 | 75 | 76 | def noisy_multiple_opponent_subset_plot( 77 | original_df, subset_specs, transform_specs, logistic=True, savefile=None 78 | ): 79 | subset_df = subset(original_df, subset_specs) 80 | if len(subset_df) == 0: 81 | return 82 | transformed_df = transform(subset_df, transform_specs) 83 | plt.figure(figsize=(10, 7)) 84 | sns.lmplot( 85 | data=transformed_df, 86 | x="log_noise", 87 | y="agent0_win_perc", 88 | hue="agent1_path", 89 | logistic=logistic, 90 | ) 91 | plt.title( 92 | "{}: Noisy Zoo{} Observations vs Normal Zoos".format( 93 | subset_specs["env"], subset_specs["agent0_path"] 94 | ) 95 | ) 96 | if savefile is not None: 97 | plt.savefig(savefile) 98 | else: 99 | plt.show() 100 | plt.close() 101 | 102 | 103 | @plot_noisy_obs_exp.main 104 | def generate_plots(input_run, root_dir, out_dir): 105 | num_episodes = int(input_run.split("_")[1]) 106 | baseline_transformations = [ 107 | {"new_col": "log_noise", "old_col": "noise_magnitude", "func": lambda x: math.log(x)}, 108 | { 109 | "new_col": "agent0_win_perc", 110 | "old_col": "agent0_wins", 111 | "func": lambda x: x / num_episodes, 112 | }, 113 | { 114 | "new_col": "agent1_win_perc", 115 | "old_col": "agent1_wins", 116 | "func": lambda x: x / num_episodes, 117 | }, 118 | ] 119 | 120 | zoo_path = os.path.join(root_dir, input_run, "noisy_zoo_observations.json") 121 | adversary_path = os.path.join(root_dir, input_run, "noisy_adversary_observations.json") 122 | with open(adversary_path, "r") as fp: 123 | noisy_obs_against_adv = json.load(fp) 124 | 125 | DATAFRAME_KEYS = [ 126 | "env", 127 | 
"agent0_type", 128 | "agent0_path", 129 | "agent1_type", 130 | "agent1_path", 131 | "masking_param", 132 | "noise_magnitude", 133 | ] 134 | 135 | with open(zoo_path, "r") as fp: 136 | noisy_obs_against_zoo = json.load(fp) 137 | noisy_zoo_obs_df = pd.DataFrame( 138 | [ 139 | process_element_into_flat_dict(el, key_order=DATAFRAME_KEYS) 140 | for el in noisy_obs_against_zoo 141 | ] 142 | ) 143 | noisy_adv_obs_df = pd.DataFrame( 144 | [ 145 | process_element_into_flat_dict(el, key_order=DATAFRAME_KEYS) 146 | for el in noisy_obs_against_adv 147 | ] 148 | ) 149 | experiment_out_dir = os.path.join(out_dir, input_run) 150 | 151 | if not os.path.exists(experiment_out_dir): 152 | os.mkdir(experiment_out_dir) 153 | 154 | for env_name, pretty_env in styles.PRETTY_ENV.items(): 155 | short_env = pretty_env.replace(" ", "") 156 | if env_name == "multicomp/YouShallNotPassHumans-v0": 157 | # skip for now as has different victim index, need to fix plotting code 158 | continue 159 | 160 | for zoo_id in range(1, NUM_ZOO_POLICIES[short_env] + 1): 161 | subset_params = {"agent0_path": str(zoo_id), "env": env_name} 162 | 163 | zoo_plot_path = os.path.join( 164 | experiment_out_dir, f"{input_run}_ZooBaseline_" f"{short_env}_AgainstZoo{zoo_id}" 165 | ) 166 | 167 | adversary_plot_path = os.path.join( 168 | experiment_out_dir, 169 | f"{input_run}_AdversaryTrained_" f"{short_env}_AgainstZoo{zoo_id}", 170 | ) 171 | noisy_multiple_opponent_subset_plot( 172 | noisy_zoo_obs_df, 173 | subset_specs=subset_params, 174 | transform_specs=baseline_transformations, 175 | savefile=zoo_plot_path, 176 | ) 177 | noisy_adversary_opponent_subset_plot( 178 | noisy_adv_obs_df, 179 | subset_specs=subset_params, 180 | transform_specs=baseline_transformations, 181 | savefile=os.path.join(adversary_plot_path), 182 | ) 183 | 184 | 185 | def main(): 186 | observer = FileStorageObserver(osp.join("data", "sacred", "plot_noisy_observations")) 187 | plot_noisy_obs_exp.observers.append(observer) 188 | plot_noisy_obs_exp.run_commandline() 189 | 190 | 191 | if __name__ == "__main__": 192 | main() 193 | -------------------------------------------------------------------------------- /src/aprl/visualize/scores.py: -------------------------------------------------------------------------------- 1 | from distutils.dir_util import copy_tree 2 | import logging 3 | import os.path 4 | 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | from sacred import Experiment 8 | from sacred.observers import FileStorageObserver 9 | 10 | from aprl.visualize import styles as vis_styles 11 | from aprl.visualize import util 12 | 13 | logger = logging.getLogger("aprl.visualize.scores") 14 | visualize_score_ex = Experiment("visualize_score") 15 | 16 | 17 | @visualize_score_ex.capture 18 | def heatmap_opponent(single_env, cmap, row_starts, row_ends, col_ends): 19 | xlabel = single_env.name in col_ends 20 | ylabel = single_env.name in row_starts 21 | cbar = single_env.name in row_ends 22 | return util.heatmap_one_col( 23 | single_env, col="Opponent Win", cmap=cmap, xlabel=xlabel, ylabel=ylabel, cbar=cbar 24 | ) 25 | 26 | 27 | def _make_old_paths(timestamped_path, **kwargs): 28 | """Paths in traditional format, before refactoring multi.score. 29 | 30 | Specifically, expects a timestamped directory containing `adversary_transfer.json`. 31 | In the same directory as the timestamped directory, there should be `fixed_baseline.json` and 32 | `zoo_baseline.json`. 
33 | """ 34 | score_dir = os.path.dirname(timestamped_path) 35 | paths = [ 36 | os.path.join(timestamped_path, "adversary_transfer.json"), 37 | os.path.join(score_dir, "fixed_baseline.json"), 38 | os.path.join(score_dir, "zoo_baseline.json"), 39 | ] 40 | return [dict(path=path, **kwargs) for path in paths] 41 | 42 | 43 | SMALL_SCORE_PATHS = _make_old_paths( 44 | os.path.join("normal", "2019-05-05T18:12:24+00:00") 45 | ) + _make_old_paths( 46 | os.path.join("victim_masked_init", "2019-05-05T18:12:24+00:00"), victim_suffix="M" 47 | ) 48 | DEFENSE_SCORE_PATHS = [ 49 | {"path": os.path.join("defenses", "normal.json")}, 50 | {"path": os.path.join("defenses", "victim_masked_init.json"), "victim_suffix": "M"}, 51 | ] 52 | 53 | 54 | @visualize_score_ex.config 55 | def default_config(): 56 | score_root = os.path.join("data", "aws", "score_agents") 57 | score_paths = _make_old_paths(os.path.join("normal", "2019-05-05T18:12:24+00:00")) 58 | 59 | command = util.heatmap_full 60 | styles = ["paper", "a4"] 61 | palette = "Blues" 62 | publication = False 63 | 64 | fig_dir = os.path.join("data", "figs", "scores") 65 | 66 | seed = 0 # we don't use it for anything, but stop config changing each time as we version it 67 | 68 | _ = locals() # quieten flake8 unused variable warning 69 | del _ 70 | 71 | 72 | @visualize_score_ex.named_config 73 | def full_masked_config(): 74 | score_paths = ( # noqa: F841 75 | _make_old_paths( 76 | os.path.join("normal", "2019-05-05T18:12:24+00:00"), 77 | victim_suffix="N", 78 | opponent_suffix="N", 79 | ) 80 | + _make_old_paths( 81 | os.path.join("victim_masked_init", "2019-05-05T18:12:24+00:00"), 82 | victim_suffix="BI", 83 | opponent_suffix="N", 84 | ) 85 | + _make_old_paths( 86 | os.path.join("victim_masked_zero", "2019-05-05T18:12:24+00:00"), 87 | victim_suffix="BZ", 88 | opponent_suffix="N", 89 | ) 90 | + [ 91 | { 92 | "path": os.path.join( 93 | "adversary_masked_init", "2019-05-05T18:12:24+00:00", "adversary_transfer.json" 94 | ), 95 | "victim_suffix": "N", 96 | "opponent_suffix": "BI", 97 | } 98 | ] 99 | ) 100 | 101 | 102 | @visualize_score_ex.named_config 103 | def paper_config(): 104 | score_paths = SMALL_SCORE_PATHS 105 | 106 | styles = ["paper", "scores", "scores_twocol"] 107 | row_starts = ["multicomp/KickAndDefend-v0", "multicomp/SumoHumansAutoContact-v0"] 108 | row_ends = ["multicomp/YouShallNotPassHumans-v0", "multicomp/SumoAntsAutoContact-v0"] 109 | col_ends = ["multicomp/SumoHumansAutoContact-v0", "multicomp/SumoAntsAutoContact-v0"] 110 | command = heatmap_opponent 111 | publication = True 112 | 113 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_single") 114 | 115 | _ = locals() # quieten flake8 unused variable warning 116 | del _ 117 | 118 | 119 | @visualize_score_ex.named_config 120 | def supplementary_config(): 121 | score_paths = SMALL_SCORE_PATHS 122 | 123 | styles = ["paper", "scores", "scores_monolithic"] 124 | publication = True 125 | 126 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores") 127 | 128 | _ = locals() # quieten flake8 unused variable warning 129 | del _ 130 | 131 | 132 | @visualize_score_ex.named_config 133 | def defense_paper_config(): 134 | score_paths = DEFENSE_SCORE_PATHS 135 | styles = ["paper", "scores", "scores_twocol"] 136 | row_starts = [] 137 | row_ends = ["multicomp/YouShallNotPassHumans-v0"] 138 | col_ends = [] 139 | command = heatmap_opponent 140 | publication = True 141 | 142 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_defense_single") 143 
| 144 | _ = locals() # quieten flake8 unused variable warning 145 | del _ 146 | 147 | 148 | @visualize_score_ex.named_config 149 | def defense_supplementary_config(): 150 | score_paths = DEFENSE_SCORE_PATHS 151 | # can use short as currently just YSNP environment 152 | styles = ["paper", "scores", "scores_monolithic_short"] 153 | publication = True 154 | 155 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_defense") 156 | 157 | _ = locals() # quieten flake8 unused variable warning 158 | del _ 159 | 160 | 161 | @visualize_score_ex.named_config 162 | def poster_config(): 163 | score_paths = DEFENSE_SCORE_PATHS 164 | 165 | styles = ["poster", "scores_poster_threecol"] 166 | row_starts = ["multicomp/KickAndDefend-v0"] 167 | row_ends = ["multicomp/YouShallNotPassHumans-v0"] 168 | col_ends = [ 169 | "multicomp/KickAndDefend-v0", 170 | "multicomp/SumoHumansAutoContact-v0", 171 | "multicomp/YouShallNotPassHumans-v0", 172 | ] 173 | command = heatmap_opponent 174 | publication = True 175 | 176 | fig_dir = os.path.expanduser("~/dev/adversarial-policies-paper/figs/scores_poster") 177 | 178 | _ = locals() # quieten flake8 unused variable warning 179 | del _ 180 | 181 | 182 | @visualize_score_ex.main 183 | def visualize_score(command, styles, palette, publication, fig_dir, score_root, score_paths): 184 | datasets = [ 185 | util.load_datasets( 186 | os.path.join(score_root, item["path"]), 187 | victim_suffix=item.get("victim_suffix", ""), 188 | opponent_suffix=item.get("opponent_suffix", ""), 189 | ) 190 | for item in score_paths 191 | ] 192 | dataset = pd.concat(datasets) 193 | 194 | for style in styles: 195 | plt.style.use(vis_styles.STYLES[style]) 196 | 197 | suptitle = not publication 198 | combine = not publication 199 | generator = util.apply_per_env(dataset, command, suptitle=suptitle, cmap=palette) 200 | for out_path in util.save_figs(fig_dir, generator, combine=combine): 201 | visualize_score_ex.add_artifact(filename=out_path) 202 | 203 | for observer in visualize_score_ex.observers: 204 | if hasattr(observer, "dir"): 205 | logger.info(f"Copying from {observer.dir} to {fig_dir}") 206 | copy_tree(observer.dir, fig_dir) 207 | break 208 | 209 | 210 | def main(): 211 | observer = FileStorageObserver(os.path.join("data", "sacred", "visualize_score")) 212 | visualize_score_ex.observers.append(observer) 213 | visualize_score_ex.run_commandline() 214 | 215 | 216 | if __name__ == "__main__": 217 | main() 218 | -------------------------------------------------------------------------------- /src/aprl/visualize/styles.py: -------------------------------------------------------------------------------- 1 | PRETTY_ENV = { 2 | "multicomp/KickAndDefend-v0": "Kick and Defend", 3 | "multicomp/SumoAntsAutoContact-v0": "Sumo Ants", 4 | "multicomp/SumoAnts-v0": "Sumo Ants", 5 | "multicomp/SumoHumansAutoContact-v0": "Sumo Humans", 6 | "multicomp/SumoHumans-v0": "Sumo Humans", 7 | "multicomp/YouShallNotPassHumans-v0": "You Shall Not Pass", 8 | } 9 | 10 | PRETTY_LABELS = { 11 | "Adv": "Adversary (Adv)", 12 | "Zoo": "Normal (Zoo)", 13 | "Rand": "Random (Rand)", 14 | "Zero": "Zero", 15 | } 16 | 17 | STYLES = { 18 | "paper": { 19 | "figure.figsize": (5.5, 7.5), 20 | "font.family": "serif", 21 | "font.serif": "Times New Roman", 22 | "font.weight": "bold", 23 | "font.size": 9, 24 | "legend.fontsize": 9, 25 | "axes.unicode_minus": False, # workaround bug with Unicode minus signs not appearing 26 | "axes.titlesize": 9, 27 | "axes.labelsize": 9, 28 | "xtick.labelsize": 9, 29 | "ytick.labelsize": 9, 
30 | }, 31 | "slides": {"figure.figsize": (9.32, 3)}, 32 | "slides_density": {"figure.figsize": (5, 3)}, 33 | "poster": { 34 | "font.family": "sans-serif", 35 | "font.sans-serif": "Arial", 36 | "font.weight": "bold", 37 | "font.size": 14, 38 | "legend.fontsize": 14, 39 | "axes.titlesize": 14, 40 | "axes.labelsize": 14, 41 | "xtick.labelsize": 14, 42 | "ytick.labelsize": 14, 43 | }, 44 | "monolithic": {"figure.figsize": (5.5, 2.0625)}, 45 | "twocol": {"figure.figsize": (2.75, 2.0625)}, 46 | "threecol": {"figure.figsize": (1.83, 1.7)}, 47 | "scores": {"font.size": 8, "ytick.labelsize": 8, "xtick.labelsize": 8}, 48 | "scores_monolithic": {"figure.figsize": (5.5, 1.4)}, 49 | "scores_monolithic_short": {"figure.figsize": (5.5, 1.3)}, 50 | "scores_twocol": {"figure.figsize": (2.7, 1.61)}, 51 | "scores_threecol": {"figure.figsize": (1.76, 1.6)}, 52 | "density_twocol": {"figure.figsize": (2.7, 2.0625), "legend.fontsize": 8}, 53 | "scores_poster_threecol": {"figure.figsize": (5.15, 3.1)}, 54 | "a4": {"figure.figsize": (8.27, 11.69)}, 55 | } 56 | -------------------------------------------------------------------------------- /src/aprl/visualize/tb.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import fnmatch 3 | import functools 4 | import itertools 5 | import json 6 | import logging 7 | import multiprocessing 8 | import os 9 | import traceback 10 | 11 | import tensorflow as tf 12 | 13 | logger = logging.getLogger("aprl.visualize.tb") 14 | 15 | 16 | def find_tfevents(log_dir): 17 | result = [] 18 | for root, dirs, files in os.walk(log_dir, followlinks=True): 19 | if root.endswith("rl/tb"): 20 | for name in files: 21 | if fnmatch.fnmatch(name, "events.out.tfevents.*"): 22 | result.append(os.path.join(root, name)) 23 | return result 24 | 25 | 26 | def exp_root_from_event(event_path): 27 | # tb_dirname = ...experiment/data/baselines/TIMESTAMP/rl/tb/events.* 28 | # exp_root = ...experiment/ 29 | return os.path.sep.join(event_path.split(os.path.sep)[:-6]) 30 | 31 | 32 | def read_events_file(events_filename, keys=None): 33 | events = [] 34 | try: 35 | for event in tf.train.summary_iterator(events_filename): 36 | row = {"wall_time": event.wall_time, "step": event.step} 37 | for value in event.summary.value: 38 | if keys is not None and value.tag not in keys: 39 | continue 40 | row[value.tag] = value.simple_value 41 | events.append(row) 42 | except Exception: # noqa:B902 43 | logger.error(f"While reading '{events_filename}': {traceback.print_exc()}") 44 | return events 45 | 46 | 47 | def read_sacred_config(exp_root, kind): 48 | sacred_config_path = os.path.join(exp_root, "data", "sacred", kind, "1", "config.json") 49 | with open(sacred_config_path, "r") as f: 50 | return json.load(f) 51 | 52 | 53 | def load_tb_data(log_dir, keys=None): 54 | event_paths = find_tfevents(log_dir) 55 | 56 | pool = multiprocessing.Pool() 57 | events_by_path = pool.map(functools.partial(read_events_file, keys=keys), event_paths) 58 | 59 | events_by_dir = {} 60 | for event_path, events in zip(event_paths, events_by_path): 61 | exp_root = exp_root_from_event(event_path) 62 | if exp_root not in events_by_dir: 63 | events_by_dir[exp_root] = [] 64 | events_by_dir[exp_root] += events 65 | 66 | config_by_dir = { 67 | dirname: read_sacred_config(dirname, "train") for dirname in events_by_dir.keys() 68 | } 69 | 70 | return config_by_dir, events_by_dir 71 | 72 | 73 | def split_by_keys(configs, events, keys): 74 | res = collections.defaultdict(list) 75 | for dirname, config 
in configs.items(): 76 | event = events[dirname] 77 | cfg_vals = tuple(config[k] for k in keys) 78 | res[cfg_vals].append({"dir": dirname, "config": config, "events": event}) 79 | return res 80 | 81 | 82 | def tb_apply(configs, events, split_keys, fn, **kwargs): 83 | events_by_plot = split_by_keys(configs, events, split_keys) 84 | 85 | pool = multiprocessing.Pool() 86 | map_fn = functools.partial(fn, **kwargs) 87 | res = pool.map(map_fn, events_by_plot.items()) 88 | res = itertools.chain(*res) 89 | return res 90 | -------------------------------------------------------------------------------- /tests/SumoAnts_traj/agent_0.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/SumoAnts_traj/agent_0.npz -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/__init__.py -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/old_ppo2/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/old_ppo2/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/old_ppo2/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/old_ppo2/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/old_ppo2/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/old_ppo2/ret_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo1/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo1/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo1/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo1/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo1/ret_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo2/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo2/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo2/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo2/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/ppo2/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/ppo2/ret_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/sac/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/sac/model.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/sac/obs_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/sac/obs_rms.pkl -------------------------------------------------------------------------------- /tests/dummy_sumo_ants/sac/ret_rms.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/dummy_sumo_ants/sac/ret_rms.pkl -------------------------------------------------------------------------------- /tests/policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HumanCompatibleAI/adversarial-policies/4b517793241cd7ea17112066f7764b0d035d4dfe/tests/policies/__init__.py -------------------------------------------------------------------------------- /tests/test_agents.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from ilqr import iLQR 3 | import numpy as np 4 | import pytest 5 | 6 | from aprl.agents.monte_carlo import ( 7 | MonteCarloParallel, 8 | MonteCarloSingle, 9 | MujocoResettableWrapper, 10 | receding_horizon, 11 | ) 12 | from aprl.agents.mujoco_lqr import ( 13 | MujocoFiniteDiffCost, 14 | MujocoFiniteDiffDynamicsBasic, 15 | MujocoFiniteDiffDynamicsPerformance, 16 | ) 17 | 18 | dynamics_list = [MujocoFiniteDiffDynamicsBasic, MujocoFiniteDiffDynamicsPerformance] 19 | 20 | 21 | @pytest.mark.parametrize("dynamics_cls", dynamics_list) 22 | def test_lqr_mujoco(dynamics_cls): 23 | """Smoke test for MujcooFiniteDiff{Dynamics,Cost}. 
24 | Jupyter notebook experiments/mujoco_control.ipynb has quantitative results 25 | attained; for efficiency, we only run for a few iterations here.""" 26 | env = gym.make("Reacher-v2").unwrapped 27 | env.seed(42) 28 | env.reset() 29 | dynamics = dynamics_cls(env) 30 | cost = MujocoFiniteDiffCost(env) 31 | N = 10 32 | ilqr = iLQR(dynamics, cost, N) 33 | x0 = dynamics.get_state() 34 | us_init = np.array([env.action_space.sample() for _ in range(N)]) 35 | xs, us = ilqr.fit(x0, us_init, n_iterations=3) 36 | assert x0.shape == xs[0].shape 37 | assert xs.shape[0] == N + 1 38 | assert us.shape == (N, 2) 39 | assert env.action_space.contains(us[0]) 40 | 41 | 42 | def rollout(env, actions): 43 | obs, rews, dones, infos = [], [], [], [] 44 | for a in actions: 45 | ob, rew, done, info = env.step(a) 46 | obs.append(ob) 47 | rews.append(rew) 48 | dones.append(done) 49 | infos.append(info) 50 | obs = np.array(obs) 51 | rews = np.array(rews) 52 | dones = np.array(dones) 53 | return obs, rews, dones, infos 54 | 55 | 56 | def make_mujoco_env(env_name, seed): 57 | env = gym.make(env_name) 58 | env = MujocoResettableWrapper(env.unwrapped) 59 | env.seed(seed) 60 | env.reset() 61 | return env 62 | 63 | 64 | MONTE_CARLO_ENVS = ["Reacher-v2", "HalfCheetah-v2", "Hopper-v2"] 65 | 66 | 67 | @pytest.mark.parametrize("env_name", MONTE_CARLO_ENVS) 68 | def test_mujoco_reset_env(env_name, horizon=10, seed=42): 69 | env = make_mujoco_env(env_name, seed) 70 | state = env.get_state() 71 | actions = [env.action_space.sample() for _ in range(horizon)] 72 | 73 | first_obs, first_rews, first_dones, _first_infos = rollout(env, actions) 74 | env.set_state(state) 75 | second_obs, second_rews, second_dones, _second_infos = rollout(env, actions) 76 | 77 | np.testing.assert_almost_equal(second_obs, first_obs, decimal=5) 78 | np.testing.assert_almost_equal(second_rews, first_rews, decimal=5) 79 | assert (first_dones == second_dones).all() 80 | 81 | 82 | def check_monte_carlo( 83 | kind, score_thresholds, total_horizon, planning_horizon, trajectories, seed=42 84 | ): 85 | def f(env_name): 86 | # Setup 87 | env = make_mujoco_env(env_name, seed) 88 | if kind == "single": 89 | mc = MonteCarloSingle(env, planning_horizon, trajectories) 90 | elif kind == "parallel": 91 | env_fns = [lambda: make_mujoco_env(env_name, seed) for _ in range(2)] 92 | mc = MonteCarloParallel(env_fns, planning_horizon, trajectories) 93 | else: # pragma: no cover 94 | raise ValueError("Unrecognized kind '{}'".format(kind)) 95 | mc.seed(seed) 96 | 97 | # Check for side-effects 98 | state = env.get_state() 99 | _ = mc.best_action(state) 100 | assert (env.get_state() == state).all(), "Monte Carlo search has side effects" 101 | 102 | # One receding horizon rollout of Monte Carlo search 103 | total_rew = 0 104 | prev_done = False 105 | for i, (a, ob, rew, done, info) in enumerate(receding_horizon(mc, env)): 106 | assert not prev_done, "should terminate if env returns done" 107 | prev_done = done 108 | assert env.action_space.contains(a) 109 | assert env.observation_space.contains(ob) 110 | total_rew += rew 111 | 112 | if i >= total_horizon: 113 | break 114 | assert i == total_horizon or done 115 | 116 | # Check it does better than random sequences 117 | random_rews = [] 118 | for i in range(10): 119 | env.action_space.np_random.seed(seed + i) 120 | action_seq = [env.action_space.sample() for _ in range(total_horizon)] 121 | env.set_state(state) 122 | _, rews, _, _ = rollout(env, action_seq) 123 | random_rew = sum(rews) 124 | random_rews.append(random_rew) 125 | 
assert total_rew >= random_rew, "random sequence {}".format(i) 126 | print( 127 | f"Random actions on {env_name} for {total_horizon} obtains " 128 | f"mean {np.mean(random_rews)} s.d. {np.std(random_rews)}" 129 | ) 130 | 131 | # Check against pre-defined score threshold 132 | assert total_rew >= score_thresholds[env_name] 133 | 134 | # Cleanup 135 | if kind == "parallel": 136 | mc.close() 137 | with pytest.raises(BrokenPipeError): 138 | mc.best_action(state) 139 | 140 | return f 141 | 142 | 143 | MC_SINGLE_THRESHOLDS = { 144 | "Reacher-v2": -11, # tested -9.5, random -17.25 s.d. 1.5 145 | "HalfCheetah-v2": 19, # tested 21.6, random -4.2 s.d. 3.7 146 | "Hopper-v2": 29, # tested 31.1, random 15.2 s.d. 5.9 147 | } 148 | MC_PARALLEL_THRESHOLDS = { 149 | "Reacher-v2": -17, # tested at -15.3; random -25.8 s.d. 1.8 150 | "HalfCheetah-v2": 33, # tested at 35.5; random -6.0 s.d. 7.1 151 | "Hopper-v2": 52, # tested at 54.7; random 21.1 s.d. 13.2 152 | } 153 | _test_mc_single = check_monte_carlo( 154 | "single", MC_SINGLE_THRESHOLDS, total_horizon=20, planning_horizon=10, trajectories=100 155 | ) 156 | _test_mc_parallel = check_monte_carlo( 157 | "parallel", MC_PARALLEL_THRESHOLDS, total_horizon=30, planning_horizon=15, trajectories=200 158 | ) 159 | test_mc_single = pytest.mark.parametrize("env_name", MONTE_CARLO_ENVS)(_test_mc_single) 160 | test_mc_parallel = pytest.mark.parametrize("env_name", MONTE_CARLO_ENVS)(_test_mc_parallel) 161 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | 4 | import gym 5 | 6 | from aprl.common.multi_monitor import MultiMonitor 7 | import aprl.envs # noqa: F401 8 | 9 | 10 | def test_multi_monitor(): 11 | """Smoke test for MultiMonitor.""" 12 | env = gym.make("aprl/IteratedMatchingPennies-v0") 13 | env.seed(42) 14 | with tempfile.TemporaryDirectory(prefix="test_multi_mon") as d: 15 | env = MultiMonitor(env, filename=os.path.join(d, "test")) 16 | for eps in range(5): 17 | env.reset() 18 | done = False 19 | while not done: 20 | a = env.action_space.sample() 21 | _, _, done, info = env.step(a) 22 | epinfo = info["episode"] 23 | assert set(epinfo.keys()) == {"r", "r0", "r1", "l", "t"} 24 | -------------------------------------------------------------------------------- /tests/test_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.spaces import Tuple 3 | import numpy as np 4 | import pytest 5 | 6 | from aprl.envs import multi_agent 7 | 8 | spec_list = [ 9 | spec 10 | for spec in sorted(gym.envs.registration.registry.all(), key=lambda x: x.id) 11 | if spec.id.startswith("aprl/") or spec.id.startswith("multicomp/") 12 | ] 13 | 14 | 15 | def make_env(spec, i=0): 16 | env = spec.make() 17 | env.seed(42 + i) 18 | return env 19 | 20 | 21 | @pytest.yield_fixture 22 | def env_from_spec(spec): 23 | env = make_env(spec) 24 | yield env 25 | env.close() 26 | 27 | 28 | def test_envs_exist(): 29 | assert len(spec_list) > 0, "No aprl environments detected" 30 | 31 | 32 | @pytest.mark.parametrize("spec", spec_list) 33 | def test_random_rollout(env_from_spec): 34 | """Based on Gym smoke test in gym.envs.tests.test_envs.""" 35 | ob = env_from_spec.reset() 36 | for _ in range(1000): 37 | assert env_from_spec.observation_space.contains(ob) 38 | a = env_from_spec.action_space.sample() 39 | assert env_from_spec.action_space.contains(a) 40 | ob, reward, done, 
info = env_from_spec.step(a) 41 | if done: 42 | break 43 | 44 | 45 | @pytest.mark.parametrize("spec", spec_list) 46 | def test_env(env_from_spec): 47 | """Based on Gym smoke test in gym.envs.tests.test_envs.""" 48 | ob_space = env_from_spec.observation_space 49 | act_space = env_from_spec.action_space 50 | ob = env_from_spec.reset() 51 | assert ob_space.contains(ob), "Reset observation: {!r} not in space".format(ob) 52 | a = act_space.sample() 53 | ob, reward, done, _info = env_from_spec.step(a) 54 | assert ob_space.contains(ob), "Step observation: {!r} not in space".format(ob) 55 | assert isinstance(done, bool), "Expected {} to be a boolean".format(done) 56 | 57 | if hasattr(env_from_spec, "num_agents"): # multi agent environment 58 | assert len(reward) == env_from_spec.num_agents 59 | assert isinstance(env_from_spec.observation_space, Tuple), "Observations should be Tuples" 60 | assert isinstance(env_from_spec.action_space, Tuple), "Actions should be Tuples" 61 | assert len(env_from_spec.observation_space.spaces) == env_from_spec.num_agents 62 | assert len(env_from_spec.action_space.spaces) == env_from_spec.num_agents 63 | else: # pragma: no cover 64 | assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env_from_spec) 65 | 66 | for mode in env_from_spec.metadata.get("render.modes", []): 67 | env_from_spec.render(mode=mode) 68 | 69 | # Make sure we can render the environment after close. 70 | for mode in env_from_spec.metadata.get("render.modes", []): 71 | env_from_spec.render(mode=mode) 72 | 73 | 74 | # Test VecMultiEnv classes 75 | 76 | 77 | def assert_envs_equal(env1, env2, num_steps, check_info: bool = True): 78 | """ 79 | Compare two environments over num_steps steps and make sure 80 | that the observations produced by each are the same when given 81 | the same actions. 82 | """ 83 | assert env1.num_envs == env2.num_envs 84 | assert env1.observation_space == env2.observation_space 85 | assert env1.action_space == env2.action_space 86 | 87 | try: 88 | obs1, obs2 = env1.reset(), env2.reset() 89 | assert type(obs1) == type(obs2) 90 | # TODO: sample actions sensitive to num_envs. 91 | # (Maybe add a helper function to make this easy in VecEnv? Feels like a design flaw.) 
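        # A helper along these lines (hypothetical, not currently part of aprl) could wrap
        # the sampling mentioned in the TODO above:
        #     def sample_tuple_actions(venv):
        #         per_env = tuple(venv.action_space.sample() for _ in range(venv.num_envs))
        #         return multi_agent.tuple_transpose(per_env)
        # For now the loop below does the same thing inline: draw one joint action per
        # sub-environment, then transpose into the per-agent tuple layout step_async expects.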
92 | 93 | if isinstance(obs1, tuple): 94 | for x, y in zip(obs1, obs2): 95 | assert x.shape == y.shape 96 | assert np.allclose(x, y) 97 | else: # pragma: no cover 98 | assert np.array(obs1).shape == np.array(obs2).shape 99 | assert np.allclose(obs1, obs2) 100 | 101 | if isinstance(env1.action_space, Tuple): 102 | for space in env1.action_space.spaces: 103 | space.np_random.seed(1337) 104 | else: # pragma: no cover 105 | env1.action_space.np_random.seed(1337) 106 | 107 | for _ in range(num_steps): 108 | actions = tuple((env1.action_space.sample() for _ in range(env1.num_envs))) 109 | actions = multi_agent.tuple_transpose(actions) 110 | for env in [env1, env2]: 111 | env.step_async(actions) 112 | outs1 = env1.step_wait() 113 | outs2 = env2.step_wait() 114 | # Check ob, rew, done; ignore infos 115 | for out1, out2 in zip(outs1[:3], outs2[:3]): 116 | assert np.allclose(out1, out2) 117 | if check_info: 118 | assert list(outs1[3]) == list(outs2[3]) 119 | finally: 120 | env1.close() 121 | env2.close() 122 | 123 | 124 | @pytest.mark.parametrize("spec", spec_list) 125 | def test_vec_env(spec): 126 | """Test that our {Dummy,Subproc}VecMultiEnv gives the same results as 127 | each other.""" 128 | env_fns = [lambda: make_env(spec, i) for i in range(4)] 129 | venv1 = multi_agent.make_dummy_vec_multi_env(env_fns) 130 | venv2 = multi_agent.make_subproc_vec_multi_env(env_fns) 131 | is_multicomp = spec.id.startswith("multicomp/") 132 | # Can't easily compare info dicts returned by multicomp/ environments, so just skip that check 133 | assert_envs_equal(venv1, venv2, 100, check_info=not is_multicomp) 134 | -------------------------------------------------------------------------------- /vendor/Xdummy-entrypoint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import argparse 3 | import os 4 | import sys 5 | import subprocess 6 | 7 | parser = argparse.ArgumentParser() 8 | args, extra_args = parser.parse_known_args() 9 | subprocess.Popen(["nohup", "Xdummy"], stdout=open('/dev/null', 'w'), stderr=open('/dev/null', 'w')) 10 | os.environ['DISPLAY'] = ':0' 11 | if not extra_args: 12 | sys.argv = ['/bin/bash'] 13 | else: 14 | sys.argv = extra_args 15 | # Explicitly flush right before the exec since otherwise things might get 16 | # lost in Python's buffers around stdout/stderr (!). 17 | sys.stdout.flush() 18 | sys.stderr.flush() 19 | os.execvpe(sys.argv[0], sys.argv, os.environ) 20 | 21 | --------------------------------------------------------------------------------
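Note on the reward shaping logic in src/aprl/training/shaping_wrappers.py above: apply_reward_wrapper either anneals the dense-to-sparse mix linearly over the first anneal_frac of training (LinearAnnealer(1, 0, anneal_frac)) or holds it fixed at 0.5 (ConstantAnnealer(0.5)), and _anneal() blends the two reward terms with the resulting coefficient. Below is a minimal, self-contained sketch of that blending. linear_coeff is a simplified stand-in for LinearAnnealer (the real annealer gets its training-progress signal from the Scheduler, which is not shown in this section); only the blend() formula is taken directly from _anneal().

def linear_coeff(start, end, anneal_frac, progress):
    """Interpolate from `start` to `end` over the first `anneal_frac` of training.

    `progress` is the fraction of training completed, in [0, 1].
    Simplified stand-in for LinearAnnealer."""
    if anneal_frac <= 0:
        return end
    frac = min(progress / anneal_frac, 1.0)
    return start + frac * (end - start)


def blend(reward_dict, c):
    """Same weighting as _anneal: c weights the dense term, (1 - c) the sparse term."""
    assert 0 <= c <= 1
    return reward_dict["sparse"] * (1 - c) + reward_dict["dense"] * c


rewards = {"sparse": 10.0, "dense": 0.5}
for progress in (0.0, 0.25, 0.5, 1.0):
    c = linear_coeff(1, 0, 0.5, progress)  # same arguments as LinearAnnealer(1, 0, anneal_frac=0.5)
    print(f"progress={progress:.2f}  c={c:.2f}  shaped reward={blend(rewards, c):.2f}")

Early in training the shaped reward is dominated by the dense term; once the annealed fraction has elapsed, only the sparse win/loss reward remains. With ConstantAnnealer(0.5) the two terms instead stay equally weighted throughout, as the comment in apply_reward_wrapper notes.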
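The Monte Carlo planner tests in tests/test_agents.py also double as the clearest usage documentation for aprl.agents.monte_carlo. The following is a rough usage sketch distilled from check_monte_carlo above, assuming a working MuJoCo installation and the aprl package; constructor arguments are passed positionally as (env, planning_horizon, trajectories), exactly as in the test, since the keyword names are not shown in this section.

import gym

from aprl.agents.monte_carlo import (
    MonteCarloSingle,
    MujocoResettableWrapper,
    receding_horizon,
)

# Wrap a MuJoCo env so its full simulator state can be saved and restored during planning.
env = MujocoResettableWrapper(gym.make("Reacher-v2").unwrapped)
env.seed(42)
env.reset()

mc = MonteCarloSingle(env, 10, 100)  # positional: env, planning_horizon, trajectories
mc.seed(42)

# receding_horizon repeatedly asks the planner for the best action and steps the real env.
total_rew = 0.0
for i, (action, ob, rew, done, _info) in enumerate(receding_horizon(mc, env)):
    total_rew += rew
    if done or i >= 20:  # total_horizon=20, as in the single-process test
        break
print(f"Receding-horizon return on Reacher-v2: {total_rew:.2f}")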