├── .dockerignore ├── .env ├── .gitignore ├── LICENSE ├── README.md ├── config ├── locals ├── ray-autoscaler-ec2.yaml └── ray-autoscaler-gce.yaml ├── docker ├── Dockerfile.softlearning ├── Dockerfile.softlearning.base.cpu ├── Dockerfile.softlearning.base.gpu ├── cloudbuild.yaml ├── docker-compose.cloud.yml ├── docker-compose.dev.cpu.yml ├── docker-compose.dev.gpu.yml └── entrypoint.sh ├── environment.yml ├── examples ├── __init__.py ├── development │ ├── __init__.py │ ├── main.py │ ├── main_test.py │ ├── simulate_policy.py │ └── variants.py ├── instrument.py ├── multi_goal │ ├── __init__.py │ ├── main.py │ └── variants.py └── utils.py ├── models ├── cross_maze_ant.xml ├── pusher_2d.xml └── simple_maze_ant.xml ├── requirements.txt ├── scripts ├── __init__.py ├── archive_gs.py ├── deploy-aws.sh ├── install_mujoco.py ├── sync_gs.py └── test-cloud-build.sh ├── setup.py └── softlearning ├── __init__.py ├── algorithms ├── __init__.py ├── rl_algorithm.py ├── sac.py └── sql.py ├── distributions ├── __init__.py └── bijectors │ ├── __init__.py │ ├── conditional_scale.py │ ├── conditional_scale_test.py │ ├── conditional_shift.py │ ├── conditional_shift_test.py │ ├── real_nvp_flow.py │ └── real_nvp_flow_test.py ├── environments ├── __init__.py ├── adapters │ ├── __init__.py │ ├── dm_control_adapter.py │ ├── dm_control_adapter_test.py │ ├── gym_adapter.py │ ├── gym_adapter_test.py │ ├── robosuite_adapter.py │ ├── robosuite_adapter_test.py │ ├── softlearning_env.py │ └── softlearning_env_test.py ├── dm_control │ ├── __init__.py │ └── suite │ │ ├── __init__.py │ │ └── wrappers │ │ └── __init__.py ├── gym │ ├── __init__.py │ ├── mujoco │ │ ├── __init__.py │ │ ├── image_pusher_2d.py │ │ └── pusher_2d.py │ ├── multi_goal.py │ ├── robotics │ │ └── __init__.py │ └── wrappers │ │ ├── __init__.py │ │ ├── rescale_observation.py │ │ └── rescale_observation_test.py ├── helpers.py └── utils.py ├── misc ├── __init__.py ├── kernel.py └── plotter.py ├── models ├── __init__.py ├── convnet.py ├── feedforward.py ├── feedforward_test.py └── utils.py ├── policies ├── __init__.py ├── base_policy.py ├── gaussian_policy.py ├── gaussian_policy_test.py ├── real_nvp_policy.py ├── real_nvp_policy_test.py ├── uniform_policy.py ├── uniform_policy_test.py └── utils.py ├── preprocessors └── __init__.py ├── replay_pools ├── __init__.py ├── flexible_replay_pool.py ├── flexible_replay_pool_test.py ├── goal_replay_pool.py ├── hindsight_experience_replay_pool.py ├── hindsight_experience_replay_pool_test.py ├── replay_pool.py ├── simple_replay_pool.py ├── simple_replay_pool_test.py └── union_pool.py ├── samplers ├── __init__.py ├── base_sampler.py ├── dummy_sampler.py ├── goal_sampler.py ├── remote_sampler.py ├── remote_sampler_test.py ├── simple_sampler.py └── utils.py ├── scripts ├── __init__.py └── console_scripts.py ├── utils ├── __init__.py ├── dict.py ├── gcp.py ├── git.py ├── gym.py ├── misc.py ├── numpy.py ├── random.py ├── serialization.py ├── serialization_test.py ├── tensorflow.py ├── times.py ├── tune.py └── video.py └── value_functions ├── __init__.py ├── base_value_function.py ├── base_value_function_test.py └── vanilla.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | Dockerfile 3 | db.sqlite3 4 | __pycache__ 5 | *.pyc 6 | *.pyo 7 | *.pyd 8 | .Python 9 | env 10 | pip-log.txt 11 | pip-delete-this-directory.txt 12 | .tox 13 | .coverage 14 | .coverage.* 15 | .cache 16 | coverage.xml 17 | *,cover 18 | *.log 19 | .git 20 | data/ 21 | tmp/ 22 | vis/ 23 | 
.vscode 24 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | SOFTLEARNING_DEV_TAG=20181212-dev-cpu-v1 2 | SOFTLEARNING_DEV_CPU_TAG=20181212-dev-cpu-v1 3 | SOFTLEARNING_DEV_GPU_TAG=20181212-dev-gpu-v1 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | # soft learning specific things 106 | *.swp 107 | .idea 108 | *.mp4 109 | data/ 110 | vis/ 111 | tmp/ 112 | vendor/* 113 | .pkl 114 | 115 | 116 | .mujoco/ 117 | .vscode/ 118 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Softlearning authors and contributors 4 | 5 | Softlearning uses a shared copyright model: each contributor holds copyright over 6 | their contributions to Softlearning. The project versioning records all such 7 | contribution and copyright details. 8 | 9 | By contributing to the Softlearning repository through pull-request, comment, 10 | or otherwise, the contributor releases their content to the license and 11 | copyright terms herein. 
12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a copy 14 | of this software and associated documentation files (the "Software"), to deal 15 | in the Software without restriction, including without limitation the rights 16 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | copies of the Software, and to permit persons to whom the Software is 18 | furnished to do so, subject to the following conditions: 19 | 20 | The above copyright notice and this permission notice shall be included in all 21 | copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 | SOFTWARE. 30 | -------------------------------------------------------------------------------- /config/locals: -------------------------------------------------------------------------------- 1 | AWS_ECR_REGISTRY_URL="" 2 | -------------------------------------------------------------------------------- /config/ray-autoscaler-ec2.yaml: -------------------------------------------------------------------------------- 1 | # A unique identifier for the head node and workers of this cluster. 2 | cluster_name: softlearning 3 | 4 | # The minimum number of worker nodes to launch in addition to the head 5 | # node. This number should be >= 0. 6 | min_workers: 0 7 | 8 | # The maximum number of worker nodes to launch in addition to the head 9 | # node. This takes precedence over min_workers. 10 | max_workers: 100 11 | 12 | # The initial number of worker nodes to launch in addition to the head 13 | # node. When the cluster is first brought up (or when it is refreshed with a 14 | # subsequent `ray up`) this number of nodes will be started. 15 | initial_workers: 0 16 | 17 | # This executes all commands on all nodes in the docker container, 18 | # and opens all the necessary ports to support the Ray cluster. 19 | # Empty string means disabled. 20 | docker: 21 | image: "" 22 | container_name: "" # e.g. ray_docker 23 | # container_name: "softlearning" 24 | 25 | # The autoscaler will scale up the cluster to this target fraction of resource 26 | # usage. For example, if a cluster of 10 nodes is 100% busy and 27 | # target_utilization is 0.8, it would resize the cluster to 13. This fraction 28 | # can be decreased to increase the aggressiveness of upscaling. 29 | # This value must be less than 1.0 for scaling to happen. 30 | target_utilization_fraction: 0.99 31 | 32 | # If a node is idle for this many minutes, it will be removed. 33 | idle_timeout_minutes: 5 34 | 35 | # Cloud-provider specific configuration. 36 | provider: 37 | type: aws 38 | region: us-west-2 39 | availability_zone: us-west-2a 40 | 41 | # How Ray will authenticate with newly launched nodes. 42 | auth: 43 | ssh_user: ubuntu 44 | # By default Ray creates a new private keypair, but you can also use your own. 45 | # If you do so, make sure to also set "KeyName" in the head and worker node 46 | # configurations below. 47 | # ssh_private_key: /path/to/your/key.pem 48 | 49 | # Provider-specific config for the head node, e.g. instance type.
By default 50 | # Ray will auto-configure unspecified fields such as SubnetId and KeyName. 51 | # For more documentation on available fields, see: 52 | # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances 53 | head_node: 54 | # TODO: pick suitable instance type (https://aws.amazon.com/ec2/instance-types) 55 | InstanceType: c5.2xlarge 56 | ImageId: 57 | 58 | # # You can provision additional disk space with a conf as follows 59 | # BlockDeviceMappings: 60 | # - DeviceName: /dev/sda1 61 | # Ebs: 62 | # VolumeSize: 50 63 | 64 | # Additional options in the boto docs. 65 | 66 | # Provider-specific config for worker nodes, e.g. instance type. By default 67 | # Ray will auto-configure unspecified fields such as SubnetId and KeyName. 68 | # For more documentation on available fields, see: 69 | # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances 70 | worker_nodes: 71 | # TODO: pick suitable instance type (https://aws.amazon.com/ec2/instance-types) 72 | InstanceType: c5.2xlarge 73 | # InstanceType: t2.micro 74 | ImageId: 75 | 76 | # Run workers on spot by default. Comment this out to use on-demand. 77 | InstanceMarketOptions: 78 | MarketType: spot 79 | # Additional options can be found in the boto docs, e.g. 80 | # SpotOptions: 81 | # MaxPrice: 0.5 82 | 83 | # Additional options in the boto docs. 84 | 85 | # Files or directories to copy to the head and worker nodes. The format is a 86 | # dictionary from REMOTE_PATH: LOCAL_PATH, e.g. 87 | file_mounts: { 88 | "/tmp/current_git_HEAD": "/.git/HEAD", 89 | "~/softlearning": "", 90 | "~/.mujoco/mjkey.txt": "~/.mujoco/mjkey.txt", 91 | } 92 | 93 | # List of shell commands to run to set up nodes. 94 | setup_commands: 95 | - >- 96 | pip install -U -e ~/softlearning 97 | 98 | initialization_commands: [] 99 | 100 | # Custom commands that will be run on the head node after common setup. 101 | head_setup_commands: [] 102 | 103 | # Custom commands that will be run on worker nodes after common setup. 104 | worker_setup_commands: [] 105 | 106 | # Command to start ray on the head node. You don't need to change this. 107 | head_start_ray_commands: 108 | - ray stop 109 | - ray start 110 | --head 111 | --redis-port=6379 112 | --object-manager-port=8076 113 | --autoscaling-config=~/ray_bootstrap_config.yaml 114 | --internal-config='{ 115 | "num_heartbeats_timeout":300, 116 | "raylet_heartbeat_timeout_milliseconds":1000 117 | }' 118 | 119 | # Command to start ray on worker nodes. You don't need to change this. 120 | worker_start_ray_commands: 121 | - ray stop 122 | - >- 123 | ray start 124 | --address=$RAY_HEAD_IP:6379 125 | --object-manager-port=8076 126 | -------------------------------------------------------------------------------- /config/ray-autoscaler-gce.yaml: -------------------------------------------------------------------------------- 1 | # A unique identifier for the head node and workers of this cluster. 2 | cluster_name: softlearning 3 | 4 | # The minimum number of worker nodes to launch in addition to the head 5 | # node. This number should be >= 0. 6 | min_workers: 0 7 | 8 | # The maximum number of worker nodes to launch in addition to the head 9 | # node. This takes precedence over min_workers. 10 | max_workers: 100 11 | 12 | # The initial number of worker nodes to launch in addition to the head 13 | # node. When the cluster is first brought up (or when it is refreshed with a 14 | # subsequent `ray up`) this number of nodes will be started.
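#
# A typical lifecycle for this config looks like the following (a sketch; it
# assumes the file is used from its in-repo path and that `ray` is installed
# on the local machine):
#
#   ray up config/ray-autoscaler-gce.yaml      # create or update the cluster
#   ray attach config/ray-autoscaler-gce.yaml  # open a shell on the head node
#   ray down config/ray-autoscaler-gce.yaml    # tear the cluster down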
15 | initial_workers: 0 16 | 17 | # This executes all commands on all nodes in the docker container, 18 | # and opens all the necessary ports to support the Ray cluster. 19 | # Empty string means disabled. 20 | docker: 21 | image: "" 22 | container_name: "" # e.g. ray_docker 23 | # container_name: "softlearning" 24 | 25 | # The autoscaler will scale up the cluster to this target fraction of resource 26 | # usage. For example, if a cluster of 10 nodes is 100% busy and 27 | # target_utilization is 0.8, it would resize the cluster to 13. This fraction 28 | # can be decreased to increase the aggressiveness of upscaling. 29 | # This value must be less than 1.0 for scaling to happen. 30 | target_utilization_fraction: 0.99 31 | 32 | # If a node is idle for this many minutes, it will be removed. 33 | idle_timeout_minutes: 5 34 | 35 | # Cloud-provider specific configuration. 36 | provider: 37 | type: gcp 38 | region: us-west1 39 | availability_zone: us-west1-a 40 | project_id: 41 | 42 | # How Ray will authenticate with newly launched nodes. 43 | auth: 44 | ssh_user: ubuntu 45 | # By default Ray creates a new private keypair, but you can also use your own. 46 | # If you do so, make sure to also set "KeyName" in the head and worker node 47 | # configurations below. 48 | # ssh_private_key: /path/to/your/key.pem 49 | 50 | # Provider-specific config for the head node, e.g. machine type. By default 51 | # Ray will auto-configure unspecified fields. 52 | # For more documentation on available fields, see: 53 | # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert 54 | head_node: 55 | machineType: n1-standard-4 # n1-highcpu-16 56 | disks: 57 | - boot: true 58 | autoDelete: true 59 | type: PERSISTENT 60 | initializeParams: 61 | diskSizeGb: 50 62 | # See https://cloud.google.com/compute/docs/images for more images 63 | sourceImage: projects//global/images/family/ 64 | 65 | # Additional options can be found in the compute docs at 66 | # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert 67 | 68 | # Provider-specific config for worker nodes, e.g. machine type. By default 69 | # Ray will auto-configure unspecified fields. 70 | # For more documentation on available fields, see: 71 | # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert 72 | worker_nodes: 73 | machineType: n1-standard-8 # n1-highcpu-8 74 | disks: 75 | - boot: true 76 | autoDelete: true 77 | type: PERSISTENT 78 | initializeParams: 79 | diskSizeGb: 50 80 | # See https://cloud.google.com/compute/docs/images for more images 81 | sourceImage: projects//global/images/family/ 82 | # Run workers on preemptible instances by default. 83 | # Note that GCP preemptible instances automatically shut down after 24h. 84 | # Comment this out to use on-demand. 85 | scheduling: 86 | - preemptible: true 87 | - onHostMaintenance: TERMINATE 88 | 89 | # Additional options can be found in the compute docs at 90 | # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert 91 | 92 | # Files or directories to copy to the head and worker nodes. The format is a 93 | # dictionary from REMOTE_PATH: LOCAL_PATH, e.g. 94 | file_mounts: { 95 | "/tmp/current_git_HEAD": "/.git/HEAD", 96 | "~/softlearning": "", 97 | "~/.mujoco/mjkey.txt": "~/.mujoco/mjkey.txt", 98 | } 99 | 100 | # List of shell commands to run to set up nodes.
101 | setup_commands: 102 | - >- 103 | pip install -U -e ~/softlearning 104 | 105 | initialization_commands: 106 | - gcloud auth configure-docker 107 | 108 | # Custom commands that will be run on the head node after common setup. 109 | head_setup_commands: [] 110 | 111 | # Custom commands that will be run on worker nodes after common setup. 112 | worker_setup_commands: [] 113 | 114 | # Command to start ray on the head node. You don't need to change this. 115 | head_start_ray_commands: 116 | - ray stop 117 | - ray start 118 | --head 119 | --redis-port=6379 120 | --object-manager-port=8076 121 | --autoscaling-config=~/ray_bootstrap_config.yaml 122 | --internal-config='{ 123 | "num_heartbeats_timeout":300, 124 | "raylet_heartbeat_timeout_milliseconds":1000 125 | }' 126 | 127 | # Command to start ray on worker nodes. You don't need to change this. 128 | worker_start_ray_commands: 129 | - ray stop 130 | - >- 131 | ray start 132 | --address=$RAY_HEAD_IP:6379 133 | --object-manager-port=8076 134 | -------------------------------------------------------------------------------- /docker/Dockerfile.softlearning: -------------------------------------------------------------------------------- 1 | # WIP 2 | 3 | # Dockerfile that clones the softlearning repo into the softlearning base 4 | # image. Should be used for running stuff on the cloud, e.g. with ray. 5 | 6 | # Base container to clone the softlearning-private repo 7 | FROM ubuntu:18.04 as softlearning_cloner 8 | # Note that the SSH_PRIVATE_KEY arg is NOT saved on the final container 9 | 10 | # add credentials on build 11 | ARG SSH_PRIVATE_KEY 12 | 13 | # install git 14 | RUN apt-get update \ 15 | && apt-get install -y git \ 16 | && mkdir /root/.ssh/ \ 17 | && echo "${SSH_PRIVATE_KEY}" > /root/.ssh/id_rsa \ 18 | && chmod 0600 /root/.ssh/id_rsa \ 19 | && touch /root/.ssh/known_hosts \ 20 | && ssh-keyscan github.com >> /root/.ssh/known_hosts \ 21 | && git clone git@github.com:rail-berkeley/softlearning.git /root/softlearning \ 22 | && rm -vf /root/.ssh/id_rsa 23 | 24 | # Base container to clone the sac_envs repo 25 | FROM ubuntu:18.04 as sac_envs_cloner 26 | # Note that the SSH_PRIVATE_KEY arg is NOT saved on the final container 27 | 28 | # add credentials on build 29 | ARG SSH_PRIVATE_KEY 30 | 31 | # install git 32 | RUN apt-get update \ 33 | && apt-get install -y git \ 34 | && mkdir /root/.ssh/ \ 35 | && echo "${SSH_PRIVATE_KEY}" > /root/.ssh/id_rsa \ 36 | && chmod 0600 /root/.ssh/id_rsa \ 37 | && touch /root/.ssh/known_hosts \ 38 | && ssh-keyscan github.com >> /root/.ssh/known_hosts \ 39 | && git clone git@github.com:vikashplus/sac_envs.git /root/sac_envs \ 40 | && rm -vf /root/.ssh/id_rsa 41 | 42 | FROM softlearning-dev 43 | 44 | # ========== Add codebase stub ========== 45 | COPY --from=softlearning_cloner /root/softlearning /root/softlearning 46 | COPY --from=sac_envs_cloner /root/sac_envs /root/sac_envs 47 | WORKDIR /root/softlearning 48 | -------------------------------------------------------------------------------- /docker/Dockerfile.softlearning.base.cpu: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.0-experimental 2 | 3 | # Base softlearning container that contains all softlearning requirements, 4 | # but not the actual softlearning repo. Could be used, for example, when developing 5 | # softlearning, in which case you would mount the softlearning repo into the container 6 | # as a volume, and thus be able to modify code on the host, yet run things inside 7 | # the container.
You are encouraged to use docker-compose (docker-compose.dev.{cpu,gpu}.yml), 8 | # which should allow you to set up your environment with a single command. 9 | # 10 | # Usage: 11 | # 1) Build image. Typically `docker-compose` would handle this automatically for us 12 | # # but due to incompatible secret handling, we have to build the image manually. 13 | # DOCKER_BUILDKIT=1 \ 14 | # docker build \ 15 | # -f ./docker/Dockerfile.softlearning.base.cpu \ 16 | # -t softlearning:latest-cpu \ 17 | # --progress=plain \ 18 | # --secret id=mjkey,src="${HOME}/.mujoco/mjkey.txt" . 19 | # 2) Run: 20 | # docker-compose \ 21 | # -p ${USER} \ 22 | # -f ./docker/docker-compose.dev.cpu.yml \ 23 | # up \ 24 | # -d \ 25 | # --force-recreate 26 | 27 | 28 | ARG UBUNTU_VERSION=18.04 29 | 30 | FROM ubuntu:${UBUNTU_VERSION} as base 31 | 32 | ARG UBUNTU_VERSION 33 | 34 | SHELL ["/bin/bash", "-c"] 35 | 36 | # MAINTAINER Kristian Hartikainen 37 | 38 | ENV DEBIAN_FRONTEND="noninteractive" 39 | # See http://bugs.python.org/issue19846 40 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 41 | ENV PATH /opt/conda/bin:$PATH 42 | 43 | RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \ 44 | libglib2.0-0 libxext6 libsm6 libxrender1 \ 45 | git mercurial subversion 46 | 47 | RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ 48 | /bin/bash /tmp/miniconda.sh -b -p /opt/conda && \ 49 | rm /tmp/miniconda.sh && \ 50 | ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ 51 | echo ". /opt/conda/etc/profile.d/conda.sh" >> /etc/bash.bashrc 52 | 53 | RUN apt-get install -y curl grep sed dpkg && \ 54 | TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \ 55 | curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \ 56 | dpkg -i tini.deb && \ 57 | rm tini.deb && \ 58 | apt-get clean \ 59 | && rm -rf /var/lib/apt/lists/* 60 | 61 | 62 | RUN conda update -y --name base conda \ 63 | && conda clean --all -y 64 | 65 | 66 | # ========== Softlearning dependencies ========== 67 | RUN apt-get update -y \ 68 | && apt-get install -y --no-install-recommends \ 69 | build-essential \ 70 | curl \ 71 | git \ 72 | gnupg2 \ 73 | make \ 74 | cmake \ 75 | ffmpeg \ 76 | swig \ 77 | libz-dev \ 78 | unzip \ 79 | zlib1g-dev \ 80 | libglfw3 \ 81 | libglfw3-dev \ 82 | libxrandr2 \ 83 | libxinerama-dev \ 84 | libxi6 \ 85 | libxcursor-dev \ 86 | libgl1-mesa-dev \ 87 | libgl1-mesa-glx \ 88 | libglew-dev \ 89 | libosmesa6-dev \ 90 | lsb-release \ 91 | ack-grep \ 92 | patchelf \ 93 | vim \ 94 | emacs \ 95 | wget \ 96 | xpra \ 97 | xserver-xorg-dev \ 98 | xvfb \ 99 | && export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" \ 100 | && echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" \ 101 | | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \ 102 | && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \ 103 | | apt-key add - \ 104 | && apt-get update -y \ 105 | && apt-get install -y google-cloud-sdk \ 106 | && apt-get clean \ 107 | && rm -rf /var/lib/apt/lists/* 108 | 109 | 110 | # ========= MuJoCo =============== 111 | COPY ./scripts/install_mujoco.py /tmp/ 112 | 113 | RUN /tmp/install_mujoco.py --mujoco-path=/root/.mujoco --versions 1.50 2.00 \ 114 | && ln -s /root/.mujoco/mujoco200_linux /root/.mujoco/mujoco200 \ 115 | && rm /tmp/install_mujoco.py 116 | 117 | ENV LD_LIBRARY_PATH /root/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH}
118 | ENV LD_LIBRARY_PATH /root/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH} 119 | ENV LD_LIBRARY_PATH /root/.mujoco/mujoco200_linux/bin:${LD_LIBRARY_PATH} 120 | 121 | # This is a hack required to make mujoco-py compile in GPU mode 122 | RUN mkdir -p /usr/lib/nvidia-000 123 | ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/lib/nvidia-000 124 | 125 | 126 | # ========== Conda Environment ========== 127 | COPY ./environment.yml /tmp/ 128 | COPY ./requirements.txt /tmp/ 129 | 130 | # NOTE: Fetch `mjkey.txt` from secret mount to avoid writing it to the build 131 | # history. For details, see: 132 | # https://docs.docker.com/develop/develop-images/build_enhancements/#new-docker-build-secret-information 133 | RUN --mount=type=secret,id=mjkey,dst=/root/.mujoco/mjkey.txt \ 134 | conda env update -f /tmp/environment.yml \ 135 | && conda clean --all -y 136 | 137 | RUN echo "conda activate softlearning" >> ~/.bashrc \ 138 | && echo "cd ~/softlearning" >> ~/.bashrc 139 | 140 | 141 | # =========== Container Entrypoint ============= 142 | COPY ./docker/entrypoint.sh /entrypoint.sh 143 | ENTRYPOINT ["/usr/bin/tini", "--", "/entrypoint.sh"] 144 | -------------------------------------------------------------------------------- /docker/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # https://cloud.google.com/cloud-build/docs/build-config 2 | steps: 3 | 4 | # 1. Build gpu image 5 | # Build image with docker-compose 6 | - name: 'docker/compose:1.24.0' 7 | args: 8 | - '-f' 9 | - '/workspace/docker/docker-compose.dev.gpu.yml' 10 | - 'build' 11 | - '--force-rm' 12 | - '--parallel' 13 | secretEnv: 14 | - MJKEY 15 | env: 16 | - 'IMAGE_NAME=${REPO_NAME}' 17 | - 'IMAGE_TAG=${SHORT_SHA}' 18 | 19 | # 2. Retag the gpu image into a gcr.io repository. 20 | - name: 'gcr.io/cloud-builders/docker' 21 | args: 22 | - 'tag' 23 | - '${REPO_NAME}:${SHORT_SHA}-gpu' 24 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:${SHORT_SHA}-gpu' 25 | 26 | - name: 'gcr.io/cloud-builders/docker' 27 | args: 28 | - 'tag' 29 | - '${REPO_NAME}:${SHORT_SHA}-gpu' 30 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:latest-gpu' 31 | 32 | 33 | # 3. Build cpu image 34 | - name: 'docker/compose:1.24.0' 35 | args: 36 | - '-f' 37 | - '/workspace/docker/docker-compose.dev.cpu.yml' 38 | - 'build' 39 | - '--force-rm' 40 | - '--parallel' 41 | secretEnv: 42 | - MJKEY 43 | env: 44 | - 'IMAGE_NAME=${REPO_NAME}' 45 | - 'IMAGE_TAG=${SHORT_SHA}' 46 | 47 | # 4. Retag the cpu image into a gcr.io repository.
48 | - name: 'gcr.io/cloud-builders/docker' 49 | args: 50 | - 'tag' 51 | - '${REPO_NAME}:${SHORT_SHA}-cpu' 52 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:${SHORT_SHA}-cpu' 53 | 54 | - name: 'gcr.io/cloud-builders/docker' 55 | args: 56 | - 'tag' 57 | - '${REPO_NAME}:${SHORT_SHA}-cpu' 58 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:latest-cpu' 59 | 60 | 61 | options: 62 | substitution_option: 'ALLOW_LOOSE' 63 | secrets: 64 | - kmsKeyName: projects/tuomas-softlearning/locations/global/keyRings/softlearning-ring/cryptoKeys/softlearning-test 65 | secretEnv: 66 | MJKEY: | 67 | CiQAAis3HkqhleV++GZn2GvPx8fsw7lGNoPscpAqqdhMA8T64ZgS7wcAsW+D72srQXBnF3Gxpn/D 68 | WVPtYTiehuCCC+Knnl9MqU/X4/8b1TIqYzPfZMZmP85b4gzWZJ4LPYJVVZbjzZI3vSn8OB0ejXa3 69 | 5AK+NIjonhq/6/f6CeXJxlEXj8OL7PwFFe09yMjmpvcJPsvgJrGseKuZU/Bbl4iR3DTtNqA/0eId 70 | YhPmuq3XL4MCnVQ+OKNFfQZtCHEVXNvtBjZ0j0U6/pQMEOmhbOzp/zMEYeMH4/P553bF3m3L7Qi5 71 | Zt8lIVqIHs4mI9VBKQ2CRlUzIh5+Y8Luk5csErHM9ilg6dViEJAA3+cEijd92x3Df2/NevQdN7FB 72 | c74obb5u87V6GRjYor0HJujH9RJNcFXKs5Wi9x1/8Fw4fNH2fDBEAdOjDsSlL9zF/b6+9D3ncvan 73 | aWhtzNjr14coi9Ay9LoSJZRaCLUIB1VG6w5deQMpKuEs9b54u9UkwgMys5H9sEEnsuc6IQR1Prnz 74 | 7xQN7I8XfiSYwg3xoWgHMNHrcyZ/FLNXhP3j51L1AboSaNfe1SPrtiJie2pjrcaLMNI7iWbUkLgs 75 | JFvszGbbDTFTw6RA+U+uz6S4EnioFJJHHxsM0nB7CU7JC81PQ2m1lKdaKWMcZ5qsIyj7iFZjQWn0 76 | b1LMuPD0xdOt2FQ7BPlX8uE9Qrc3xJRFgscW0O0I1880OrjFaLQlosQjE/Sz43VaBE/xTsnRWAxT 77 | gVK3wK0pok5oiLxwbvXII0T/tv5lQdOxAcbKhEMXSWLZv86tQaOKW6rFPrL2MY2yEOcE3bk1oHzX 78 | vutfuCxdWKz42IPWFEhs69NxgxT1iBLRqR9KjKiTYnnXdTjOxJ0i/M1Q8PoMTB6QwXsyrYwDjsR7 79 | yK8jmKNuFmi4N535bOLg2z+wN4ClHdwlJODrcBFBCI0Xbykd/KBlX+VuStd/E6NOuAEgl3XqBPNw 80 | baHVo9OAyhLFNxGc9mrX3uKywzwEfkiDi0Zo0KLN4hi7J19kGvuKja22sm9aMpmeZKFMEP7bMc/3 81 | YwGvLAMPmGKhDbmFDOkyKwy/RPifVHomCJ0U8s29PSHxjTmukooYLsVHe1OVbkz44Xo68xQ6afLz 82 | LBVLfEIcWClbmDNYxDrCUQXnrZyGHpeNG3rCzqTX6a7ZUDh0locX9f+JphggJrcV05zBNiXeQ+XZ 83 | lxuAI0cnx4euiHZb6MRXA3H6TlS9PIEF4n7eLuIC827w55qMRmJmEY59mZ/1xqs8buln087mcz4b 84 | HIG5KpRwrSC80JHVpdiXrxupOjvknSWmvMo34dmNZvazcVkcWqT8otjwV8FDU7kTlIe+pXbV9YQx 85 | eBMtntxk93yy9vM7RHvMccGObx/iaQ== 86 | 87 | 88 | # logsBucket: 'gs://' 89 | images: 90 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:latest-gpu' 91 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:latest-cpu' 92 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:${SHORT_SHA}-gpu' 93 | - 'gcr.io/${PROJECT_ID}/${REPO_NAME}:${SHORT_SHA}-cpu' 94 | -------------------------------------------------------------------------------- /docker/docker-compose.cloud.yml: -------------------------------------------------------------------------------- 1 | # WIP 2 | 3 | # Docker compose file that builds images and runs the containers needed for 4 | # running softlearning on cloud (e.g. with ray). You need to have your 5 | # MJKEY set in the environment. 6 | # 7 | # docker-compose \ 8 | # -f ./docker/docker-compose.dev.yml \ 9 | # build \ 10 | # --build-arg MJKEY="$(cat ~/.mujoco/mjkey.txt)" 11 | 12 | version: "3" 13 | services: 14 | softlearning-dev: 15 | image: softlearning-dev-compose-test 16 | build: 17 | context: ../. 18 | dockerfile: docker/Dockerfile.softlearning.base.gpu 19 | args: 20 | - MJKEY 21 | ports: 22 | - "6006-6016" # Tensorboard 23 | - "5000-5010" # Viskit 24 | - "8888-8898" # Jupyter 25 | volumes: 26 | - ~/.aws:/root/.aws # If using aws, e.g. for s3 27 | - ~/.config/gcloud:/root/.config/gcloud # If using gcp, e.g. 
for gs 28 | - ~/.mujoco:/root/.mujoco # mjkey.txt 29 | - ..:/root/softlearning-private 30 | -------------------------------------------------------------------------------- /docker/docker-compose.dev.cpu.yml: -------------------------------------------------------------------------------- 1 | # Docker compose file that builds images and runs the containers needed for 2 | # development. 3 | # 4 | # NOTE(hartikainen): This file doesn't currently work since docker-compose 5 | # doesn't support buildkit secrets. 6 | 7 | 8 | version: "2.4" 9 | services: 10 | softlearning-dev-cpu: 11 | image: ${IMAGE_NAME:-softlearning}:${IMAGE_TAG:-latest}-cpu 12 | container_name: softlearning-dev-cpu 13 | init: true 14 | working_dir: /root/softlearning 15 | environment: 16 | - DISPLAY=:0 17 | ports: 18 | - "6006" # Tensorboard 19 | - "5000" # Viskit 20 | - "8888" # Jupyter 21 | volumes: 22 | - ~/.aws:/root/.aws # If using aws, e.g. for s3 23 | - ~/.config/gcloud:/root/.config/gcloud # If using gcp, e.g. for gs 24 | - ~/.mujoco/mjkey.txt:/root/.mujoco/mjkey.txt 25 | - ..:/root/softlearning 26 | - ~/ray_results/softlearning-dev-cpu:/root/ray_results 27 | command: 28 | - bash 29 | stdin_open: true 30 | tty: true 31 | -------------------------------------------------------------------------------- /docker/docker-compose.dev.gpu.yml: -------------------------------------------------------------------------------- 1 | # Docker compose file that builds images and runs the containers needed for 2 | # development. 3 | # 4 | # NOTE(hartikainen): This file doesn't currently work since docker-compose 5 | # doesn't support buildkit secrets. 6 | 7 | 8 | version: "2.4" 9 | services: 10 | softlearning-dev-gpu: 11 | image: ${IMAGE_NAME:-softlearning}:${IMAGE_TAG:-latest}-gpu 12 | container_name: softlearning-dev-gpu 13 | runtime: nvidia 14 | init: true 15 | working_dir: /root/softlearning 16 | environment: 17 | - DISPLAY=:0 18 | ports: 19 | - "6006" # Tensorboard 20 | - "5000" # Viskit 21 | - "8888" # Jupyter 22 | volumes: 23 | - ~/.aws:/root/.aws # If using aws, e.g. for s3 24 | - ~/.config/gcloud:/root/.config/gcloud # If using gcp, e.g. for gs 25 | - ~/.mujoco/mjkey.txt:/root/.mujoco/mjkey.txt 26 | - ..:/root/softlearning 27 | - ~/ray_results/softlearning-dev-gpu:/root/ray_results 28 | command: 29 | - bash 30 | stdin_open: true 31 | tty: true 32 | -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | # Set up display; otherwise rendering will fail 6 | Xvfb -screen 0 320x240x24 & 7 | export DISPLAY=:0 8 | 9 | # Wait for the file to come up 10 | file="/tmp/.X11-unix/X0" 11 | for i in $(seq 1 10); do 12 | if [ -e "$file" ]; then 13 | break 14 | fi 15 | 16 | echo "Waiting for $file to be created (try $i/10)" 17 | sleep "$i" 18 | done 19 | if ! 
[ -e "$file" ]; then 20 | echo "Timing out: $file was not created" 21 | exit 1 22 | fi 23 | 24 | exec "$@" 25 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: softlearning 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python>=3.8,<3.9 7 | - pip>=20.0 8 | - conda>=4.8 9 | # - cudatoolkit==11.0.221 10 | # - nvidia::cudnn==8.0.4 11 | - pip: 12 | - -r ./requirements.txt 13 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/examples/__init__.py -------------------------------------------------------------------------------- /examples/development/__init__.py: -------------------------------------------------------------------------------- 1 | """Provides functions that are utilized by the command line interface. 2 | 3 | In particular, the examples are exposed to the command line interface 4 | (defined in `softlearning.scripts.console_scripts`) through the 5 | `get_trainable_class`, `get_variant_spec`, and `get_parser` functions. 6 | """ 7 | 8 | 9 | def get_trainable_class(*args, **kwargs): 10 | from .main import ExperimentRunner 11 | return ExperimentRunner 12 | 13 | 14 | def get_variant_spec(command_line_args, *args, **kwargs): 15 | from .variants import get_variant_spec 16 | variant_spec = get_variant_spec(command_line_args, *args, **kwargs) 17 | return variant_spec 18 | 19 | 20 | def get_parser(): 21 | from examples.utils import get_parser 22 | parser = get_parser() 23 | return parser 24 | -------------------------------------------------------------------------------- /examples/development/simulate_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from pathlib import Path 5 | import pickle 6 | 7 | import pandas as pd 8 | 9 | from softlearning.environments.utils import get_environment_from_params 10 | from softlearning import policies 11 | from softlearning.samplers import rollouts 12 | from softlearning.utils.tensorflow import set_gpu_memory_growth 13 | from softlearning.utils.video import save_video 14 | from .main import ExperimentRunner 15 | 16 | 17 | DEFAULT_RENDER_KWARGS = { 18 | 'mode': 'human', 19 | } 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('checkpoint_path', 25 | type=str, 26 | help='Path to the checkpoint.') 27 | parser.add_argument('--max-path-length', '-l', type=int, default=1000) 28 | parser.add_argument('--num-rollouts', '-n', type=int, default=10) 29 | parser.add_argument('--render-kwargs', '-r', 30 | type=json.loads, 31 | default='{}', 32 | help="Kwargs for rollouts renderer.") 33 | parser.add_argument('--video-save-path', 34 | type=Path, 35 | default=None) 36 | 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def load_variant_progress_metadata(checkpoint_path): 43 | checkpoint_path = checkpoint_path.rstrip('/') 44 | trial_path = os.path.dirname(checkpoint_path) 45 | 46 | variant_path = os.path.join(trial_path, 'params.pkl') 47 | with open(variant_path, 'rb') as f: 48 | variant = pickle.load(f) 49 | 50 | metadata_path = os.path.join(checkpoint_path, ".tune_metadata") 51 | if os.path.exists(metadata_path): 52 
| with open(metadata_path, "rb") as f: 53 | metadata = pickle.load(f) 54 | else: 55 | metadata = None 56 | 57 | progress_path = os.path.join(trial_path, 'progress.csv') 58 | progress = pd.read_csv(progress_path) 59 | 60 | return variant, progress, metadata 61 | 62 | 63 | def load_environment(variant): 64 | environment_params = ( 65 | variant['environment_params']['evaluation'] 66 | if 'evaluation' in variant['environment_params'] 67 | else variant['environment_params']['training']) 68 | 69 | environment = get_environment_from_params(environment_params) 70 | return environment 71 | 72 | 73 | def load_policy(checkpoint_dir, variant, environment): 74 | policy_params = variant['policy_params'].copy() 75 | policy_params['config'] = { 76 | **policy_params['config'], 77 | 'action_range': (environment.action_space.low, 78 | environment.action_space.high), 79 | 'input_shapes': environment.observation_shape, 80 | 'output_shape': environment.action_shape, 81 | } 82 | 83 | policy = policies.get(policy_params) 84 | 85 | policy_save_path = ExperimentRunner._policy_save_path(checkpoint_dir) 86 | status = policy.load_weights(policy_save_path) 87 | status.assert_consumed().run_restore_ops() 88 | 89 | return policy 90 | 91 | 92 | def simulate_policy(checkpoint_path, 93 | num_rollouts, 94 | max_path_length, 95 | render_kwargs, 96 | video_save_path=None, 97 | evaluation_environment_params=None): 98 | checkpoint_path = os.path.abspath(checkpoint_path.rstrip('/')) 99 | variant, progress, metadata = load_variant_progress_metadata( 100 | checkpoint_path) 101 | environment = load_environment(variant) 102 | policy = load_policy(checkpoint_path, variant, environment) 103 | render_kwargs = {**DEFAULT_RENDER_KWARGS, **render_kwargs} 104 | 105 | paths = rollouts(num_rollouts, 106 | environment, 107 | policy, 108 | path_length=max_path_length, 109 | render_kwargs=render_kwargs) 110 | 111 | if video_save_path and render_kwargs.get('mode') == 'rgb_array': 112 | fps = 1 // getattr(environment, 'dt', 1/30) 113 | video_save_dir = os.path.expanduser(str(video_save_path)) 114 | for i, path in enumerate(paths): 115 | episode_video_path = os.path.join(video_save_dir, f'episode_{i}.mp4') 116 | save_video(path['images'], episode_video_path, fps=fps) 117 | 118 | return paths 119 | 120 | 121 | if __name__ == '__main__': 122 | set_gpu_memory_growth(True) 123 | args = parse_args() 124 | simulate_policy(**vars(args)) 125 | -------------------------------------------------------------------------------- /examples/multi_goal/__init__.py: -------------------------------------------------------------------------------- 1 | """Provides functions that are utilized by the command line interface. 2 | 3 | In particular, the examples are exposed to the command line interface 4 | (defined in `softlearning.scripts.console_scripts`) through the 5 | `get_trainable_class`, `get_variant_spec`, and `get_parser` functions.
6 | """ 7 | 8 | 9 | def get_trainable_class(*args, **kwargs): 10 | from .main import run_experiment 11 | return run_experiment 12 | 13 | 14 | def get_variant_spec(command_line_args, *args, **kwargs): 15 | from .variants import get_variant_spec 16 | variant_spec = get_variant_spec(command_line_args, *args, **kwargs) 17 | return variant_spec 18 | 19 | 20 | def get_parser(): 21 | from examples.utils import get_parser 22 | parser = get_parser() 23 | 24 | for dest, value in (('universe', 'gym'), 25 | ('task', 'MultiGoal'), 26 | ('domain', 'Default-v0')): 27 | option = next(x for x in parser._actions if x.dest == dest) 28 | option.default = value 29 | option.choices = {value} 30 | 31 | return parser 32 | -------------------------------------------------------------------------------- /examples/multi_goal/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | 5 | from softlearning import algorithms 6 | from softlearning.environments.utils import get_environment 7 | from softlearning.misc.plotter import QFPolicyPlotter 8 | from softlearning.samplers import SimpleSampler 9 | from softlearning import policies 10 | from softlearning.replay_pools import SimpleReplayPool 11 | from softlearning import value_functions 12 | from examples.instrument import run_example_local 13 | 14 | 15 | def run_experiment(variant, reporter): 16 | training_environment = ( 17 | get_environment('gym', 'MultiGoal', 'Default-v0', { 18 | 'actuation_cost_coeff': 30, 19 | 'distance_cost_coeff': 1, 20 | 'goal_reward': 10, 21 | 'init_sigma': 0.1, 22 | })) 23 | evaluation_environment = training_environment.copy() 24 | 25 | pool = SimpleReplayPool( 26 | environment=training_environment, 27 | max_size=1e6) 28 | 29 | sampler = SimpleSampler(max_path_length=30) 30 | 31 | variant['Q_params']['config'].update({ 32 | 'input_shapes': ( 33 | training_environment.observation_shape, 34 | training_environment.action_shape, 35 | ) 36 | }) 37 | Qs = value_functions.get(variant['Q_params']) 38 | 39 | variant['policy_params']['config'].update({ 40 | 'action_range': (training_environment.action_space.low, 41 | training_environment.action_space.high), 42 | 'input_shapes': training_environment.observation_shape, 43 | 'output_shape': training_environment.action_shape, 44 | }) 45 | policy = policies.get(variant['policy_params']) 46 | 47 | plotter = QFPolicyPlotter( 48 | Q=Qs[0], 49 | policy=policy, 50 | obs_lst=np.array(((-2.5, 0.0), 51 | (0.0, 0.0), 52 | (2.5, 2.5), 53 | (-2.5, -2.5))), 54 | default_action=(np.nan, np.nan), 55 | n_samples=100) 56 | 57 | variant['algorithm_params']['config'].update({ 58 | 'training_environment': training_environment, 59 | 'evaluation_environment': evaluation_environment, 60 | 'policy': policy, 61 | 'Qs': Qs, 62 | 'pool': pool, 63 | 'sampler': sampler, 64 | 'min_pool_size': 100, 65 | 'batch_size': 64, 66 | 'plotter': plotter, 67 | }) 68 | algorithm = algorithms.get(variant['algorithm_params']) 69 | 70 | for train_result in algorithm.train(): 71 | reporter(**train_result) 72 | 73 | 74 | def main(argv=None): 75 | """Run ExperimentRunner locally on ray. 76 | 77 | To run this example on cloud (e.g. gce/ec2), use the setup scripts: 78 | 'softlearning launch_example_{gce,ec2} examples.development '. 79 | 80 | Run 'softlearning launch_example_{gce,ec2} --help' for further 81 | instructions. 
82 | """ 83 | run_example_local('examples.multi_goal', argv) 84 | 85 | 86 | if __name__ == '__main__': 87 | main(argv=sys.argv[1:]) 88 | -------------------------------------------------------------------------------- /examples/multi_goal/variants.py: -------------------------------------------------------------------------------- 1 | from softlearning.utils.dict import deep_update 2 | 3 | ALGORITHM_PARAMS_BASE = { 4 | 'class_name': 'SAC', 5 | 6 | 'config': { 7 | 'epoch_length': 100, 8 | 'n_epochs': 1000, 9 | 'n_train_repeat': 1, 10 | 'eval_render_kwargs': { 11 | 'mode': 'human', 12 | }, 13 | 'eval_n_episodes': 10, 14 | 15 | 'discount': 0.99, 16 | 'reward_scale': 1.0, 17 | 'save_full_state': True, 18 | 'target_update_interval': 1000, 19 | 'tau': 1.0, 20 | } 21 | } 22 | 23 | ALGORITHM_PARAMS_ADDITIONAL = { 24 | 'SAC': { 25 | 'class_name': 'SAC', 26 | 'config': { 27 | 'lr': 3e-4, 28 | 'reward_scale': 0.1, 29 | 'target_entropy': 'auto', 30 | 'initial_exploration_policy': None 31 | } 32 | }, 33 | 'SQL': { 34 | 'class_name': 'SQL', 35 | 'config': { 36 | 'policy_lr': 3e-4, 37 | 'reward_scale': 0.1, 38 | 'value_n_particles': 16, 39 | 'kernel_n_particles': 32, 40 | 'kernel_update_ratio': 0.5, 41 | } 42 | } 43 | } 44 | 45 | 46 | def get_variant_spec(args): 47 | algorithm = args.algorithm 48 | 49 | layer_size = 128 50 | variant_spec = { 51 | 'layer_size': layer_size, 52 | 'policy_params': { 53 | 'class_name': 'FeedforwardGaussianPolicy', 54 | 'config': { 55 | 'hidden_layer_sizes': (layer_size, layer_size), 56 | 'squash': True, 57 | }, 58 | }, 59 | 'algorithm_params': deep_update( 60 | ALGORITHM_PARAMS_BASE, 61 | ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}) 62 | ), 63 | 'Q_params': { 64 | 'class_name': 'double_feedforward_Q_function', 65 | 'config': { 66 | 'hidden_layer_sizes': (layer_size, layer_size), 67 | }, 68 | }, 69 | 'run_params': { 70 | 'seed': 1, 71 | }, 72 | } 73 | 74 | return variant_spec 75 | -------------------------------------------------------------------------------- /models/pusher_2d.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 72 | -------------------------------------------------------------------------------- /models/simple_maze_ant.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 90 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.11.0 2 | aiohttp==3.7.4 3 | aiohttp-cors==0.7.0 4 | aioredis==1.3.1 5 | appnope==0.1.2 6 | astunparse==1.6.3 7 | async-timeout==3.0.1 8 | attrs==20.3.0 9 | backcall==0.2.0 10 | blessings==1.7 11 | brotlipy==0.7.0 12 | cachetools==4.2.0 13 | certifi==2020.12.5 14 | cffi==1.14.4 15 | chardet==3.0.4 16 | click==7.1.2 17 | cloudpickle==1.6.0 18 | colorama==0.4.4 19 | colorful==0.5.4 20 | conda==4.9.2 21 | conda-package-handling==1.7.2 22 | cryptography==3.3.2 23 | cycler==0.10.0 24 | Cython==0.29.21 25 | decorator==4.4.2 26 | dm-control==0.0.322773188 27 | dm-env==1.3 28 | dm-tree==0.1.5 29 | fasteners==0.16 30 | filelock==3.0.12 31 | flatbuffers==1.12 32 | future==0.18.2 33 | gast==0.3.3 34 | gitdb==4.0.5 35 | GitPython==3.1.12 36 | glfw==2.0.0 37 | google-api-core==1.25.0 38 | google-auth==1.24.0 39 | google-auth-oauthlib==0.4.2 40 | google-pasta==0.2.0 41 | googleapis-common-protos==1.52.0 42 | gpustat==0.6.0 43 | grpcio==1.32.0 44 | gtimer==1.0.0b5 45 | gym==0.18.0 46 | h5py==2.10.0 47 | 
hiredis==1.1.0 48 | idna==2.10 49 | imageio==2.9.0 50 | iniconfig==1.1.1 51 | ipdb==0.13.4 52 | ipython==7.19.0 53 | ipython-genutils==0.2.0 54 | jedi==0.18.0 55 | joblib==1.0.0 56 | jsonschema==3.2.0 57 | Keras-Preprocessing==1.1.2 58 | kiwisolver==1.3.1 59 | labmaze==1.0.3 60 | lxml==4.6.2 61 | Markdown==3.3.3 62 | matplotlib==3.3.3 63 | msgpack==1.0.2 64 | mujoco-py==2.0.2.13 65 | multidict==5.1.0 66 | networkx==2.5 67 | numpy==1.19.5 68 | nvidia-ml-py3==7.352.0 69 | oauthlib==3.1.0 70 | opencensus==0.7.12 71 | opencensus-context==0.1.2 72 | opt-einsum==3.3.0 73 | packaging==20.8 74 | pandas==1.2.0 75 | parso==0.8.1 76 | pexpect==4.8.0 77 | pickleshare==0.7.5 78 | Pillow==7.2.0 79 | pip==20.3.3 80 | pluggy==0.13.1 81 | prometheus-client==0.9.0 82 | prompt-toolkit==3.0.10 83 | protobuf==3.14.0 84 | psutil==5.8.0 85 | ptyprocess==0.7.0 86 | py==1.10.0 87 | py-spy==0.3.4 88 | pyasn1==0.4.8 89 | pyasn1-modules==0.2.8 90 | pycosat==0.6.3 91 | pycparser==2.20 92 | pyglet==1.5.0 93 | Pygments==2.7.4 94 | PyOpenGL==3.1.5 95 | PyOpenGL-accelerate==3.1.5 96 | pyOpenSSL==20.0.1 97 | pyparsing==2.4.7 98 | pyrsistent==0.17.3 99 | PySocks==1.7.1 100 | pytest==6.2.1 101 | python-dateutil==2.8.1 102 | pytz==2020.5 103 | PyWavelets==1.1.1 104 | PyYAML==5.4 105 | ray[tune]==1.2.0 106 | redis==3.5.3 107 | requests==2.25.1 108 | requests-oauthlib==1.3.0 109 | rsa==4.7 110 | ruamel-yaml==0.15.87 111 | scikit-image==0.18.1 112 | scikit-learn==0.24.1 113 | scikit-video==1.1.11 114 | scipy==1.6.0 115 | setproctitle==1.2.1 116 | six==1.15.0 117 | smmap==3.0.4 118 | tabulate==0.8.7 119 | tensorboard==2.4.1 120 | tensorboard-plugin-wit==1.8.0 121 | tensorboardX==2.1 122 | tensorflow==2.4.1 123 | tensorflow-addons==0.12.1 124 | tensorflow-estimator==2.4.0 125 | tensorflow-probability==0.12.1 126 | termcolor==1.1.0 127 | threadpoolctl==2.1.0 128 | tifffile==2021.1.14 129 | toml==0.10.2 130 | tqdm==4.55.1 131 | traitlets==5.0.5 132 | typeguard==2.10.0 133 | typing-extensions==3.7.4.3 134 | urllib3==1.26.3 135 | wcwidth==0.2.5 136 | Werkzeug==1.0.1 137 | wheel==0.36.2 138 | wrapt==1.12.1 139 | yarl==1.6.3 140 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/archive_gs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import os 5 | import subprocess 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('archive_path', type=str, default=None, nargs='?') 11 | parser.add_argument('--unarchive', action='store_true', default=False) 12 | parser.add_argument('--dry', action='store_true', default=False) 13 | args = parser.parse_args() 14 | 15 | return args 16 | 17 | 18 | def archive_gs(args): 19 | """Archive files in google cloud storage bucket. 20 | 21 | Moves files from `/ray/results` to `/archive/ray/results`. 22 | 23 | TODO(hartikainen): Refactor this to use project config instead of 24 | environment variables (e.g. `SAC_GS_BUCKET`). 
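    Example usage (the bucket and experiment names below are hypothetical):

        SAC_GS_BUCKET=gs://my-sac-bucket ./scripts/archive_gs.py my-experiment --dry

    With `--dry`, the underlying `gsutil -m mv` command is only printed;
    without it, the files are actually moved under
    `<bucket>/archive/ray/results`.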
25 | """ 26 | if 'SAC_GS_BUCKET' not in os.environ: 27 | raise ValueError( 28 | "'SAC_GS_BUCKET' environment variable needs to be set.") 29 | 30 | bucket = os.environ['SAC_GS_BUCKET'] 31 | fresh_results_path = os.path.join(bucket, 'ray', 'results') 32 | archive_results_path = os.path.join(bucket, 'archive', 'ray', 'results') 33 | 34 | fresh_url = os.path.join(fresh_results_path, args.archive_path) 35 | archive_url = os.path.join(archive_results_path, args.archive_path) 36 | 37 | src_url, dst_url = ( 38 | (archive_url, fresh_url) 39 | if args.unarchive 40 | else (fresh_url, archive_url)) 41 | 42 | command_parts = ['gsutil', '-m', 'mv', src_url, dst_url] 43 | command = " ".join(command_parts) 44 | 45 | if args.dry: 46 | print(command) 47 | return 48 | 49 | subprocess.call(command, shell=True) 50 | 51 | 52 | def main(): 53 | args = parse_args() 54 | archive_gs(args) 55 | 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /scripts/deploy-aws.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | declare -r SCRIPT_DIRECTORY="$(dirname $(realpath ${BASH_SOURCE[0]}))" 4 | declare -r PROJECT_ROOT="$(dirname ${SCRIPT_DIRECTORY})" 5 | 6 | cd "${PROJECT_ROOT}" \ 7 | && . ./.env \ 8 | && . ./config/locals 9 | 10 | if [ -z "${AWS_ECR_REGISTRY_URL}" ]; then 11 | echo "AWS_ECR_REGISTRY_URL variable in 'config/locals' is empty or unset." \ 12 | " Fill in the values in 'config/locals' and rerun this file." 13 | exit 1 14 | fi 15 | 16 | declare -r IMAGE_NAME="softlearning" 17 | declare -r IMAGE_TAG="${SOFTLEARNING_DEV_TAG}" 18 | declare -r TARGET_REGISTRY="${AWS_ECR_REGISTRY_URL}" 19 | 20 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 21 | 22 | build_docker_image() { 23 | 24 | echo "Building Docker image." 25 | 26 | docker-compose \ 27 | -f ./docker/docker-compose.dev.cpu.yml \ 28 | build \ 29 | --build-arg MJKEY="$(cat ~/.mujoco/mjkey.txt)" 30 | 31 | echo "Build successful." 
32 | 33 | } 34 | 35 | push_image_to_aws_ecr() { 36 | 37 | SOURCE_IMAGE="${IMAGE_NAME}:${IMAGE_TAG}" 38 | TARGET_IMAGE="${TARGET_REGISTRY}/${SOURCE_IMAGE}" 39 | 40 | echo "${SOURCE_IMAGE}" 41 | echo "${TARGET_IMAGE}" 42 | 43 | $(aws ecr get-login --no-include-email) 44 | 45 | docker tag "${SOURCE_IMAGE}" "${TARGET_IMAGE}" 46 | docker push "${TARGET_IMAGE}" 47 | 48 | } 49 | 50 | main() { 51 | 52 | build_docker_image 53 | push_image_to_aws_ecr 54 | 55 | } 56 | 57 | main 58 | -------------------------------------------------------------------------------- /scripts/install_mujoco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | from distutils.version import LooseVersion 5 | import os 6 | import subprocess 7 | import sys 8 | 9 | 10 | KNOWN_PLATFORMS = ('linux', 'darwin') 11 | DEFAULT_MUJOCO_PATH = '~/.mujoco' 12 | 13 | 14 | def get_parser(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--mujoco-path', type=str, default=DEFAULT_MUJOCO_PATH) 17 | parser.add_argument('--versions', 18 | type=str, 19 | nargs='+', 20 | default=('2.00', )) 21 | return parser 22 | 23 | 24 | def get_mujoco_zip_name(platform, version): 25 | past_150 = LooseVersion(version) > LooseVersion("1.50") 26 | basename = "mujoco" if past_150 else "mjpro" 27 | 28 | if platform == 'darwin': 29 | platform_id = 'macos' if past_150 else 'osx' 30 | elif platform == 'linux': 31 | platform_id = 'linux' 32 | else: 33 | raise ValueError(platform) 34 | 35 | # For example: "mujoco200_linux.zip" 36 | zip_name = f"{basename}{version.replace('.', '')}_{platform_id}.zip" 37 | return zip_name 38 | 39 | 40 | def install_mujoco(platform, version, mujoco_path): 41 | print(f"Installing MuJoCo version {version} to {mujoco_path}") 42 | 43 | mujoco_zip_name = get_mujoco_zip_name(platform, version) 44 | mujoco_dir_name = os.path.splitext(mujoco_zip_name)[0] 45 | if os.path.exists(os.path.join(mujoco_path, mujoco_dir_name)): 46 | print(f"MuJoCo {platform}, {version} already installed.") 47 | return 48 | 49 | mujoco_zip_url = f"https://www.roboti.us/download/{mujoco_zip_name}" 50 | 51 | if subprocess.call("command -v wget", shell=True) == 0: 52 | subprocess.check_call([ 53 | "wget", 54 | "--progress=bar:force", 55 | "--show-progress", 56 | "--timestamping", 57 | "--directory-prefix", 58 | mujoco_path, 59 | mujoco_zip_url]) 60 | elif subprocess.call("command -v curl", shell=True) == 0: 61 | subprocess.check_call([ 62 | "curl", 63 | "--location", 64 | "--show-error", 65 | "--output", 66 | os.path.join(mujoco_path, mujoco_zip_name), 67 | mujoco_zip_url]) 68 | else: 69 | raise ValueError("Need either `wget` or `curl` to download mujoco.") 70 | 71 | subprocess.call([ 72 | "unzip", 73 | "-n", 74 | os.path.join(mujoco_path, mujoco_zip_name), 75 | "-d", 76 | mujoco_path]) 77 | subprocess.call(["rm", os.path.join(mujoco_path, mujoco_zip_name)]) 78 | 79 | if LooseVersion(version) == LooseVersion('2.0'): 80 | subprocess.call([ 81 | "ln", 82 | "-s", 83 | os.path.join(mujoco_path, mujoco_dir_name), 84 | os.path.join(mujoco_path, "mujoco200"), 85 | ]) 86 | 87 | 88 | def main(): 89 | parser = get_parser() 90 | args = parser.parse_args() 91 | mujoco_path = os.path.expanduser(args.mujoco_path) 92 | 93 | if not os.path.exists(mujoco_path): 94 | os.makedirs(mujoco_path) 95 | 96 | platform = sys.platform 97 | assert platform in KNOWN_PLATFORMS, (platform, KNOWN_PLATFORMS) 98 | 99 | for version in args.versions: 100 | install_mujoco(platform, version,
mujoco_path) 101 | 102 | 103 | if __name__ == '__main__': 104 | main() 105 | -------------------------------------------------------------------------------- /scripts/sync_gs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import os 5 | import shlex 6 | import subprocess 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | 12 | parser.add_argument( 13 | 'sync_path', type=str, default=None, nargs='?') 14 | parser.add_argument( 15 | '--sync-checkpoints', action='store_true', default=False) 16 | parser.add_argument( 17 | '--dry', action='store_true', default=False) 18 | args = parser.parse_args() 19 | 20 | return args 21 | 22 | 23 | def sync_gs(args): 24 | """Sync files from google cloud storage bucket to local machine. 25 | 26 | TODO(hartikainen): Refactor this to use project config instead of 27 | environment variables (e.g. `SAC_GS_BUCKET`). 28 | """ 29 | if 'SAC_GS_BUCKET' not in os.environ: 30 | raise ValueError( 31 | "'SAC_GS_BUCKET' environment variable needs to be set.") 32 | 33 | bucket = os.environ['SAC_GS_BUCKET'] 34 | 35 | remote_gs_parts = [bucket, 'ray', 'results'] 36 | local_gs_parts = [os.path.expanduser('~/ray_results/gs/')] 37 | 38 | if args.sync_path is not None: 39 | remote_gs_parts.append(args.sync_path) 40 | local_gs_parts.append(args.sync_path) 41 | 42 | remote_gs_path = os.path.join(*remote_gs_parts) 43 | local_gs_path = os.path.join(*local_gs_parts) 44 | 45 | if not os.path.exists(local_gs_path): 46 | os.makedirs(local_gs_path) 47 | 48 | command_parts = ['gsutil', '-m', 'rsync', '-r'] 49 | 50 | if not args.sync_checkpoints: 51 | command_parts += ['-x', '".*./checkpoint_.*./.*"'] 52 | 53 | if args.dry: 54 | command_parts += ["-n"] 55 | 56 | command_parts += [shlex.quote(remote_gs_path), shlex.quote(local_gs_path)] 57 | 58 | command = " ".join(command_parts) 59 | 60 | subprocess.call(command, shell=True) 61 | 62 | 63 | def main(): 64 | args = parse_args() 65 | sync_gs(args) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /scripts/test-cloud-build.sh: -------------------------------------------------------------------------------- 1 | cloud-build-local \ 2 | --config=./docker/cloudbuild.yaml \ 3 | --dryrun=false \ 4 | --push \ 5 | --write-workspace=/tmp/workspace \ 6 | --substitutions=REPO_NAME="softlearning",BRANCH_NAME="$(git rev-parse --abbrev-ref HEAD)",COMMIT_SHA="$(git rev-parse HEAD)",SHORT_SHA="$(git rev-parse --short HEAD)" \ 7 | . 
8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from setuptools import find_packages 3 | 4 | 5 | NAME = 'softlearning' 6 | VERSION = '0.0.1' 7 | DESCRIPTION = ( 8 | "Softlearning is a deep reinforcement learning toolbox for training" 9 | " maximum entropy policies in continuous domains.") 10 | 11 | 12 | setup( 13 | name=NAME, 14 | packages=find_packages( 15 | exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), 16 | version=VERSION, 17 | description=DESCRIPTION, 18 | long_description=open('./README.md').read(), 19 | author='Kristian Hartikainen', 20 | author_email='kristian.hartikainen@gmail.com', 21 | url='https://github.com/rail-berkeley/softlearning', 22 | keywords=( 23 | 'softlearning', 24 | 'soft-actor-critic', 25 | 'sac', 26 | 'soft-q-learning', 27 | 'sql', 28 | 'machine-learning', 29 | 'reinforcement-learning', 30 | 'deep-learning', 31 | 'robotics', 32 | 'tensorflow', 33 | 'tensorflow-2', 34 | ), 35 | entry_points={ 36 | 'console_scripts': ( 37 | 'softlearning=softlearning.scripts.console_scripts:main', 38 | ), 39 | }, 40 | install_requires=( 41 | 'Click>=7.0', 42 | 'GitPython==3.1.2', 43 | 'dm-control>=0.0.322773188', 44 | 'gtimer>=1.0.0b5', 45 | 'gym>=0.17.2', 46 | 'mujoco-py>=2.0.2.10', 47 | 'numpy>=1.17.5', 48 | 'pandas', 49 | 'ray[tune]>=1.0.0', 50 | 'scikit-image>=0.17.2', 51 | 'scikit-video>=1.1.11', 52 | 'scipy>=1.4.1', 53 | 'tensorflow>=2.2.0', 54 | 'tensorflow-probability>=0.10.0', 55 | ), 56 | zip_safe=True, 57 | license='MIT' 58 | ) 59 | -------------------------------------------------------------------------------- /softlearning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/__init__.py -------------------------------------------------------------------------------- /softlearning/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | from softlearning.utils.serialization import ( 2 | serialize_softlearning_object, deserialize_softlearning_object) 3 | 4 | from .sql import SQL # noqa: unused-import 5 | from .sac import SAC # noqa: unused-import 6 | 7 | 8 | def serialize(algorithm): 9 | return serialize_softlearning_object(algorithm) 10 | 11 | 12 | def deserialize(name, custom_objects=None): 13 | """Returns a algorithm function or class denoted by input string. 14 | 15 | Arguments: 16 | name : String 17 | 18 | Returns: 19 | Algorithm function or class denoted by input string. 20 | 21 | For example: 22 | >>> softlearning.algorithms.get({'class_name': 'SAC', ...}) 23 | 24 | >>> softlearning.algorithms.get('abcd') 25 | Traceback (most recent call last): 26 | ... 27 | ValueError: Unknown algorithm: abcd 28 | 29 | Args: 30 | name: The name of the algorithm. 31 | 32 | Raises: 33 | ValueError: `Unknown algorithm` if the input string does not 34 | denote any defined algorithm. 35 | """ 36 | return deserialize_softlearning_object( 37 | name, 38 | module_objects=globals(), 39 | custom_objects=custom_objects, 40 | printable_module_name='algorithm') 41 | 42 | 43 | def get(identifier): 44 | """Returns a algorithm. 45 | 46 | Arguments: 47 | identifier: function, string, or dict. 48 | 49 | Returns: 50 | A algorithm denoted by identifier. 
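    (Accepted forms, matching the dispatch below: a callable is returned
    unchanged; a string or a dict such as {'class_name': 'SAC', ...} is
    passed on to `deserialize`.)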
51 | 52 | For example: 53 | >>> softlearning.algorithms.get({'class_name': 'SAC', ...}) 54 | 55 | >>> softlearning.algorithms.get('abcd') 56 | Traceback (most recent call last): 57 | ... 58 | ValueError: Unknown algorithm: abcd 59 | 60 | Raises: 61 | ValueError: Input is an unknown function or string, i.e., the 62 | identifier does not denote any defined algorithm. 63 | """ 64 | if identifier is None: 65 | return None 66 | if isinstance(identifier, str): 67 | return deserialize(identifier) 68 | elif isinstance(identifier, dict): 69 | return deserialize(identifier) 70 | elif callable(identifier): 71 | return identifier 72 | else: 73 | raise TypeError( 74 | f"Could not interpret algorithm function identifier:" 75 | " {repr(identifier)}.") 76 | -------------------------------------------------------------------------------- /softlearning/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/distributions/__init__.py -------------------------------------------------------------------------------- /softlearning/distributions/bijectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .conditional_scale import ConditionalScale 2 | from .conditional_shift import ConditionalShift 3 | 4 | 5 | __all__ = ( 6 | "ConditionalScale", 7 | "ConditionalShift", 8 | ) 9 | -------------------------------------------------------------------------------- /softlearning/distributions/bijectors/conditional_scale.py: -------------------------------------------------------------------------------- 1 | """Scale bijector.""" 2 | 3 | import tensorflow.compat.v2 as tf 4 | 5 | from tensorflow_probability.python.bijectors import bijector 6 | from tensorflow_probability.python.internal import assert_util 7 | 8 | 9 | __all__ = [ 10 | 'ConditionalScale', 11 | ] 12 | 13 | 14 | class ConditionalScale(bijector.Bijector): 15 | def __init__(self, 16 | dtype=tf.float32, 17 | validate_args=False, 18 | name='conditional_scale'): 19 | """Instantiates the `ConditionalScale` bijector. 20 | 21 | This `Bijector`'s forward operation is: 22 | 23 | ```none 24 | Y = g(X) = scale * X 25 | ``` 26 | 27 | Args: 28 | validate_args: Python `bool` indicating whether arguments should be 29 | checked for correctness. 30 | name: Python `str` name given to ops managed by this object. 
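        Example use (a sketch mirroring `conditional_scale_test.py`; note
        that `scale` is supplied as a call-time kwarg rather than at
        construction):

        ```python
        bijector = ConditionalScale()
        bijector.forward([1., 2., 3.], scale=2.)   # => [2., 4., 6.]
        bijector.inverse([2., 4., 6.], scale=2.)   # => [1., 2., 3.]
        ```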
31 | """ 32 | parameters = dict(locals()) 33 | with tf.name_scope(name) as name: 34 | super(ConditionalScale, self).__init__( 35 | forward_min_event_ndims=0, 36 | is_constant_jacobian=True, 37 | validate_args=validate_args, 38 | dtype=dtype, 39 | parameters=parameters, 40 | name=name) 41 | 42 | def _maybe_assert_valid_scale(self, scale): 43 | if not self.validate_args: 44 | return () 45 | is_non_zero = assert_util.assert_none_equal( 46 | scale, 47 | tf.zeros((), dtype=scale.dtype), 48 | message='Argument `scale` must be non-zero.') 49 | return (is_non_zero, ) 50 | 51 | def _forward(self, x, scale): 52 | with tf.control_dependencies(self._maybe_assert_valid_scale(scale)): 53 | return x * scale 54 | 55 | def _inverse(self, y, scale): 56 | with tf.control_dependencies(self._maybe_assert_valid_scale(scale)): 57 | return y / scale 58 | 59 | def _forward_log_det_jacobian(self, x, scale): 60 | with tf.control_dependencies(self._maybe_assert_valid_scale(scale)): 61 | return tf.math.log(tf.abs(scale)) 62 | -------------------------------------------------------------------------------- /softlearning/distributions/bijectors/conditional_scale_test.py: -------------------------------------------------------------------------------- 1 | """ConditionalScale Tests.""" 2 | 3 | # Dependency imports 4 | from absl.testing import parameterized 5 | import numpy as np 6 | import tensorflow.compat.v2 as tf 7 | 8 | from softlearning.distributions import bijectors 9 | from softlearning.internal import test_util 10 | 11 | 12 | @test_util.test_all_tf_execution_regimes 13 | class ScaleBijectorTest(test_util.TestCase, parameterized.TestCase): 14 | """Tests correctness of the Y = scale @ x transformation.""" 15 | 16 | def testName(self): 17 | bijector = bijectors.ConditionalScale() 18 | self.assertStartsWith(bijector.name, 'conditional_scale') 19 | 20 | @parameterized.named_parameters( 21 | dict(testcase_name='static_float32', is_static=True, dtype=np.float32), 22 | dict(testcase_name='static_float64', is_static=True, dtype=np.float64), 23 | dict(testcase_name='dynamic_float32', is_static=False, dtype=np.float32), 24 | dict(testcase_name='dynamic_float64', is_static=False, dtype=np.float64), 25 | ) 26 | def testNoBatchScale(self, is_static, dtype): 27 | scale = dtype(2.0) 28 | bijector = bijectors.ConditionalScale(dtype=dtype) 29 | x = self.maybe_static(np.array([1., 2, 3], dtype), is_static) 30 | self.assertAllClose([2., 4, 6], bijector.forward(x, scale=scale)) 31 | self.assertAllClose([.5, 1, 1.5], bijector.inverse(x, scale=scale)) 32 | self.assertAllClose( 33 | -np.log(2.), 34 | bijector.inverse_log_det_jacobian(x, scale=scale, event_ndims=0)) 35 | 36 | @parameterized.named_parameters( 37 | dict(testcase_name='static_float32', is_static=True, dtype=np.float32), 38 | dict(testcase_name='static_float64', is_static=True, dtype=np.float64), 39 | dict(testcase_name='dynamic_float32', is_static=False, dtype=np.float32), 40 | dict(testcase_name='dynamic_float64', is_static=False, dtype=np.float64), 41 | ) 42 | def testBatchScale(self, is_static, dtype): 43 | # Batched scale 44 | scale = tf.constant([2., 3.], dtype=dtype) 45 | bijector = bijectors.ConditionalScale(dtype=dtype) 46 | x = self.maybe_static(np.array([1.], dtype=dtype), is_static) 47 | self.assertAllClose([2., 3.], bijector.forward(x, scale=scale)) 48 | self.assertAllClose([0.5, 1./3.], bijector.inverse(x, scale=scale)) 49 | self.assertAllClose( 50 | [-np.log(2.), -np.log(3.)], 51 | bijector.inverse_log_det_jacobian(x, scale=scale, event_ndims=0)) 52 | 53 | 54 | 
if __name__ == '__main__': 55 | tf.test.main() 56 | -------------------------------------------------------------------------------- /softlearning/distributions/bijectors/conditional_shift.py: -------------------------------------------------------------------------------- 1 | """Shift bijector.""" 2 | 3 | import tensorflow.compat.v2 as tf 4 | from tensorflow_probability.python.internal import dtype_util 5 | from tensorflow_probability.python import bijectors as tfb 6 | 7 | 8 | __all__ = [ 9 | 'ConditionalShift', 10 | ] 11 | 12 | 13 | class ConditionalShift(tfb.Bijector): 14 | """Compute `Y = g(X; shift) = X + shift`. 15 | 16 | where `shift` is a numeric `Tensor`. 17 | 18 | Example Use: 19 | 20 | ```python 21 | shift = Shift([-1., 0., 1]) 22 | x = [1., 2, 3] 23 | # `forward` is equivalent to: 24 | # y = x + shift 25 | y = shift.forward(x) # [0., 2., 4.] 26 | ``` 27 | 28 | """ 29 | def __init__(self, 30 | dtype=tf.float32, 31 | validate_args=False, 32 | name='conditional_shift'): 33 | """Instantiates the `ConditionalShift` bijector. 34 | 35 | Args: 36 | validate_args: Python `bool` indicating whether arguments should be 37 | checked for correctness. 38 | name: Python `str` name given to ops managed by this object. 39 | """ 40 | parameters = dict(locals()) 41 | with tf.name_scope(name) as name: 42 | super(ConditionalShift, self).__init__( 43 | forward_min_event_ndims=0, 44 | is_constant_jacobian=True, 45 | dtype=dtype, 46 | validate_args=validate_args, 47 | parameters=parameters, 48 | name=name) 49 | 50 | @classmethod 51 | def _is_increasing(cls): 52 | return True 53 | 54 | def _forward(self, x, shift): 55 | return x + shift 56 | 57 | def _inverse(self, y, shift): 58 | return y - shift 59 | 60 | def _forward_log_det_jacobian(self, x, shift): 61 | # is_constant_jacobian = True for this bijector, hence the 62 | # `log_det_jacobian` need only be specified for a single input, as this will 63 | # be tiled to match `event_ndims`. 
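        # Since dY/dX == 1 everywhere for Y = X + shift, log|det J| is
        # identically zero; a scalar zero in x's dtype is sufficient: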
64 | return tf.zeros((), dtype=dtype_util.base_dtype(x.dtype)) 65 | -------------------------------------------------------------------------------- /softlearning/distributions/bijectors/conditional_shift_test.py: -------------------------------------------------------------------------------- 1 | """ConditionalShift Tests.""" 2 | 3 | # Dependency imports 4 | 5 | from absl.testing import parameterized 6 | import tensorflow.compat.v2 as tf 7 | 8 | from softlearning.distributions import bijectors 9 | from softlearning.internal import test_util 10 | 11 | 12 | @test_util.test_all_tf_execution_regimes 13 | class ShiftTest(test_util.TestCase, parameterized.TestCase): 14 | 15 | @parameterized.named_parameters( 16 | dict(testcase_name='static', is_static=True), 17 | dict(testcase_name='dynamic', is_static=False), 18 | ) 19 | def testNoBatch(self, is_static): 20 | shift = bijectors.ConditionalShift() 21 | x = self.maybe_static([1., 1.], is_static) 22 | self.assertAllClose([2., 0.], shift.forward(x, shift=[1., -1.])) 23 | self.assertAllClose([0., 2.], shift.inverse(x, shift=[1., -1.])) 24 | self.assertAllClose( 25 | 0., shift.inverse_log_det_jacobian(x, shift=[[2., -.5], [1., -3.]], event_ndims=1)) 26 | 27 | @parameterized.named_parameters( 28 | dict(testcase_name='static', is_static=True), 29 | dict(testcase_name='dynamic', is_static=False), 30 | ) 31 | def testBatch(self, is_static): 32 | shift = bijectors.ConditionalShift() 33 | x = self.maybe_static([1., 1.], is_static) 34 | 35 | self.assertAllClose([[3., .5], [2., -2.]], shift.forward( 36 | x, shift=[[2., -.5], [1., -3.]])) 37 | self.assertAllClose([[-1., 1.5], [0., 4.]], shift.inverse( 38 | x, shift=[[2., -.5], [1., -3.]])) 39 | self.assertAllClose(0., shift.inverse_log_det_jacobian( 40 | x, shift=[[2., -.5], [1., -3.]], event_ndims=1)) 41 | 42 | 43 | if __name__ == '__main__': 44 | tf.test.main() 45 | -------------------------------------------------------------------------------- /softlearning/distributions/bijectors/real_nvp_flow_test.py: -------------------------------------------------------------------------------- 1 | """Tests for RealNVPFlow.""" 2 | 3 | import pytest 4 | import numpy as np 5 | import tensorflow as tf 6 | from tensorflow_probability import bijectors 7 | from tensorflow.python.framework import test_util # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top 8 | 9 | from softlearning.distributions.bijectors.real_nvp_flow import RealNVPFlow 10 | 11 | 12 | @pytest.mark.skip(reason="tf2 broke these tests.") 13 | class RealNVPFlowTest(tf.test.TestCase): 14 | def test_build(self): 15 | x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4)) 16 | 17 | num_coupling_layers = 10 18 | hidden_layer_sizes = (64, 64) 19 | 20 | flow = RealNVPFlow( 21 | num_coupling_layers=num_coupling_layers, 22 | hidden_layer_sizes=hidden_layer_sizes) 23 | 24 | self.assertFalse(flow._built) 25 | flow.forward(x_) 26 | self.assertTrue(flow._built) 27 | 28 | real_nvp_layers = [ 29 | layer for layer in flow.flow.bijectors 30 | if isinstance(layer, bijectors.RealNVP) 31 | ] 32 | self.assertEqual(len(real_nvp_layers), num_coupling_layers) 33 | 34 | permute_layers = [ 35 | layer for layer in flow.flow.bijectors 36 | if isinstance(layer, bijectors.Permute) 37 | ] 38 | self.assertEqual(len(permute_layers), num_coupling_layers-1) 39 | 40 | batch_normalization_layers = [ 41 | layer for layer in flow.flow.bijectors 42 | if isinstance(layer, bijectors.BatchNormalization) 43 | ] 44 | self.assertEqual(len(batch_normalization_layers), 0) 45 | 
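        # Without batch normalization the chain should contain only the
        # couplings and the interleaved permutations, i.e.
        # num_coupling_layers + (num_coupling_layers - 1) bijectors in total: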
46 | self.assertEqual( 47 | len(flow.flow.bijectors), 48 | len(real_nvp_layers) + len(permute_layers)) 49 | 50 | def test_forward_inverse_returns_identity(self): 51 | x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4)) 52 | 53 | flow = RealNVPFlow( 54 | num_coupling_layers=2, 55 | hidden_layer_sizes=(64,)) 56 | 57 | x = tf.constant(x_) 58 | forward_x = flow.forward(x) 59 | # Use identity to invalidate cache. 60 | inverse_y = flow.inverse(tf.identity(forward_x)) 61 | forward_inverse_y = flow.forward(inverse_y) 62 | fldj = flow.forward_log_det_jacobian(x, event_ndims=1) 63 | # Use identity to invalidate cache. 64 | ildj = flow.inverse_log_det_jacobian(tf.identity(forward_x), event_ndims=1) 65 | 66 | forward_x_ = forward_x.numpy() 67 | inverse_y_ = inverse_y.numpy() 68 | forward_inverse_y_ = forward_inverse_y.numpy() 69 | ildj_ = ildj.numpy() 70 | fldj_ = fldj.numpy() 71 | 72 | self.assertEqual("real_nvp_flow", flow.name) 73 | self.assertAllClose(forward_x_, forward_inverse_y_, rtol=1e-4, atol=0.) 74 | self.assertAllClose(x_, inverse_y_, rtol=1e-4, atol=0.0) 75 | self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=0.0) 76 | 77 | def test_should_reuse_scale_and_log_scale_variables(self): 78 | x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4)) 79 | 80 | flow = RealNVPFlow( 81 | num_coupling_layers=2, 82 | hidden_layer_sizes=(64,)) 83 | 84 | x = tf.constant(x_) 85 | 86 | assert not tf.compat.v1.trainable_variables() 87 | 88 | forward_x = flow.forward(x) 89 | 90 | self.assertEqual( 91 | len(tf.compat.v1.trainable_variables()), 4 * flow._num_coupling_layers) 92 | 93 | inverse_y = flow.inverse(tf.identity(forward_x)) 94 | forward_inverse_y = flow.forward(inverse_y) 95 | fldj = flow.forward_log_det_jacobian(x, event_ndims=1) 96 | ildj = flow.inverse_log_det_jacobian( 97 | tf.identity(forward_x), event_ndims=1) 98 | 99 | self.assertEqual( 100 | len(tf.compat.v1.trainable_variables()), 4 * flow._num_coupling_layers) 101 | 102 | def test_batched_flow_with_mlp_transform(self): 103 | x_ = np.random.normal(0., 1., (3, 8)).astype(np.float32) 104 | flow = RealNVPFlow( 105 | num_coupling_layers=2, 106 | hidden_layer_sizes=(64,), 107 | use_batch_normalization=False) 108 | x = tf.constant(x_) 109 | forward_x = flow.forward(x) 110 | # Use identity to invalidate cache. 111 | inverse_y = flow.inverse(forward_x) 112 | forward_inverse_y = flow.forward(inverse_y) 113 | fldj = flow.forward_log_det_jacobian(x, event_ndims=1) 114 | # Use identity to invalidate cache. 115 | ildj = flow.inverse_log_det_jacobian(forward_x, event_ndims=1) 116 | 117 | [ 118 | forward_x_, 119 | inverse_y_, 120 | forward_inverse_y_, 121 | ildj_, 122 | fldj_, 123 | ] = [ 124 | forward_x.numpy(), 125 | inverse_y.numpy(), 126 | forward_inverse_y.numpy(), 127 | ildj.numpy(), 128 | fldj.numpy(), 129 | ] 130 | 131 | self.assertEqual("real_nvp_flow", flow.name) 132 | self.assertAllClose(forward_x_, forward_inverse_y_, rtol=1e-4, atol=0.) 133 | self.assertAllClose(x_, inverse_y_, rtol=1e-4, atol=0.) 
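        # At corresponding points x and y = f(x) every bijector satisfies
        # inverse_log_det_jacobian(y) == -forward_log_det_jacobian(x):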
134 | self.assertAllClose(ildj_, -fldj_, rtol=1e-6, atol=1e-8) 135 | 136 | def test_with_batch_normalization(self): 137 | x_ = np.reshape(np.linspace(-1.0, 1.0, 8, dtype=np.float32), (-1, 4)) 138 | 139 | with self.assertRaises(NotImplementedError): 140 | flow = RealNVPFlow( 141 | num_coupling_layers=2, 142 | hidden_layer_sizes=(64,), 143 | use_batch_normalization=True) 144 | 145 | 146 | if __name__ == '__main__': 147 | tf.test.main() 148 | -------------------------------------------------------------------------------- /softlearning/environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/environments/__init__.py -------------------------------------------------------------------------------- /softlearning/environments/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """Module that provides adapters between SoftlearningEnv and other universes""" 2 | -------------------------------------------------------------------------------- /softlearning/environments/adapters/dm_control_adapter_test.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import unittest 3 | 4 | import numpy as np 5 | from gym import spaces 6 | import pytest 7 | 8 | from .softlearning_env_test import AdapterTestClass 9 | from softlearning.environments.adapters.dm_control_adapter import ( 10 | DmControlAdapter) 11 | 12 | 13 | class TestDmControlAdapter(unittest.TestCase, AdapterTestClass): 14 | def create_adapter(self, 15 | domain='cartpole', 16 | task='swingup', 17 | *args, 18 | **kwargs): 19 | return DmControlAdapter(domain, task, *args, **kwargs) 20 | 21 | def test_environments(self): 22 | # Make sure that all the environments are creatable 23 | TEST_ENVIRONMENTS = ( 24 | ('cartpole', 'swingup'), 25 | ) 26 | 27 | def verify_reset_and_step(domain, task): 28 | env = DmControlAdapter(domain=domain, task=task) 29 | env.reset() 30 | env.step(env.action_space.sample()) 31 | 32 | for domain, task in TEST_ENVIRONMENTS: 33 | print("testing: ", domain, task) 34 | verify_reset_and_step(domain, task) 35 | 36 | def test_render_human(self): 37 | env = self.create_adapter() 38 | with self.assertRaises(NotImplementedError): 39 | result = env.render(mode='human') 40 | self.assertIsNone(result) 41 | 42 | def test_environment_kwargs(self): 43 | # TODO(hartikainen): Figure this out later. 
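        # (Added note: dm_control options travel nested under
        # 'environment_kwargs', as exercised by test_serialize_deserialize
        # below, so there is no gym-style flat-kwargs path to cover here.)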
44 | pass 45 | 46 | def test_serialize_deserialize(self): 47 | domain, task = 'hopper', 'hop' 48 | env_kwargs = { 49 | 'environment_kwargs': { 50 | 'flat_observation': True, 51 | } 52 | } 53 | env1 = self.create_adapter(domain=domain, task=task, **env_kwargs) 54 | env1.reset() 55 | 56 | env2 = pickle.loads(pickle.dumps(env1)) 57 | 58 | self.assertEqual(env1.observation_keys, env2.observation_keys) 59 | for key, value in env_kwargs['environment_kwargs'].items(): 60 | self.assertEqual(getattr(env1.unwrapped, f'_{key}'), value) 61 | self.assertEqual(getattr(env2.unwrapped, f'_{key}'), value) 62 | 63 | def test_copy_environments(self): 64 | domain, task = 'cartpole', 'swingup' 65 | env_kwargs = { 66 | 'environment_kwargs': { 67 | 'flat_observation': False, 68 | } 69 | } 70 | env1 = self.create_adapter(domain=domain, task=task, **env_kwargs) 71 | env1.reset() 72 | env2 = env1.copy() 73 | 74 | self.assertEqual(env1.observation_keys, env2.observation_keys) 75 | for key, value in env_kwargs['environment_kwargs'].items(): 76 | self.assertEqual(getattr(env1.unwrapped, f'_{key}'), value) 77 | self.assertEqual(getattr(env2.unwrapped, f'_{key}'), value) 78 | 79 | def test_rescale_action(self): 80 | environment_kwargs = { 81 | 'domain': 'quadruped', 82 | 'task': 'run', 83 | } 84 | environment = DmControlAdapter(**environment_kwargs, rescale_action_range=None) 85 | new_low, new_high = -1.0, 1.0 86 | 87 | assert isinstance(environment.action_space, spaces.Box) 88 | assert np.any(environment.action_space.low != new_low) 89 | assert np.any(environment.action_space.high != new_high) 90 | 91 | rescaled_environment = DmControlAdapter( 92 | **environment_kwargs, rescale_action_range=(new_low, new_high)) 93 | 94 | np.testing.assert_allclose( 95 | rescaled_environment.action_space.low, new_low) 96 | np.testing.assert_allclose( 97 | rescaled_environment.action_space.high, new_high) 98 | 99 | def test_rescale_observation_raises_exception(self): 100 | environment_kwargs = { 101 | 'domain': 'quadruped', 102 | 'task': 'run', 103 | 'rescale_observation_range': (-1.0, 1.0), 104 | } 105 | with pytest.raises( 106 | NotImplementedError, match=r"Observation rescaling .*"): 107 | environment = DmControlAdapter(**environment_kwargs) 108 | 109 | 110 | if __name__ == '__main__': 111 | unittest.main() 112 | -------------------------------------------------------------------------------- /softlearning/environments/adapters/gym_adapter.py: -------------------------------------------------------------------------------- 1 | """Implements a GymAdapter that converts Gym envs into SoftlearningEnv.""" 2 | 3 | from collections import defaultdict, OrderedDict 4 | import copy 5 | 6 | import gym 7 | from gym import spaces, wrappers 8 | from gym.envs.mujoco.mujoco_env import MujocoEnv 9 | 10 | from .softlearning_env import SoftlearningEnv 11 | from softlearning.environments.gym import register_environments 12 | from softlearning.environments.gym.wrappers import RescaleObservation 13 | from softlearning.utils.gym import is_continuous_space 14 | 15 | 16 | def parse_domain_task(gym_id): 17 | domain_task_parts = gym_id.split('-') 18 | domain = '-'.join(domain_task_parts[:1]) 19 | task = '-'.join(domain_task_parts[1:]) 20 | 21 | return domain, task 22 | 23 | 24 | CUSTOM_GYM_ENVIRONMENT_IDS = register_environments() 25 | CUSTOM_GYM_ENVIRONMENTS = defaultdict(list) 26 | 27 | for gym_id in CUSTOM_GYM_ENVIRONMENT_IDS: 28 | domain, task = parse_domain_task(gym_id) 29 | CUSTOM_GYM_ENVIRONMENTS[domain].append(task) 30 | 31 | CUSTOM_GYM_ENVIRONMENTS = 
dict(CUSTOM_GYM_ENVIRONMENTS) 32 | 33 | GYM_ENVIRONMENT_IDS = tuple(gym.envs.registry.env_specs.keys()) 34 | GYM_ENVIRONMENTS = defaultdict(list) 35 | 36 | 37 | for gym_id in GYM_ENVIRONMENT_IDS: 38 | domain, task = parse_domain_task(gym_id) 39 | GYM_ENVIRONMENTS[domain].append(task) 40 | 41 | GYM_ENVIRONMENTS = dict(GYM_ENVIRONMENTS) 42 | 43 | 44 | DEFAULT_OBSERVATION_KEY = 'observations' 45 | 46 | 47 | class GymAdapter(SoftlearningEnv): 48 | """Adapter that implements the SoftlearningEnv for Gym envs.""" 49 | 50 | def __init__(self, 51 | domain, 52 | task, 53 | *args, 54 | env=None, 55 | rescale_action_range=(-1.0, 1.0), 56 | rescale_observation_range=None, 57 | observation_keys=(), 58 | goal_keys=(), 59 | unwrap_time_limit=True, 60 | pixel_wrapper_kwargs=None, 61 | **kwargs): 62 | assert not args, ( 63 | "Gym environments don't support args. Use kwargs instead.") 64 | 65 | self.rescale_action_range = rescale_action_range 66 | self.rescale_observation_range = rescale_observation_range 67 | self.unwrap_time_limit = unwrap_time_limit 68 | 69 | super(GymAdapter, self).__init__( 70 | domain, task, *args, goal_keys=goal_keys, **kwargs) 71 | 72 | if env is None: 73 | assert (domain is not None and task is not None), (domain, task) 74 | try: 75 | env_id = f"{domain}-{task}" 76 | env = gym.envs.make(env_id, **kwargs) 77 | except gym.error.UnregisteredEnv: 78 | env_id = f"{domain}{task}" 79 | env = gym.envs.make(env_id, **kwargs) 80 | self._env_kwargs = kwargs 81 | else: 82 | assert not kwargs 83 | assert domain is None and task is None, (domain, task) 84 | 85 | if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit: 86 | # Remove the TimeLimit wrapper that sets 'done = True' when 87 | # the time limit specified for each environment has been passed and 88 | # therefore the environment is not Markovian (terminal condition 89 | # depends on time rather than state). 90 | env = env.env 91 | 92 | if rescale_observation_range: 93 | env = RescaleObservation(env, *rescale_observation_range) 94 | 95 | if rescale_action_range and is_continuous_space(env.action_space): 96 | env = wrappers.RescaleAction(env, *rescale_action_range) 97 | 98 | # TODO(hartikainen): We need the clip action wrapper because sometimes 99 | # the tfp.bijectors.Tanh() produces values strictly greater than 1 or 100 | # strictly less than -1, which causes the env fail without clipping. 101 | # The error is in the order of 1e-7, which should not cause issues. 102 | # See https://github.com/tensorflow/probability/issues/664. 
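        # E.g. a tanh-squashed action of 1.0000001 would fail the underlying
        # environment's bounds check; ClipAction maps it back to exactly 1.0.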
103 | env = wrappers.ClipAction(env) 104 | 105 | if pixel_wrapper_kwargs is not None: 106 | env = wrappers.PixelObservationWrapper(env, **pixel_wrapper_kwargs) 107 | 108 | self._env = env 109 | 110 | if isinstance(self._env.observation_space, spaces.Dict): 111 | dict_observation_space = self._env.observation_space 112 | self.observation_keys = ( 113 | observation_keys or (*self.observation_space.spaces.keys(), )) 114 | elif isinstance(self._env.observation_space, spaces.Box): 115 | dict_observation_space = spaces.Dict(OrderedDict(( 116 | (DEFAULT_OBSERVATION_KEY, self._env.observation_space), 117 | ))) 118 | self.observation_keys = (DEFAULT_OBSERVATION_KEY, ) 119 | 120 | self._observation_space = type(dict_observation_space)([ 121 | (name, copy.deepcopy(space)) 122 | for name, space in dict_observation_space.spaces.items() 123 | if name in self.observation_keys + self.goal_keys 124 | ]) 125 | 126 | if len(self._env.action_space.shape) > 1: 127 | raise NotImplementedError( 128 | "Shape of the action space ({}) is not flat, make sure to" 129 | " check the implemenation.".format(self._env.action_space)) 130 | 131 | self._action_space = self._env.action_space 132 | 133 | def step(self, action, *args, **kwargs): 134 | observation, reward, terminal, info = self._env.step( 135 | action, *args, **kwargs) 136 | 137 | if not isinstance(self._env.observation_space, spaces.Dict): 138 | observation = {DEFAULT_OBSERVATION_KEY: observation} 139 | 140 | observation = self._filter_observation(observation) 141 | return observation, reward, terminal, info 142 | 143 | def reset(self, *args, **kwargs): 144 | observation = self._env.reset() 145 | 146 | if not isinstance(self._env.observation_space, spaces.Dict): 147 | observation = {DEFAULT_OBSERVATION_KEY: observation} 148 | 149 | observation = self._filter_observation(observation) 150 | return observation 151 | 152 | def render(self, *args, width=100, height=100, **kwargs): 153 | if isinstance(self._env.unwrapped, MujocoEnv): 154 | self._env.render(*args, width=width, height=height, **kwargs) 155 | 156 | return self._env.render(*args, **kwargs) 157 | 158 | def seed(self, *args, **kwargs): 159 | return self._env.seed(*args, **kwargs) 160 | 161 | @property 162 | def unwrapped(self): 163 | return self._env.unwrapped 164 | -------------------------------------------------------------------------------- /softlearning/environments/adapters/robosuite_adapter_test.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import unittest 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | from .softlearning_env_test import AdapterTestClass 8 | from softlearning.environments.adapters.robosuite_adapter import ( 9 | RobosuiteAdapter) 10 | 11 | 12 | class TestRobosuiteAdapter(unittest.TestCase, AdapterTestClass): 13 | def create_adapter(self, domain='Sawyer', task='Lift', *args, **kwargs): 14 | return RobosuiteAdapter( 15 | domain, 16 | task, 17 | *args, 18 | **{ 19 | 'has_renderer': False, 20 | 'has_offscreen_renderer': False, 21 | 'use_camera_obs': False, 22 | **kwargs 23 | }) 24 | 25 | def test_environments(self): 26 | # Make sure that all the environments are creatable 27 | TEST_ENVIRONMENTS = [('Sawyer', 'Lift')] 28 | 29 | def verify_reset_and_step(domain, task): 30 | env = RobosuiteAdapter( 31 | domain=domain, 32 | task=task, 33 | has_renderer=True, 34 | has_offscreen_renderer=True, 35 | use_camera_obs=False) 36 | env.reset() 37 | env.step(env.action_space.sample()) 38 | 39 | for domain, task in TEST_ENVIRONMENTS: 40 
| verify_reset_and_step(domain, task) 41 | 42 | def test_serialize_deserialize(self): 43 | domain, task = 'Sawyer', 'Lift' 44 | env_kwargs = { 45 | 'has_renderer': False, 46 | 'has_offscreen_renderer': False, 47 | 'use_camera_obs': False, 48 | 'reward_shaping': True, 49 | } 50 | env1 = self.create_adapter(domain=domain, task=task, **env_kwargs) 51 | env1.reset() 52 | 53 | env2 = pickle.loads(pickle.dumps(env1)) 54 | 55 | self.assertEqual(env1.observation_keys, env2.observation_keys) 56 | for key, value in env_kwargs.items(): 57 | self.assertEqual(getattr(env1.unwrapped, f'{key}'), value) 58 | self.assertEqual(getattr(env2.unwrapped, f'{key}'), value) 59 | 60 | def test_copy_environments(self): 61 | domain, task = 'Sawyer', 'Lift' 62 | env_kwargs = { 63 | "gripper_type": "TwoFingerGripper", 64 | "table_full_size": (0.8, 0.8, 0.8) 65 | } 66 | env1 = self.create_adapter(domain=domain, task=task, **env_kwargs) 67 | env1.reset() 68 | env2 = env1.copy() 69 | 70 | self.assertEqual(env1.observation_keys, env2.observation_keys) 71 | for key, value in env_kwargs.items(): 72 | self.assertEqual(getattr(env1.unwrapped, key), value) 73 | self.assertEqual(getattr(env2.unwrapped, key), value) 74 | 75 | domain, task = 'Sawyer', 'Lift' 76 | robosuite_adapter_kwargs = { 77 | 'observation_keys': ('joint_pos', 'joint_vel') 78 | } 79 | env_kwargs = { 80 | "gripper_type": "TwoFingerGripper", 81 | "table_full_size": (0.8, 0.8, 0.8) 82 | } 83 | env1 = self.create_adapter( 84 | domain=domain, task=task, **robosuite_adapter_kwargs, **env_kwargs) 85 | env1.reset() 86 | env2 = env1.copy() 87 | 88 | for key, value in robosuite_adapter_kwargs.items(): 89 | self.assertEqual(getattr(env1, key), value) 90 | self.assertEqual(getattr(env2, key), value) 91 | 92 | for key, value in env_kwargs.items(): 93 | self.assertEqual(getattr(env1.unwrapped, key), value) 94 | self.assertEqual(getattr(env2.unwrapped, key), value) 95 | 96 | def test_fails_with_invalid_environment_kwargs(self): 97 | domain, task = 'Sawyer', 'Lift' 98 | robosuite_adapter_kwargs = { 99 | 'observation_keys': ('joint_pos', 'invalid_key') 100 | } 101 | with self.assertRaises(AssertionError): 102 | env = self.create_adapter( 103 | domain=domain, task=task, **robosuite_adapter_kwargs) 104 | 105 | def test_environment_kwargs(self): 106 | env_kwargs = { 107 | "has_renderer": False, 108 | "has_offscreen_renderer": False, 109 | "use_camera_obs": False, 110 | "control_freq": 10, 111 | "horizon": 1000 112 | } 113 | 114 | env = RobosuiteAdapter( 115 | domain='Sawyer', task='Lift', **env_kwargs) 116 | 117 | observation1, reward, done, info = env.step(env.action_space.sample()) 118 | 119 | self.assertAlmostEqual(reward, 0.0) 120 | 121 | for key, expected_value in env_kwargs.items(): 122 | actual_value = getattr(env.unwrapped, key) 123 | self.assertEqual(actual_value, expected_value) 124 | 125 | def test_render_rgb_array(self): 126 | env = self.create_adapter( 127 | has_renderer=False, 128 | has_offscreen_renderer=True) 129 | env.render(mode='rgb_array', camera_id=0, width=32, height=32) 130 | 131 | def test_render_human(self): 132 | env = self.create_adapter( 133 | has_renderer=True, 134 | has_offscreen_renderer=False) 135 | env.render(mode='human') 136 | 137 | def test_fails_with_unnormalized_action_spec(self): 138 | from robosuite.environments.sawyer_lift import SawyerLift 139 | 140 | class UnnormalizedEnv(SawyerLift): 141 | @property 142 | def dof(self): 143 | return 5 144 | 145 | @property 146 | def action_spec(self): 147 | low, high = np.ones(self.dof) * -2.0, 
np.ones(self.dof) * 2.0 148 | return low, high 149 | 150 | env = UnnormalizedEnv( 151 | has_renderer=False, 152 | has_offscreen_renderer=False, 153 | use_camera_obs=False) 154 | with self.assertRaises(AssertionError): 155 | adapter = RobosuiteAdapter(domain=None, task=None, env=env) 156 | 157 | def test_rescale_observation_raises_exception(self): 158 | environment_kwargs = { 159 | 'domain': 'Sawyer', 160 | 'task': 'Lift', 161 | 'rescale_observation_range': (-1.0, 1.0), 162 | } 163 | with pytest.raises( 164 | NotImplementedError, match=r"Observation rescaling .*"): 165 | environment = RobosuiteAdapter(**environment_kwargs) 166 | 167 | 168 | if __name__ == '__main__': 169 | unittest.main() 170 | -------------------------------------------------------------------------------- /softlearning/environments/adapters/softlearning_env_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import spaces 3 | 4 | 5 | class AdapterTestClass(object): 6 | ENVIRONMENTS = [] 7 | 8 | def test_observation_space(self): 9 | env = self.create_adapter() 10 | observation_space = env.observation_space 11 | self.assertTrue( 12 | isinstance(observation_space, (spaces.Box, spaces.Dict))) 13 | # TODO(hartikainen): Test actual conversion of dimensions and types of 14 | # inside items; not just outside type. 15 | 16 | def test_action_space(self): 17 | env = self.create_adapter() 18 | action_space = env.action_space 19 | self.assertTrue( 20 | isinstance(action_space, spaces.Box)) 21 | 22 | def test_step(self): 23 | env = self.create_adapter() 24 | env.reset() 25 | step = env.step(env.action_space.sample()) 26 | self.assertTrue(isinstance(step, tuple)) 27 | self.assertEqual(len(step), 4) 28 | 29 | observation, reward, done, info = step 30 | self.assertIsInstance(observation, dict) 31 | self.assertIsInstance(reward, np.float) 32 | self.assertIsInstance(done, bool) 33 | self.assertIsInstance(info, dict) 34 | 35 | def test_reset(self): 36 | env = self.create_adapter() 37 | observation = env.reset() 38 | self.assertIsInstance(observation, dict) 39 | 40 | def test_render_rgb_array(self): 41 | env = self.create_adapter() 42 | result = env.render(mode='rgb_array') 43 | self.assertIsInstance(result, np.ndarray) 44 | env.close() 45 | 46 | def test_render_human(self): 47 | env = self.create_adapter() 48 | result = env.render(mode='human') 49 | self.assertIsNone(result) 50 | env.close() 51 | 52 | def test_close(self): 53 | env = self.create_adapter() 54 | env.close() 55 | -------------------------------------------------------------------------------- /softlearning/environments/dm_control/__init__.py: -------------------------------------------------------------------------------- 1 | """Custom DeepMind Control Suite environments. 2 | 3 | Every class inside this module should extend a dm_control.suite.Task class. The 4 | # file structure should be similar to dm_control's file structure. 
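(For example, a hypothetical custom cartpole variant would live at
suite/cartpole.py and subclass the corresponding dm_control suite Task,
mirroring dm_control.suite's own layout.)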
5 | """ 6 | -------------------------------------------------------------------------------- /softlearning/environments/dm_control/suite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/environments/dm_control/suite/__init__.py -------------------------------------------------------------------------------- /softlearning/environments/dm_control/suite/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/environments/dm_control/suite/wrappers/__init__.py -------------------------------------------------------------------------------- /softlearning/environments/gym/__init__.py: -------------------------------------------------------------------------------- 1 | """Custom Gym environments. 2 | 3 | Every class inside this module should extend a gym.Env class. The file 4 | structure should be similar to gym.envs file structure, e.g. if you're 5 | implementing a mujoco env, you would implement it under gym.mujoco submodule. 6 | """ 7 | 8 | import gym 9 | 10 | 11 | CUSTOM_GYM_ENVIRONMENTS_PATH = __package__ 12 | MUJOCO_ENVIRONMENTS_PATH = f'{CUSTOM_GYM_ENVIRONMENTS_PATH}.mujoco' 13 | 14 | MUJOCO_ENVIRONMENT_SPECS = ( 15 | { 16 | 'id': 'Swimmer-Parameterizable-v3', 17 | 'entry_point': (f'gym.envs.mujoco.swimmer_v3:SwimmerEnv'), 18 | }, 19 | { 20 | 'id': 'Hopper-Parameterizable-v3', 21 | 'entry_point': (f'gym.envs.mujoco.hopper_v3:HopperEnv'), 22 | }, 23 | { 24 | 'id': 'Walker2d-Parameterizable-v3', 25 | 'entry_point': (f'gym.envs.mujoco.walker2d_v3:Walker2dEnv'), 26 | }, 27 | { 28 | 'id': 'HalfCheetah-Parameterizable-v3', 29 | 'entry_point': (f'gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv'), 30 | }, 31 | { 32 | 'id': 'Ant-Parameterizable-v3', 33 | 'entry_point': (f'gym.envs.mujoco.ant_v3:AntEnv'), 34 | }, 35 | { 36 | 'id': 'Humanoid-Parameterizable-v3', 37 | 'entry_point': (f'gym.envs.mujoco.humanoid_v3:HumanoidEnv'), 38 | }, 39 | { 40 | 'id': 'Pusher2d-Default-v0', 41 | 'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}' 42 | '.pusher_2d:Pusher2dEnv'), 43 | }, 44 | { 45 | 'id': 'Pusher2d-DefaultReach-v0', 46 | 'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}' 47 | '.pusher_2d:ForkReacherEnv'), 48 | }, 49 | { 50 | 'id': 'Pusher2d-ImageDefault-v0', 51 | 'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}' 52 | '.image_pusher_2d:ImagePusher2dEnv'), 53 | }, 54 | { 55 | 'id': 'Pusher2d-ImageReach-v0', 56 | 'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}' 57 | '.image_pusher_2d:ImageForkReacher2dEnv'), 58 | }, 59 | { 60 | 'id': 'Pusher2d-BlindReach-v0', 61 | 'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}' 62 | '.image_pusher_2d:BlindForkReacher2dEnv'), 63 | }, 64 | ) 65 | 66 | GENERAL_ENVIRONMENT_SPECS = ( 67 | { 68 | 'id': 'MultiGoal-Default-v0', 69 | 'entry_point': (f'{CUSTOM_GYM_ENVIRONMENTS_PATH}' 70 | '.multi_goal:MultiGoalEnv') 71 | }, 72 | ) 73 | 74 | MUJOCO_ENVIRONMENTS = tuple( 75 | environment_spec['id'] 76 | for environment_spec in MUJOCO_ENVIRONMENT_SPECS) 77 | 78 | 79 | GENERAL_ENVIRONMENTS = tuple( 80 | environment_spec['id'] 81 | for environment_spec in GENERAL_ENVIRONMENT_SPECS) 82 | 83 | 84 | GYM_ENVIRONMENTS = ( 85 | *MUJOCO_ENVIRONMENTS, 86 | *GENERAL_ENVIRONMENTS, 87 | ) 88 | 89 | 90 | def register_mujoco_environments(): 91 | """Register softlearning mujoco environments.""" 92 | for 
mujoco_environment in MUJOCO_ENVIRONMENT_SPECS: 93 | gym.register(**mujoco_environment) 94 | 95 | gym_ids = tuple( 96 | environment_spec['id'] 97 | for environment_spec in MUJOCO_ENVIRONMENT_SPECS) 98 | 99 | return gym_ids 100 | 101 | 102 | def register_general_environments(): 103 | """Register gym environments that don't fall under a specific category.""" 104 | for general_environment in GENERAL_ENVIRONMENT_SPECS: 105 | gym.register(**general_environment) 106 | 107 | gym_ids = tuple( 108 | environment_spec['id'] 109 | for environment_spec in GENERAL_ENVIRONMENT_SPECS) 110 | 111 | return gym_ids 112 | 113 | 114 | def register_environments(): 115 | registered_mujoco_environments = register_mujoco_environments() 116 | registered_general_environments = register_general_environments() 117 | 118 | return ( 119 | *registered_mujoco_environments, 120 | *registered_general_environments, 121 | ) 122 | -------------------------------------------------------------------------------- /softlearning/environments/gym/mujoco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/environments/gym/mujoco/__init__.py -------------------------------------------------------------------------------- /softlearning/environments/gym/mujoco/image_pusher_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | 4 | from softlearning.environments.helpers import random_point_in_circle 5 | from .pusher_2d import Pusher2dEnv 6 | 7 | 8 | class ImagePusher2dEnv(Pusher2dEnv): 9 | def __init__(self, image_shape, *args, **kwargs): 10 | utils.EzPickle.__init__(**locals()) 11 | self.image_shape = image_shape 12 | Pusher2dEnv.__init__(self, *args, **kwargs) 13 | 14 | def _get_obs(self): 15 | width, height = self.image_shape[:2] 16 | image = self.render(mode='rgb_array', width=width, height=height) 17 | image = ((2.0 / 255.0) * image - 1.0) 18 | 19 | return np.concatenate([ 20 | image.reshape(-1), 21 | self.sim.data.qpos.flat[self.JOINT_INDS], 22 | self.sim.data.qvel.flat[self.JOINT_INDS], 23 | ]).reshape(-1) 24 | 25 | def step(self, action): 26 | """Step, computing reward from 'true' observations and not images.""" 27 | 28 | reward_observations = super(ImagePusher2dEnv, self)._get_obs() 29 | reward, info = self.compute_reward(reward_observations, action) 30 | 31 | self.do_simulation(action, self.frame_skip) 32 | 33 | observation = self._get_obs() 34 | done = False 35 | 36 | return observation, reward, done, info 37 | 38 | def viewer_setup(self): 39 | self.viewer.cam.trackbodyid = 0 40 | self.viewer.cam.lookat[:3] = [0, 0, 0] 41 | self.viewer.cam.distance = 3.5 42 | self.viewer.cam.elevation = -90 43 | self.viewer.cam.azimuth = 0 44 | self.viewer.cam.trackbodyid = -1 45 | 46 | 47 | class ImageForkReacher2dEnv(ImagePusher2dEnv): 48 | def __init__(self, 49 | arm_goal_distance_cost_coeff, 50 | arm_object_distance_cost_coeff, 51 | *args, 52 | **kwargs): 53 | utils.EzPickle.__init__(**locals()) 54 | 55 | self._arm_goal_distance_cost_coeff = arm_goal_distance_cost_coeff 56 | self._arm_object_distance_cost_coeff = arm_object_distance_cost_coeff 57 | 58 | super(ImageForkReacher2dEnv, self).__init__(*args, **kwargs) 59 | 60 | def compute_reward(self, observations, actions): 61 | is_batch = True 62 | if observations.ndim == 1: 63 | observations = observations[None] 64 | actions = actions[None] 65 | 
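            # (Added note: single observations are promoted to a batch of one
            # here and squeezed back before returning; the `else` branch below
            # raises, so the already-batched path is effectively unused.)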
is_batch = False 66 | else: 67 | raise NotImplementedError('Might be broken.') 68 | 69 | arm_pos = observations[:, -6:-4] 70 | goal_pos = self.get_body_com('goal')[:2][None] 71 | object_pos = observations[:, -3:-1] 72 | 73 | arm_goal_dists = np.linalg.norm(arm_pos - goal_pos, axis=1) 74 | arm_object_dists = np.linalg.norm(arm_pos - object_pos, axis=1) 75 | ctrl_costs = np.sum(actions**2, axis=1) 76 | 77 | costs = ( 78 | + self._arm_goal_distance_cost_coeff * arm_goal_dists 79 | + self._arm_object_distance_cost_coeff * arm_object_dists 80 | + self._ctrl_cost_coeff * ctrl_costs) 81 | 82 | rewards = -costs 83 | 84 | if not is_batch: 85 | rewards = rewards.squeeze() 86 | arm_goal_dists = arm_goal_dists.squeeze() 87 | arm_object_dists = arm_object_dists.squeeze() 88 | 89 | return rewards, { 90 | 'arm_goal_distance': arm_goal_dists, 91 | 'arm_object_distance': arm_object_dists, 92 | } 93 | 94 | def reset_model(self): 95 | qpos = np.random.uniform( 96 | low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos.squeeze() 97 | 98 | # qpos[self.JOINT_INDS[0]] = np.random.uniform(-np.pi, np.pi) 99 | # qpos[self.JOINT_INDS[1]] = np.random.uniform( 100 | # -np.pi/2, np.pi/2) + np.pi/4 101 | # qpos[self.JOINT_INDS[2]] = np.random.uniform( 102 | # -np.pi/2, np.pi/2) + np.pi/2 103 | 104 | target_position = np.array(random_point_in_circle( 105 | angle_range=(0, 2*np.pi), radius=(0.6, 1.2))) 106 | target_position[1] += 1.0 107 | 108 | qpos[self.TARGET_INDS] = target_position 109 | # qpos[self.TARGET_INDS] = [1.0, 2.0] 110 | # qpos[self.TARGET_INDS] = self.init_qpos.squeeze()[self.TARGET_INDS] 111 | 112 | puck_position = np.random.uniform([-1.0], [1.0], size=[2]) 113 | puck_position = ( 114 | np.sign(puck_position) 115 | * np.maximum(np.abs(puck_position), 1/2)) 116 | puck_position[np.flatnonzero(puck_position == 0)] = 1.0 117 | # puck_position[1] += 1.0 118 | # puck_position = np.random.uniform( 119 | # low=[0.3, -1.0], high=[1.0, -0.4]), 120 | 121 | qpos[self.PUCK_INDS] = puck_position 122 | 123 | qvel = self.init_qvel.copy().squeeze() 124 | qvel[self.PUCK_INDS] = 0 125 | qvel[self.TARGET_INDS] = 0 126 | 127 | # TODO: remnants from rllab -> gym conversion 128 | # qacc = np.zeros(self.sim.data.qacc.shape[0]) 129 | # ctrl = np.zeros(self.sim.data.ctrl.shape[0]) 130 | # full_state = np.concatenate((qpos, qvel, qacc, ctrl)) 131 | 132 | # super(Pusher2dEnv, self).reset(full_state) 133 | 134 | self.set_state(qpos, qvel) 135 | 136 | return self._get_obs() 137 | 138 | 139 | class BlindForkReacher2dEnv(ImageForkReacher2dEnv): 140 | def _get_obs(self): 141 | return np.concatenate([ 142 | self.sim.data.qpos.flat[self.JOINT_INDS], 143 | self.sim.data.qvel.flat[self.JOINT_INDS], 144 | ]).reshape(-1) 145 | -------------------------------------------------------------------------------- /softlearning/environments/gym/robotics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/environments/gym/robotics/__init__.py -------------------------------------------------------------------------------- /softlearning/environments/gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .rescale_observation import RescaleObservation # noqa: unused-import 2 | -------------------------------------------------------------------------------- /softlearning/environments/gym/wrappers/rescale_observation.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | 6 | 7 | def rescale_values(values, old_low, old_high, new_low, new_high): 8 | rescaled_values = new_low + (new_high - new_low) * ( 9 | (values - old_low) / (old_high - old_low)) 10 | rescaled_values = np.clip(rescaled_values, new_low, new_high) 11 | return rescaled_values 12 | 13 | 14 | class RescaleObservation(gym.ObservationWrapper): 15 | def __init__(self, env, low, high): 16 | r"""Rescale observation space to a range [`low`, `high`]. 17 | Example: 18 | >>> RescaleObservation(env, low, high).observation_space == Box(low, high) 19 | True 20 | Raises: 21 | TypeError: If `not isinstance(environment.observation_space, spaces.Box)`. 22 | ValueError: If either `low` or `high` is not finite. 23 | ValueError: If any of `observation_space.{low,high}` is not finite. 24 | ValueError: If `high <= low`. 25 | TODO(hartikainen): This should be extended to work with Dict and Tuple spaces. 26 | """ 27 | if np.any(~np.isfinite((low, high))): 28 | raise ValueError( 29 | "Arguments 'low' and 'high' need to be finite." 30 | " Got: low={}, high={}".format(low, high)) 31 | 32 | if np.any(high <= low): 33 | raise ValueError("Argument `low` must be smaller than `high`" 34 | " Got: low={}, high=".format(low, high)) 35 | 36 | super(RescaleObservation, self).__init__(env) 37 | 38 | if not isinstance(env.observation_space, spaces.Box): 39 | raise TypeError("Expected Box observation space. Got: {}" 40 | "".format(type(env.observation_space))) 41 | 42 | if np.any(~np.isfinite(( 43 | env.observation_space.low, env.observation_space.high))): 44 | raise ValueError( 45 | "Observation space 'low' and 'high' need to be finite." 46 | " Got: observation_space.low={}, observation_space.high={}" 47 | "".format(env.observation_space.low, 48 | env.observation_space.high)) 49 | 50 | shape = env.observation_space.shape 51 | dtype = env.observation_space.dtype 52 | 53 | self.low = low + np.zeros(shape, dtype=dtype) 54 | self.high = high + np.zeros(shape, dtype=dtype) 55 | self.observation_space = spaces.Box( 56 | low=self.low, high=self.high, shape=shape, dtype=dtype) 57 | 58 | def observation(self, observation): 59 | rescaled_observation = rescale_values( 60 | observation, 61 | old_low=self.env.observation_space.low, 62 | old_high=self.env.observation_space.high, 63 | new_low=self.low, 64 | new_high=self.high) 65 | 66 | return rescaled_observation 67 | -------------------------------------------------------------------------------- /softlearning/environments/gym/wrappers/rescale_observation_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym import spaces 7 | from .rescale_observation import RescaleObservation 8 | 9 | 10 | class FakeEnvironment(gym.Env): 11 | def __init__(self): 12 | """Fake environment whose observation equals broadcasted action.""" 13 | self.observation_space = gym.spaces.Box( 14 | shape=(2, ), 15 | low=np.array((-1.2, -0.07)), 16 | high=np.array((0.6, 0.07)), 17 | dtype=np.float32) 18 | self.action_space = self.observation_space 19 | 20 | def reset(self): 21 | observation = self.observation_space.sample() 22 | return observation 23 | 24 | def step(self, action): 25 | observation = action * np.ones(self.observation_space.shape) 26 | reward, terminal, info = 0.0, False, {} 27 | return observation, reward, terminal, info 28 | 29 | 30 | def 
test_rescale_observation(): 31 | new_low, new_high = -1.0, 1.0 32 | env = FakeEnvironment() 33 | wrapped_env = RescaleObservation(env, new_low, new_high) 34 | 35 | np.testing.assert_allclose(wrapped_env.observation_space.low, new_low) 36 | np.testing.assert_allclose(wrapped_env.observation_space.high, new_high) 37 | 38 | seed = 0 39 | env.seed(seed) 40 | wrapped_env.seed(seed) 41 | 42 | env.reset() 43 | wrapped_env.reset() 44 | 45 | low_observation = env.step(env.observation_space.low)[0] 46 | wrapped_low_observation = wrapped_env.step(env.observation_space.low)[0] 47 | 48 | assert np.allclose(low_observation, env.observation_space.low) 49 | assert np.allclose( 50 | wrapped_low_observation, wrapped_env.observation_space.low) 51 | 52 | high_observation = env.step(env.observation_space.high)[0] 53 | wrapped_high_observation = wrapped_env.step(env.observation_space.high)[0] 54 | 55 | assert np.allclose(high_observation, env.observation_space.high) 56 | assert np.allclose( 57 | wrapped_high_observation, wrapped_env.observation_space.high) 58 | 59 | 60 | def test_raises_on_non_finite_low(): 61 | env = FakeEnvironment() 62 | assert isinstance(env.observation_space, spaces.Box) 63 | 64 | with pytest.raises(ValueError): 65 | RescaleObservation(env, -float('inf'), 1.0) 66 | 67 | with pytest.raises(ValueError): 68 | RescaleObservation(env, -1.0, float('inf')) 69 | 70 | with pytest.raises(ValueError): 71 | RescaleObservation(env, -1.0, np.nan) 72 | 73 | 74 | def test_raises_on_high_less_than_low(): 75 | env = FakeEnvironment() 76 | assert isinstance(env.observation_space, spaces.Box) 77 | with pytest.raises(ValueError): 78 | RescaleObservation(env, 1.0, 1.0) 79 | with pytest.raises(ValueError): 80 | RescaleObservation(env, 1.0, -1.0) 81 | 82 | 83 | def test_raises_on_high_equals_low(): 84 | env = FakeEnvironment() 85 | assert isinstance(env.observation_space, spaces.Box) 86 | with pytest.raises(ValueError): 87 | RescaleObservation(env, 1.0, 1.0) 88 | 89 | 90 | def test_raises_on_non_box_space(): 91 | env = gym.envs.make('Copy-v0') 92 | assert isinstance(env.observation_space, spaces.Discrete) 93 | with pytest.raises(TypeError): 94 | RescaleObservation(env, -1.0, 1.0) 95 | 96 | 97 | def test_raises_on_non_finite_space(): 98 | env = gym.envs.make('Swimmer-v3') 99 | assert np.any(np.isinf(( 100 | env.observation_space.low, env.observation_space.high))) 101 | with pytest.raises(ValueError): 102 | RescaleObservation(env, -1.0, 1.0) 103 | -------------------------------------------------------------------------------- /softlearning/environments/helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def random_point_in_circle(angle_range=(0, 2*np.pi), radius=(0, 25)): 5 | angle = np.random.uniform(*angle_range) 6 | radius = radius if np.isscalar(radius) else np.random.uniform(*radius) 7 | x, y = np.cos(angle) * radius, np.sin(angle) * radius 8 | point = np.array([x, y]) 9 | return point 10 | -------------------------------------------------------------------------------- /softlearning/environments/utils.py: -------------------------------------------------------------------------------- 1 | from .adapters.gym_adapter import GymAdapter 2 | 3 | ADAPTERS = { 4 | 'gym': GymAdapter, 5 | } 6 | 7 | try: 8 | from .adapters.dm_control_adapter import DmControlAdapter 9 | ADAPTERS['dm_control'] = DmControlAdapter 10 | except ModuleNotFoundError as e: 11 | if 'dm_control' not in e.msg: 12 | raise 13 | 14 | print("Warning: dm_control package not 
found. Run" 15 | " `pip install git+https://github.com/deepmind/dm_control.git`" 16 | " to use dm_control environments.") 17 | 18 | try: 19 | from .adapters.robosuite_adapter import RobosuiteAdapter 20 | ADAPTERS['robosuite'] = RobosuiteAdapter 21 | except ModuleNotFoundError as e: 22 | if 'robosuite' not in e.msg: 23 | raise 24 | 25 | print("Warning: robosuite package not found. Run `pip install robosuite`" 26 | " to use robosuite environments.") 27 | 28 | UNIVERSES = set(ADAPTERS.keys()) 29 | 30 | 31 | def get_environment(universe, domain, task, environment_params): 32 | return ADAPTERS[universe](domain, task, **environment_params) 33 | 34 | 35 | def get_environment_from_params(environment_params): 36 | universe = environment_params['universe'] 37 | task = environment_params['task'] 38 | domain = environment_params['domain'] 39 | environment_kwargs = environment_params.get('kwargs', {}).copy() 40 | 41 | return get_environment(universe, domain, task, environment_kwargs) 42 | -------------------------------------------------------------------------------- /softlearning/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/misc/__init__.py -------------------------------------------------------------------------------- /softlearning/misc/kernel.py: -------------------------------------------------------------------------------- 1 | from distutils.version import LooseVersion 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | def adaptive_isotropic_gaussian_kernel(xs, ys, h_min=1e-3): 8 | """Gaussian kernel with dynamic bandwidth. 9 | 10 | The bandwidth is adjusted dynamically to match median_distance / log(Kx). 11 | See [2] for more information. 12 | 13 | Args: 14 | xs(`tf.Tensor`): A tensor of shape (N x Kx x D) containing N sets of Kx 15 | particles of dimension D. This is the first kernel argument. 16 | ys(`tf.Tensor`): A tensor of shape (N x Ky x D) containing N sets of Kx 17 | particles of dimension D. This is the second kernel argument. 18 | h_min(`float`): Minimum bandwidth. 19 | 20 | Returns: 21 | `dict`: Returned dictionary has two fields: 22 | 'output': A `tf.Tensor` object of shape (N x Kx x Ky) representing 23 | the kernel matrix for inputs `xs` and `ys`. 24 | 'gradient': A 'tf.Tensor` object of shape (N x Kx x Ky x D) 25 | representing the gradient of the kernel with respect to `xs`. 26 | 27 | Reference: 28 | [2] Qiang Liu,Dilin Wang, "Stein Variational Gradient Descent: A General 29 | Purpose Bayesian Inference Algorithm," Neural Information Processing 30 | Systems (NIPS), 2016. 31 | """ 32 | Kx, D = xs.get_shape().as_list()[-2:] 33 | Ky, D2 = ys.get_shape().as_list()[-2:] 34 | assert D == D2 35 | 36 | leading_shape = tf.shape(input=xs)[:-2] 37 | 38 | # Compute the pairwise distances of left and right particles. 39 | diff = tf.expand_dims(xs, -2) - tf.expand_dims(ys, -3) 40 | # ... x Kx x Ky x D 41 | 42 | if LooseVersion(tf.__version__) <= LooseVersion('1.5.0'): 43 | dist_sq = tf.reduce_sum(input_tensor=diff**2, axis=-1, keepdims=False) 44 | else: 45 | dist_sq = tf.reduce_sum(input_tensor=diff**2, axis=-1, keepdims=False) 46 | # ... x Kx x Ky 47 | 48 | # Get median. 49 | input_shape = tf.concat((leading_shape, [Kx * Ky]), axis=0) 50 | values, _ = tf.nn.top_k( 51 | input=tf.reshape(dist_sq, input_shape), 52 | k=(Kx * Ky // 2 + 1), # This is exactly true only if Kx*Ky is odd. 53 | sorted=True) # ... 
-------------------------------------------------------------------------------- /softlearning/misc/plotter.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | 
5 | 
6 | class QFPolicyPlotter:
7 |     def __init__(self, Q, policy, obs_lst, default_action, n_samples):
8 |         self._Q = Q
9 |         self._policy = policy
10 |         self._obs_lst = obs_lst
11 |         self._default_action = np.array(default_action)
12 |         self._n_samples = n_samples
13 | 
14 |         self._var_inds = np.flatnonzero(np.isnan(default_action))
15 | 
16 |         assert len(self._var_inds) == 2
17 | 
18 |         n_plots = len(obs_lst)
19 | 
20 |         x_size = 5 * n_plots
21 |         y_size = 5
22 | 
23 |         fig = plt.figure(figsize=(x_size, y_size))
24 |         self._ax_lst = []
25 |         for i in range(n_plots):
26 |             ax = fig.add_subplot(100 + n_plots * 10 + i + 1)
27 |             ax.set_xlim((-1, 1))
28 |             ax.set_ylim((-1, 1))
29 |             ax.grid(True)
30 |             self._ax_lst.append(ax)
31 | 
32 |         self._line_objects = list()
33 | 
34 |     def draw(self):
35 |         # noinspection PyArgumentList
36 |         [h.remove() for h in self._line_objects]
37 |         self._line_objects = list()
38 | 
39 |         self._plot_level_curves()
40 |         self._plot_action_samples()
41 | 
42 |         plt.draw()
43 |         plt.pause(0.001)
44 | 
45 |     def _plot_level_curves(self):
46 |         # Create mesh grid.
47 |         xs = np.linspace(-1, 1, 50)
48 |         ys = np.linspace(-1, 1, 50)
49 |         xgrid, ygrid = np.meshgrid(xs, ys)
50 |         N = len(xs)*len(ys)
51 | 
52 |         # Copy default values along the first axis and replace nans with
53 |         # the mesh grid points.
54 | actions = np.tile(self._default_action.astype(np.float32), (N, 1)) 55 | actions[:, self._var_inds[0]] = xgrid.ravel() 56 | actions[:, self._var_inds[1]] = ygrid.ravel() 57 | 58 | for ax, obs in zip(self._ax_lst, self._obs_lst): 59 | observations = np.tile( 60 | obs[None].astype(np.float32), (actions.shape[0], 1)) 61 | 62 | Q_np = self._Q.values(observations, actions).numpy() 63 | Q_np = np.reshape(Q_np, xgrid.shape) 64 | 65 | cs = ax.contour(xgrid, ygrid, Q_np, 20) 66 | self._line_objects += cs.collections 67 | self._line_objects += ax.clabel( 68 | cs, inline=1, fontsize=10, fmt='%.2f') 69 | 70 | def _plot_action_samples(self): 71 | for ax, obs in zip(self._ax_lst, self._obs_lst): 72 | observations = np.ones((self._n_samples, 1)) * obs[None, :] 73 | actions = self._policy.actions(observations).numpy() 74 | 75 | x, y = actions[:, 0], actions[:, 1] 76 | self._line_objects += ax.plot(x, y, 'b*') 77 | -------------------------------------------------------------------------------- /softlearning/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/models/__init__.py -------------------------------------------------------------------------------- /softlearning/models/convnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_probability as tfp 3 | import tensorflow_addons as tfa 4 | from tensorflow.keras import layers 5 | import tree 6 | 7 | 8 | tfk = tf.keras 9 | tfkl = tf.keras.layers 10 | tfpl = tfp.layers 11 | tfd = tfp.distributions 12 | tfb = tfp.bijectors 13 | 14 | 15 | def convnet_model( 16 | conv_filters=(64, 64, 64), 17 | conv_kernel_sizes=(3, 3, 3), 18 | conv_strides=(2, 2, 2), 19 | padding="SAME", 20 | normalization_type=None, 21 | normalization_kwargs={}, 22 | downsampling_type='conv', 23 | activation=layers.LeakyReLU, 24 | name="convnet", 25 | *args, 26 | **kwargs): 27 | normalization_layer = { 28 | 'batch': layers.BatchNormalization, 29 | 'layer': layers.LayerNormalization, 30 | 'group': tfa.layers.normalizations.GroupNormalization, 31 | 'instance': tfa.layers.normalizations.InstanceNormalization, 32 | None: None, 33 | }[normalization_type] 34 | 35 | def conv_block(conv_filter, conv_kernel_size, conv_stride): 36 | block_parts = [ 37 | layers.Conv2D( 38 | filters=conv_filter, 39 | kernel_size=conv_kernel_size, 40 | strides=(conv_stride if downsampling_type == 'conv' else 1), 41 | padding=padding, 42 | activation='linear', 43 | *args, 44 | **kwargs), 45 | ] 46 | 47 | if normalization_layer is not None: 48 | block_parts += [normalization_layer(**normalization_kwargs)] 49 | 50 | block_parts += [(layers.Activation(activation) 51 | if isinstance(activation, str) 52 | else activation())] 53 | 54 | if downsampling_type == 'pool' and conv_stride > 1: 55 | block_parts += [getattr(layers, 'AvgPool2D')( 56 | pool_size=conv_stride, strides=conv_stride)] 57 | 58 | block = tfk.Sequential(block_parts, name='conv_block') 59 | return block 60 | 61 | def preprocess(x): 62 | """Cast to float, normalize, and concatenate images along last axis.""" 63 | x = tree.map_structure( 64 | lambda image: tf.image.convert_image_dtype(image, tf.float32), x) 65 | x = tree.flatten(x) 66 | x = tf.concat(x, axis=-1) 67 | x = (tf.image.convert_image_dtype(x, tf.float32) - 0.5) * 2.0 68 | return x 69 | 70 | model = tf.keras.Sequential(( 71 | tfkl.Lambda(preprocess), 72 | 
*[ 73 | conv_block(conv_filter, conv_kernel_size, conv_stride) 74 | for (conv_filter, conv_kernel_size, conv_stride) in 75 | zip(conv_filters, conv_kernel_sizes, conv_strides) 76 | ], 77 | tfkl.Flatten(), 78 | 79 | ), name=name) 80 | 81 | return model 82 | -------------------------------------------------------------------------------- /softlearning/models/feedforward.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_probability as tfp 3 | 4 | from softlearning.utils.tensorflow import cast_and_concat 5 | 6 | 7 | tfk = tf.keras 8 | tfkl = tf.keras.layers 9 | tfpl = tfp.layers 10 | tfd = tfp.distributions 11 | tfb = tfp.bijectors 12 | 13 | 14 | def feedforward_model(hidden_layer_sizes, 15 | output_shape, 16 | activation='relu', 17 | output_activation='linear', 18 | preprocessors=None, 19 | name='feedforward_model', 20 | *args, 21 | **kwargs): 22 | output_size = tf.reduce_prod(output_shape) 23 | if 1 < len(output_shape): 24 | raise NotImplementedError("TODO(hartikainen)") 25 | model = tf.keras.Sequential(( 26 | tfkl.Lambda(cast_and_concat), 27 | *[ 28 | tf.keras.layers.Dense( 29 | hidden_layer_size, *args, activation=activation, **kwargs) 30 | for hidden_layer_size in hidden_layer_sizes 31 | ], 32 | tf.keras.layers.Dense( 33 | output_size, *args, activation=output_activation, **kwargs), 34 | # tf.keras.layers.Reshape(output_shape), 35 | ), name=name) 36 | 37 | return model 38 | -------------------------------------------------------------------------------- /softlearning/models/feedforward_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from softlearning.models.feedforward import feedforward_model 5 | 6 | 7 | class FeedforwardTest(tf.test.TestCase): 8 | 9 | def test_clone_model(self): 10 | """Make sure that cloning works and clones can predict.""" 11 | output_shape = (5, ) 12 | x_np = np.random.uniform(0, 1, (1, 13)).astype(np.float32) 13 | x = tf.constant(x_np) 14 | 15 | fn1 = feedforward_model( 16 | output_shape=output_shape, 17 | hidden_layer_sizes=(6, 4, 2), 18 | name='feedforward_function') 19 | result_1 = fn1([x, x]).numpy() 20 | 21 | fn2 = tf.keras.models.clone_model(fn1) 22 | result_2 = fn2([x, x]).numpy() 23 | 24 | variable_names = [x.name for x in fn1.variables] 25 | for variable_name, variable_1, variable_2 in zip( 26 | variable_names, fn1.get_weights(), fn2.get_weights()): 27 | self.assertEqual(variable_1.shape, variable_2.shape) 28 | 29 | if 'kernel' in variable_name: 30 | self.assertNotAllClose(variable_1, variable_2) 31 | 32 | self.assertEqual( 33 | len(set((v1.experimental_ref() for v1 in fn1.trainable_variables)) 34 | & 35 | set((v2.experimental_ref() for v2 in fn2.trainable_variables))), 36 | 0) 37 | 38 | result_1_predict = fn1.predict((x_np, x_np)) 39 | result_2_predict = fn2.predict((x_np, x_np)) 40 | 41 | self.assertEqual(fn1.name, fn2.name) 42 | self.assertEqual(result_1_predict.shape, result_2_predict.shape) 43 | 44 | self.assertAllEqual(result_1_predict, result_1) 45 | self.assertAllEqual(result_2_predict, result_2) 46 | 47 | def test_without_name(self): 48 | fn = feedforward_model( 49 | output_shape=(1, ), 50 | hidden_layer_sizes=(6, 4, 2)) 51 | 52 | self.assertEqual(fn.name, 'feedforward_model') 53 | 54 | 55 | if __name__ == '__main__': 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /softlearning/models/utils.py: 
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tree
3 | 
4 | 
5 | def get_inputs_for_nested_shapes(input_shapes, name=None):
6 |     if isinstance(input_shapes, dict):
7 |         return type(input_shapes)([
8 |             (name, get_inputs_for_nested_shapes(value, name))
9 |             for name, value in input_shapes.items()
10 |         ])
11 |     elif isinstance(input_shapes, (tuple, list)):
12 |         if all(isinstance(x, int) for x in input_shapes):
13 |             return tf.keras.layers.Input(shape=input_shapes, name=name)
14 |         else:
15 |             return type(input_shapes)((
16 |                 get_inputs_for_nested_shapes(input_shape, name=None)
17 |                 for input_shape in input_shapes
18 |             ))
19 |     elif isinstance(input_shapes, tf.TensorShape):
20 |         return tf.keras.layers.Input(shape=input_shapes, name=name)
21 | 
22 |     raise NotImplementedError(input_shapes)
23 | 
24 | 
25 | def flatten_input_structure(inputs):
26 |     inputs_flat = tree.flatten(inputs)
27 |     return inputs_flat
28 | 
29 | 
30 | def create_input(path, shape, dtype=None):
31 |     name = "/".join(str(x) for x in path)
32 | 
33 |     if dtype is None:
34 |         # TODO(hartikainen): This is not a very robust way to handle the
35 |         # dtypes. Need to figure out something better.
36 |         # Try to infer dtype manually.
37 |         dtype = (tf.uint8  # Image observation
38 |                  if len(shape) == 3 and shape[-1] in (1, 3)
39 |                  else tf.float32)  # Non-image
40 | 
41 |     input_ = tf.keras.layers.Input(
42 |         shape=shape,
43 |         name=name,
44 |         dtype=dtype
45 |     )
46 | 
47 |     return input_
48 | 
49 | 
50 | def create_inputs(shapes, dtypes=None):
51 |     """Creates `tf.keras.layers.Input`s based on input shapes.
52 | 
53 |     Args:
54 |         shapes: (possibly nested) list/array/dict structure of
55 |             input shapes.
56 | 
57 |     Returns:
58 |         inputs: nested structure, of same shape as `shapes`, containing
59 |             `tf.keras.layers.Input`s.
60 | 
61 |     TODO(hartikainen): Need to figure out a better way for handling the dtypes.
62 |     """
63 |     if dtypes is None:
64 |         dtypes = tree.map_structure(lambda _: None, shapes)
65 |     inputs = tree.map_structure_with_path(create_input, shapes, dtypes)
66 | 
67 |     return inputs
68 | 
69 | 
70 | def create_sequence_inputs(shapes, dtypes=None):
71 |     """Creates `tf.keras.layers.Input`s usable for sequential models like RNN.
72 | 
73 |     Args:
74 |         See `create_inputs`.
75 | 
76 |     Returns:
77 |         inputs: nested structure, of same shape as `shapes`, containing
78 |             `tf.keras.layers.Input`s, each with shape (None, ...).
79 |     """
80 |     shapes = tree.map_structure(lambda x: tf.TensorShape([None]) + x, shapes)
81 |     sequence_inputs = create_inputs(shapes, dtypes)
82 | 
83 |     return sequence_inputs
84 | 
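A small sketch (not part of the package; the shapes are hypothetical) of the dtype inference that `create_input`/`create_inputs` perform:

    import tensorflow as tf
    from softlearning.models.utils import create_inputs

    shapes = {
        'pixels': tf.TensorShape((32, 32, 3)),    # rank 3 with 3 channels -> tf.uint8
        'proprioception': tf.TensorShape((7, )),  # anything else -> tf.float32
    }
    inputs = create_inputs(shapes)
    assert inputs['pixels'].dtype == tf.uint8
    assert inputs['proprioception'].dtype == tf.float32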
-------------------------------------------------------------------------------- /softlearning/policies/__init__.py: --------------------------------------------------------------------------------
1 | from softlearning.utils.serialization import (
2 |     serialize_softlearning_object, deserialize_softlearning_object)
3 | 
4 | from .base_policy import BasePolicy, LatentSpacePolicy, ContinuousPolicy  # noqa: unused-import
5 | from .gaussian_policy import GaussianPolicy, FeedforwardGaussianPolicy  # noqa: unused-import
6 | from .uniform_policy import UniformPolicyMixin, ContinuousUniformPolicy  # noqa: unused-import
7 | 
8 | 
9 | def serialize(policy):
10 |     return serialize_softlearning_object(policy)
11 | 
12 | 
13 | def deserialize(name, custom_objects=None):
14 |     """Returns a policy function or class denoted by input string.
15 | 
16 |     Arguments:
17 |         name : String
18 | 
19 |     Returns:
20 |         Policy function or class denoted by input string.
21 | 
22 |     For example:
23 |     >>> softlearning.policies.get({
24 |     ...     'class_name': 'ContinuousUniformPolicy',
25 |     ...     'config': {
26 |     ...         'action_range': [[-1], [1]],
27 |     ...         'input_shapes': tf.TensorShape((3, )),
28 |     ...         'output_shape': 2
29 |     ...     }
30 |     ... })
31 | 
32 |     >>> softlearning.policies.get('abcd')
33 |     Traceback (most recent call last):
34 |     ...
35 |     ValueError: Unknown policy: abcd
36 | 
37 |     Raises:
38 |         ValueError: `Unknown policy` if the input string does not
39 |             denote any defined policy.
40 |     """
41 |     return deserialize_softlearning_object(
42 |         name,
43 |         module_objects=globals(),
44 |         custom_objects=custom_objects,
45 |         printable_module_name='policy')
46 | 
47 | 
48 | def get(identifier):
49 |     """Returns a policy.
50 | 
51 |     Arguments:
52 |         identifier: function, string, or dict.
53 | 
54 |     Returns:
55 |         A policy denoted by identifier.
56 | 
57 |     For example:
58 |     >>> softlearning.policies.get({
59 |     ...     'class_name': 'ContinuousUniformPolicy',
60 |     ...     'config': {
61 |     ...         'action_range': [[-1], [1]],
62 |     ...         'input_shapes': tf.TensorShape((3, )),
63 |     ...         'output_shape': 2
64 |     ...     }
65 |     ... })
66 | 
67 |     >>> softlearning.policies.get('abcd')
68 |     Traceback (most recent call last):
69 |     ...
70 |     ValueError: Unknown policy: abcd
71 | 
72 |     Raises:
73 |         ValueError: Input is an unknown function or string, i.e., the
74 |             identifier does not denote any defined policy.
75 |     """
76 |     if identifier is None:
77 |         return None
78 |     if isinstance(identifier, str):
79 |         return deserialize(identifier)
80 |     elif isinstance(identifier, dict):
81 |         return deserialize(identifier)
82 |     elif callable(identifier):
83 |         return identifier
84 |     else:
85 |         raise TypeError(
86 |             "Could not interpret policy function identifier:"
87 |             f" {repr(identifier)}.")
88 | 
-------------------------------------------------------------------------------- /softlearning/policies/real_nvp_policy.py: --------------------------------------------------------------------------------
1 | """RealNVPPolicy."""
2 | 
3 | from collections import OrderedDict
4 | 
5 | import tensorflow as tf
6 | import tensorflow_probability as tfp
7 | import tree
8 | 
9 | from softlearning.distributions.bijectors.real_nvp_flow import RealNVPFlow
10 | 
11 | from .base_policy import LatentSpacePolicy
12 | 
13 | 
14 | class RealNVPPolicy(LatentSpacePolicy):
15 |     def __init__(self,
16 |                  hidden_layer_sizes,
17 |                  num_coupling_layers,
18 |                  *args,
19 |                  activation=tf.nn.relu,
20 |                  use_batch_normalization=False,
21 |                  **kwargs):
22 |         super(RealNVPPolicy, self).__init__(*args, **kwargs)
23 | 
24 |         base_distribution = tfp.distributions.MultivariateNormalDiag(
25 |             loc=tf.zeros(self._output_shape),
26 |             scale_diag=tf.ones(self._output_shape))
27 | 
28 |         self.flow_model = RealNVPFlow(
29 |             num_coupling_layers=num_coupling_layers,
30 |             hidden_layer_sizes=hidden_layer_sizes,
31 |             use_batch_normalization=use_batch_normalization,
32 |             activation=activation)
33 | 
34 |         raw_action_distribution = self.flow_model(base_distribution)
35 | 
36 |         self.base_distribution = base_distribution
37 |         self.raw_action_distribution = raw_action_distribution
38 |         self.action_distribution = self._action_post_processor(
39 |             raw_action_distribution)
40 | 
41 |     @tf.function(experimental_relax_shapes=True)
42 |     def actions(self, observations):
43 |         if 0 < self._smoothing_alpha:
44 |             raise NotImplementedError(
"TODO(hartikainen): Smoothing alpha temporarily dropped on tf2" 46 | " migration. Should add it back. See:" 47 | " https://github.com/rail-berkeley/softlearning/blob/46374df0294b9b5f6dbe65b9471ec491a82b6944/softlearning/policies/base_policy.py#L80") 48 | 49 | observations = self._filter_observations(observations) 50 | 51 | batch_shape = tf.shape(tree.flatten(observations)[0])[:-1] 52 | actions = self.action_distribution.sample( 53 | batch_shape, bijector_kwargs={ 54 | self.flow_model.name: {'observations': observations} 55 | }) 56 | 57 | return actions 58 | 59 | @tf.function(experimental_relax_shapes=True) 60 | def log_probs(self, observations, actions): 61 | observations = self._filter_observations(observations) 62 | log_probs = self.action_distribution.log_prob( 63 | actions, 64 | bijector_kwargs={ 65 | self.flow_model.name: {'observations': observations} 66 | })[..., tf.newaxis] 67 | 68 | return log_probs 69 | 70 | @tf.function(experimental_relax_shapes=True) 71 | def probs(self, observations, actions): 72 | observations = self._filter_observations(observations) 73 | probs = self.action_distribution.prob( 74 | actions, 75 | bijector_kwargs={ 76 | self.flow_model.name: {'observations': observations} 77 | })[..., tf.newaxis] 78 | 79 | return probs 80 | 81 | def get_weights(self): 82 | return self.flow_model.get_weights() 83 | 84 | def set_weights(self, *args, **kwargs): 85 | return self.flow_model.set_weights(*args, **kwargs) 86 | 87 | @property 88 | def trainable_weights(self): 89 | return self.flow_model.trainable_variables 90 | 91 | @property 92 | def non_trainable_weights(self): 93 | return self.flow_model.non_trainable_weights 94 | 95 | @tf.function(experimental_relax_shapes=True) 96 | def get_diagnostics(self, inputs): 97 | """Return diagnostic information of the policy. 98 | 99 | Returns the mean, min, max, and standard deviation of means and 100 | covariances. 
101 | """ 102 | actions = self.actions(inputs) 103 | log_pis = self.log_probs(inputs, actions) 104 | 105 | return OrderedDict(( 106 | ('entropy-mean', tf.reduce_mean(-log_pis)), 107 | ('entropy-std', tf.math.reduce_std(-log_pis)), 108 | 109 | ('actions-mean', tf.reduce_mean(actions)), 110 | ('actions-std', tf.math.reduce_std(actions)), 111 | ('actions-min', tf.reduce_min(actions)), 112 | ('actions-max', tf.reduce_max(actions)), 113 | )) 114 | -------------------------------------------------------------------------------- /softlearning/policies/uniform_policy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_probability as tfp 3 | import tree 4 | 5 | from .base_policy import ContinuousPolicy 6 | 7 | 8 | class UniformPolicyMixin: 9 | @tf.function(experimental_relax_shapes=True) 10 | def actions(self, observations): 11 | first_observation = tree.flatten(observations)[0] 12 | first_input_rank = tf.size(tree.flatten(self._input_shapes)[0]) 13 | batch_shape = tf.shape(first_observation)[:-first_input_rank] 14 | 15 | actions = self.distribution.sample(batch_shape) 16 | 17 | return actions 18 | 19 | @tf.function(experimental_relax_shapes=True) 20 | def log_probs(self, observations, actions): 21 | log_probs = self.distribution.log_prob(actions)[..., tf.newaxis] 22 | return log_probs 23 | 24 | @tf.function(experimental_relax_shapes=True) 25 | def probs(self, observations, actions): 26 | probs = self.distribution.prob(actions)[..., tf.newaxis] 27 | return probs 28 | 29 | 30 | class ContinuousUniformPolicy(UniformPolicyMixin, ContinuousPolicy): 31 | def __init__(self, *args, **kwargs): 32 | super(ContinuousUniformPolicy, self).__init__(*args, **kwargs) 33 | low, high = self._action_range 34 | self.distribution = tfp.distributions.Independent( 35 | tfp.distributions.Uniform(low=low, high=high), 36 | reinterpreted_batch_ndims=1) 37 | -------------------------------------------------------------------------------- /softlearning/policies/uniform_policy_test.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import tensorflow_probability as tfp 6 | import tree 7 | 8 | from softlearning import policies 9 | from softlearning.policies.uniform_policy import ContinuousUniformPolicy 10 | from softlearning.environments.utils import get_environment 11 | from softlearning.samplers import utils as sampler_utils 12 | 13 | 14 | class ContinuousUniformPolicyTest(tf.test.TestCase): 15 | def setUp(self): 16 | self.env = get_environment('gym', 'Swimmer', 'v3', {}) 17 | self.policy = ContinuousUniformPolicy( 18 | action_range=( 19 | self.env.action_space.low, 20 | self.env.action_space.high, 21 | ), 22 | input_shapes=self.env.observation_shape, 23 | output_shape=self.env.action_shape, 24 | observation_keys=self.env.observation_keys) 25 | 26 | def test_actions_and_log_probs(self): 27 | observation1_np = self.env.reset() 28 | observation2_np = self.env.step(self.env.action_space.sample())[0] 29 | 30 | observations_np = type(observation1_np)(( 31 | (key, np.stack(( 32 | observation1_np[key], observation2_np[key] 33 | ), axis=0).astype(np.float32)) 34 | for key in observation1_np.keys() 35 | )) 36 | 37 | observations_tf = tree.map_structure( 38 | lambda x: tf.constant(x, dtype=x.dtype), observations_np) 39 | 40 | for observations in (observations_np, observations_tf): 41 | actions = 
self.policy.actions(observations) 42 | log_pis = self.policy.log_probs(observations, actions) 43 | 44 | self.assertAllEqual( 45 | log_pis, 46 | tfp.distributions.Independent( 47 | tfp.distributions.Uniform( 48 | low=self.env.action_space.low, 49 | high=self.env.action_space.high, 50 | ), 51 | reinterpreted_batch_ndims=1, 52 | ).log_prob(actions)[..., None]) 53 | 54 | self.assertEqual(actions.shape, (2, *self.env.action_shape)) 55 | 56 | def test_env_step_with_actions(self): 57 | observation_np = self.env.reset() 58 | action = self.policy.action(observation_np).numpy() 59 | self.env.step(action) 60 | 61 | def test_trainable_variables(self): 62 | self.assertEqual(len(self.policy.trainable_variables), 0) 63 | 64 | def test_get_diagnostics(self): 65 | observation1_np = self.env.reset() 66 | observation2_np = self.env.step(self.env.action_space.sample())[0] 67 | observations_np = {} 68 | observations_np = type(observation1_np)(( 69 | (key, np.stack(( 70 | observation1_np[key], observation2_np[key] 71 | ), axis=0).astype(np.float32)) 72 | for key in observation1_np.keys() 73 | )) 74 | 75 | diagnostics = self.policy.get_diagnostics(observations_np) 76 | self.assertTrue(isinstance(diagnostics, OrderedDict)) 77 | self.assertFalse(diagnostics) 78 | 79 | def test_serialize_deserialize(self): 80 | policy_1 = ContinuousUniformPolicy( 81 | action_range=( 82 | self.env.action_space.low, 83 | self.env.action_space.high, 84 | ), 85 | input_shapes=self.env.observation_shape, 86 | output_shape=self.env.action_shape, 87 | observation_keys=self.env.observation_keys) 88 | 89 | self.assertFalse(policy_1.trainable_weights) 90 | 91 | config = policies.serialize(policy_1) 92 | policy_2 = policies.deserialize(config) 93 | 94 | self.assertEqual(policy_2._action_range, policy_1._action_range) 95 | self.assertEqual(policy_2._input_shapes, policy_1._input_shapes) 96 | self.assertEqual(policy_2._output_shape, policy_1._output_shape) 97 | self.assertEqual( 98 | policy_2._observation_keys, policy_1._observation_keys) 99 | 100 | path = sampler_utils.rollout( 101 | self.env, 102 | policy_2, 103 | path_length=10, 104 | break_on_terminal=False) 105 | observations = path['observations'] 106 | np.testing.assert_equal( 107 | policy_1.actions(observations).numpy().shape, 108 | policy_2.actions(observations).numpy().shape) 109 | 110 | 111 | if __name__ == '__main__': 112 | tf.test.main() 113 | -------------------------------------------------------------------------------- /softlearning/policies/utils.py: -------------------------------------------------------------------------------- 1 | from gym import spaces 2 | 3 | from .uniform_policy import ContinuousUniformPolicy 4 | 5 | 6 | def get_uniform_policy(environment): 7 | if isinstance(environment.action_space, spaces.Box): 8 | return ContinuousUniformPolicy( 9 | action_range=( 10 | environment.action_space.low, 11 | environment.action_space.high, 12 | ), 13 | input_shapes=environment.observation_shape, 14 | output_shape=environment.action_shape, 15 | observation_keys=environment.observation_keys) 16 | 17 | raise NotImplementedError(( 18 | type(environment.action_space), environment.action_space)) 19 | -------------------------------------------------------------------------------- /softlearning/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | from softlearning.utils.serialization import ( 2 | serialize_softlearning_object, deserialize_softlearning_object) 3 | 4 | 5 | def 
convnet_preprocessor(name='convnet_preprocessor', **kwargs):
6 |     from softlearning.models.convnet import convnet_model
7 | 
8 |     preprocessor = convnet_model(name=name, **kwargs)
9 | 
10 |     return preprocessor
11 | 
12 | 
13 | def serialize(preprocessor):
14 |     return serialize_softlearning_object(preprocessor)
15 | 
16 | 
17 | def deserialize(name, custom_objects=None):
18 |     """Returns a preprocessor function or class denoted by input string.
19 | 
20 |     Arguments:
21 |         name : String
22 | 
23 |     Returns:
24 |         Preprocessor function or class denoted by input string.
25 | 
26 |     For example:
27 |     >>> softlearning.preprocessors.get('convnet_preprocessor')
28 | 
29 |     >>> softlearning.preprocessors.get('abcd')
30 |     Traceback (most recent call last):
31 |     ...
32 |     ValueError: Unknown preprocessor: abcd
33 | 
34 |     Raises:
35 |         ValueError: `Unknown preprocessor` if the input string does not
36 |             denote any defined preprocessor.
37 |     """
38 |     return deserialize_softlearning_object(
39 |         name,
40 |         module_objects=globals(),
41 |         custom_objects=custom_objects,
42 |         printable_module_name='preprocessor')
43 | 
44 | 
45 | def get(identifier):
46 |     """Returns a preprocessor.
47 | 
48 |     Arguments:
49 |         identifier: function, string, or dict.
50 | 
51 |     Returns:
52 |         A preprocessor denoted by identifier.
53 | 
54 |     For example:
55 | 
56 |     >>> softlearning.preprocessors.get('convnet_preprocessor')
57 | 
58 |     >>> softlearning.preprocessors.get('abcd')
59 |     Traceback (most recent call last):
60 |     ...
61 |     ValueError: Unknown preprocessor: abcd
62 | 
63 |     Raises:
64 |         ValueError: Input is an unknown function or string, i.e., the
65 |             identifier does not denote any defined preprocessor.
66 |     """
67 |     if identifier is None:
68 |         return None
69 |     if isinstance(identifier, str):
70 |         return deserialize(identifier)
71 |     elif isinstance(identifier, dict):
72 |         return deserialize(identifier)
73 |     elif callable(identifier):
74 |         return identifier
75 |     else:
76 |         raise TypeError(
77 |             "Could not interpret preprocessor function identifier:"
78 |             f" {repr(identifier)}.")
79 | 
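A minimal sketch (not part of the package; the image shape and layer sizes are hypothetical) of building a convnet preprocessor directly via `convnet_model`, which `convnet_preprocessor` above wraps:

    import tensorflow as tf
    from softlearning.models.convnet import convnet_model

    preprocessor = convnet_model(
        conv_filters=(32, ),
        conv_kernel_sizes=(3, ),
        conv_strides=(2, ))
    images = tf.zeros((4, 32, 32, 3), dtype=tf.uint8)  # batch of dummy images
    features = preprocessor(images)  # cast to float, normalized, convolved, flattened
    assert features.shape == (4, 16 * 16 * 32)  # one stride-2 'SAME' conv halves H and W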
-------------------------------------------------------------------------------- /softlearning/replay_pools/__init__.py: --------------------------------------------------------------------------------
1 | from softlearning.utils.serialization import (
2 |     serialize_softlearning_object, deserialize_softlearning_object)
3 | 
4 | from .simple_replay_pool import SimpleReplayPool  # noqa: unused-import
5 | from .goal_replay_pool import GoalReplayPool  # noqa: unused-import
6 | from .union_pool import UnionPool  # noqa: unused-import
7 | from .hindsight_experience_replay_pool import HindsightExperienceReplayPool  # noqa: unused-import
8 | 
9 | 
10 | def serialize(replay_pool):
11 |     return serialize_softlearning_object(replay_pool)
12 | 
13 | 
14 | def deserialize(name, custom_objects=None):
15 |     """Returns a replay pool function or class denoted by input string.
16 | 
17 |     Arguments:
18 |         name : String
19 | 
20 |     Returns:
21 |         Replay pool function or class denoted by input string.
22 | 
23 |     For example:
24 |     >>> softlearning.replay_pools.get({'class_name': 'SimpleReplayPool', ...})
25 | 
26 |     >>> softlearning.replay_pools.get('abcd')
27 |     Traceback (most recent call last):
28 |     ...
29 |     ValueError: Unknown replay pool: abcd
30 | 
31 |     Raises:
32 |         ValueError: `Unknown replay pool` if the input string does not
33 |             denote any defined replay pool.
34 |     """
35 |     return deserialize_softlearning_object(
36 |         name,
37 |         module_objects=globals(),
38 |         custom_objects=custom_objects,
39 |         printable_module_name='replay pool')
40 | 
41 | 
42 | def get(identifier):
43 |     """Returns a replay pool.
44 | 
45 |     Arguments:
46 |         identifier: function, string, or dict.
47 | 
48 |     Returns:
49 |         A replay pool denoted by identifier.
50 | 
51 |     For example:
52 |     >>> softlearning.replay_pools.get({'class_name': 'SimpleReplayPool', ...})
53 | 
54 |     >>> softlearning.replay_pools.get('abcd')
55 |     Traceback (most recent call last):
56 |     ...
57 |     ValueError: Unknown replay pool: abcd
58 | 
59 |     Raises:
60 |         ValueError: Input is an unknown function or string, i.e., the
61 |             identifier does not denote any defined replay pool.
62 |     """
63 |     if identifier is None:
64 |         return None
65 |     if isinstance(identifier, str):
66 |         return deserialize(identifier)
67 |     elif isinstance(identifier, dict):
68 |         return deserialize(identifier)
69 |     elif callable(identifier):
70 |         return identifier
71 |     else:
72 |         raise TypeError(
73 |             "Could not interpret replay pool function identifier:"
74 |             f" {repr(identifier)}.")
75 | 
-------------------------------------------------------------------------------- /softlearning/replay_pools/goal_replay_pool.py: --------------------------------------------------------------------------------
1 | from gym.spaces import Dict
2 | 
3 | from .flexible_replay_pool import FlexibleReplayPool, Field
4 | 
5 | 
6 | class GoalReplayPool(FlexibleReplayPool):
7 |     def __init__(self,
8 |                  environment,
9 |                  observation_fields=None,
10 |                  new_observation_fields=None,
11 |                  *args,
12 |                  extra_fields=None,
13 |                  **kwargs):
14 |         extra_fields = extra_fields or {}
15 |         observation_space = environment.observation_space
16 |         action_space = environment.action_space
17 |         assert isinstance(observation_space, Dict), observation_space
18 | 
19 |         self._environment = environment
20 |         self._observation_space = observation_space
21 |         self._action_space = action_space
22 | 
23 |         fields = {
24 |             'observations': {
25 |                 name: Field(
26 |                     name=name,
27 |                     dtype=observation_space.dtype,
28 |                     shape=observation_space.shape)
29 |                 for name, observation_space
30 |                 in observation_space.spaces.items()
31 |                 if name in environment.observation_keys
32 |             },
33 |             'next_observations': {
34 |                 name: Field(
35 |                     name=name,
36 |                     dtype=observation_space.dtype,
37 |                     shape=observation_space.shape)
38 |                 for name, observation_space
39 |                 in observation_space.spaces.items()
40 |                 if name in environment.observation_keys
41 |             },
42 |             'goals': {
43 |                 name: Field(
44 |                     name=name,
45 |                     dtype=observation_space.dtype,
46 |                     shape=observation_space.shape)
47 |                 for name, observation_space
48 |                 in observation_space.spaces.items()
49 |                 if name in environment.goal_keys
50 |             },
51 |             'actions': Field(
52 |                 name='actions',
53 |                 dtype=action_space.dtype,
54 |                 shape=environment.action_shape),
55 |             'rewards': Field(
56 |                 name='rewards',
57 |                 dtype='float32',
58 |                 shape=(1, )),
59 |             # terminals[i] = a terminal was received at time i
60 |             'terminals': Field(
61 |                 name='terminals',
62 |                 dtype='bool',
63 |                 shape=(1, )),
64 |             **extra_fields
65 |         }
66 | 
67 |         super(GoalReplayPool, self).__init__(*args, fields=fields, **kwargs)
68 | 
69 |     def add_samples(self, samples, *args, **kwargs):
70 |         observations = type(samples['observations'])(
71 |             (key, values)
72 |             for key, values in samples['observations'].items()
73 |             if key in self._environment.observation_keys
74 |         )
75 |         next_observations = type(samples['next_observations'])(
76 |             (key, values)
77 |             for key, values in samples['next_observations'].items()
78 |             if key in self._environment.observation_keys
79 |         )
80 |         goals = type(samples['observations'])(
81 |             (key, values)
82 |             for key, values in samples['observations'].items()
83 |             if key in self._environment.goal_keys
84 |         )
85 | 
86 |         samples.update({
87 |             'observations': observations,
88 |             'next_observations': next_observations,
89 |             'goals': goals,
90 |         })
91 | 
92 |         return super(GoalReplayPool, self).add_samples(
93 |             samples, *args, **kwargs)
94 | 
-------------------------------------------------------------------------------- /softlearning/replay_pools/replay_pool.py: --------------------------------------------------------------------------------
1 | import abc
2 | 
3 | 
4 | class ReplayPool(object):
5 |     """A class used to save and replay data."""
6 | 
7 |     @abc.abstractmethod
8 |     def add_sample(self, sample):
9 |         """Add a transition tuple."""
10 |         pass
11 | 
12 |     @abc.abstractmethod
13 |     def terminate_episode(self):
14 |         """Clean up pool after episode termination."""
15 |         pass
16 | 
17 |     @property
18 |     @abc.abstractmethod
19 |     def size(self, **kwargs):
20 |         pass
21 | 
22 |     @abc.abstractmethod
23 |     def add_path(self, path):
24 |         """Add a rollout to the replay pool."""
25 |         pass
26 | 
27 |     @abc.abstractmethod
28 |     def random_batch(self, batch_size):
29 |         """Return a random batch of size `batch_size`."""
30 |         pass
31 | 
-------------------------------------------------------------------------------- /softlearning/replay_pools/simple_replay_pool.py: --------------------------------------------------------------------------------
1 | from gym import spaces
2 | import tree
3 | 
4 | from .flexible_replay_pool import FlexibleReplayPool, Field
5 | 
6 | 
7 | def field_from_gym_space(name, space):
8 |     if isinstance(space, spaces.Box):
9 |         if isinstance(name, (list, tuple)):
10 |             name = '/'.join(name)
11 |         return Field(name=name, dtype=space.dtype, shape=space.shape)
12 |     elif isinstance(space, spaces.Dict):
13 |         return tree.map_structure_with_path(
14 |             field_from_gym_space, space.spaces)
15 |     else:
16 |         raise NotImplementedError(space)
17 | 
18 | 
19 | class SimpleReplayPool(FlexibleReplayPool):
20 |     def __init__(self,
21 |                  environment,
22 |                  *args,
23 |                  extra_fields=None,
24 |                  **kwargs):
25 |         extra_fields = extra_fields or {}
26 |         observation_space = environment.observation_space
27 |         action_space = environment.action_space
28 | 
29 |         self._environment = environment
30 |         self._observation_space = observation_space
31 |         self._action_space = action_space
32 | 
33 |         fields = {
34 |             'observations': field_from_gym_space(
35 |                 'observations', observation_space),
36 |             'next_observations': field_from_gym_space(
37 |                 'next_observations', observation_space),
38 |             'actions': Field(
39 |                 name='actions',
40 |                 dtype=action_space.dtype,
41 |                 shape=environment.action_space.shape),
42 |             'rewards': Field(
43 |                 name='rewards',
44 |                 dtype='float32',
45 |                 shape=(1, )),
46 |             # terminals[i] = a terminal was received at time i
47 |             'terminals': Field(
48 |                 name='terminals',
49 |                 dtype='bool',
50 |                 shape=(1, )),
51 |             **extra_fields
52 |         }
53 | 
54 |         super(SimpleReplayPool, self).__init__(
55 |             *args, fields=fields, **kwargs)
56 | 
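A minimal round-trip sketch (not part of the package) mirroring the tests that follow; filling the pool is elided since valid sample dicts depend on the observation space structure:

    from softlearning.environments.utils import get_environment
    from softlearning.replay_pools.simple_replay_pool import SimpleReplayPool

    env = get_environment('gym', 'Swimmer', 'v3', {})
    pool = SimpleReplayPool(environment=env, max_size=1000)
    # ... fill via pool.add_path(samples) as in the tests below, then:
    # batch = pool.random_batch(256)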
-------------------------------------------------------------------------------- /softlearning/replay_pools/simple_replay_pool_test.py: --------------------------------------------------------------------------------
1 | import pytest
2 | import unittest
3 | import numpy as np
4 | import gym
5 | 
6 | from softlearning.replay_pools.simple_replay_pool import SimpleReplayPool
7 | from softlearning.replay_pools.flexible_replay_pool import Field
8 | from softlearning.environments.utils import get_environment
9 | 
10 | 
11 | def create_pool(env, max_size=100):
12 |     return SimpleReplayPool(environment=env, max_size=max_size)
13 | 
14 | 
15 | class SimpleReplayPoolTest(unittest.TestCase):
16 |     def test_create_pool(self):
17 |         ENVIRONMENTS = (
18 |             get_environment('gym', 'Swimmer', 'v3', {}),
19 |             gym.make('Swimmer-v3'),
20 |             gym.make('HandManipulateBlock-v0'),
21 |         )
22 |         for environment in ENVIRONMENTS:
23 |             pool = create_pool(env=environment, max_size=100)
24 | 
25 |             def verify_field(field, expected_name, expected_dtype, expected_shape):
26 |                 self.assertIsInstance(field, Field)
27 |                 self.assertEqual(field.name, expected_name)
28 |                 self.assertEqual(field.dtype, expected_dtype)
29 |                 self.assertEqual(field.shape, expected_shape)
30 |                 self.assertEqual(field.initializer, np.zeros)
31 |                 self.assertEqual(field.default_value, 0.0)
32 | 
33 |             if isinstance(environment.observation_space, gym.spaces.Dict):
34 |                 self.assertIsInstance(pool.fields['observations'], dict)
35 |                 for name, space in environment.observation_space.spaces.items():
36 |                     self.assertIn(name, pool.fields['observations'])
37 |                     field = pool.fields['observations'][name]
38 |                     verify_field(field, name, space.dtype, space.shape)
39 | 
40 |             elif isinstance(environment.observation_space, gym.spaces.Box):
41 |                 self.assertIsInstance(pool.fields['observations'], Field)
42 |                 verify_field(pool.fields['observations'],
43 |                              'observations',
44 |                              environment.observation_space.dtype,
45 |                              environment.observation_space.shape)
46 |             else:
47 |                 raise ValueError(environment.observation_space)
48 | 
49 |             verify_field(
50 |                 pool.fields['actions'],
51 |                 'actions',
52 |                 environment.action_space.dtype,
53 |                 environment.action_space.shape)
54 | 
55 |             verify_field(pool.fields['rewards'], 'rewards', 'float32', (1, ))
56 |             verify_field(pool.fields['terminals'], 'terminals', 'bool', (1, ))
57 | 
58 |     def test_add_samples_box_observation(self):
59 |         env = gym.make('Swimmer-v3')
60 |         pool = create_pool(env=env, max_size=100)
61 | 
62 |         env.reset()
63 | 
64 |         num_samples = pool._max_size // 2
65 | 
66 |         samples = {
67 |             'observations': np.empty(
68 |                 (num_samples, *env.observation_space.shape),
69 |                 dtype=env.observation_space.dtype),
70 |             'next_observations': np.empty(
71 |                 (num_samples, *env.observation_space.shape),
72 |                 dtype=env.observation_space.dtype),
73 |             'actions': np.empty((num_samples, *env.action_space.shape)),
74 |             'rewards': np.empty((num_samples, 1), dtype=np.float32),
75 |             'terminals': np.empty((num_samples, 1), dtype=bool),
76 |         }
77 | 
78 |         for i in range(num_samples):
79 |             action = env.action_space.sample()
80 |             observation, reward, terminal, info = env.step(action)
81 |             samples['observations'][i, :] = observation
82 |             samples['next_observations'][i, :] = observation
83 |             samples['actions'][i] = action
84 |             samples['rewards'][i] = reward
85 |             samples['terminals'][i] = terminal
86 | 
87 |         pool.add_path(samples)
88 |         last_n_batch = pool.last_n_batch(num_samples)
89 |         np.testing.assert_equal(
90 |             {
91 |                 key: value
92 |                 for key, value in last_n_batch.items()
93 |                 if key not in
94 |                 ('episode_index_backwards', 'episode_index_forwards')
95 |             },
96 |             samples)
97 | 
98 |     def test_add_samples_dict_observation(self):
99 |         env = get_environment('gym', 'Swimmer', 'v3', {})
100 |         pool = create_pool(env=env, max_size=100)
101 | 
102 |         env.reset()
103 | 
104 |         num_samples = pool._max_size // 2
105 | 
106 | 
samples = { 107 | 'observations': { 108 | name: np.empty((num_samples, *space.shape), dtype=space.dtype) 109 | for name, space in env.observation_space.spaces.items() 110 | }, 111 | 'next_observations': { 112 | name: np.empty((num_samples, *space.shape), dtype=space.dtype) 113 | for name, space in env.observation_space.spaces.items() 114 | }, 115 | 'actions': np.empty((num_samples, *env.action_space.shape)), 116 | 'rewards': np.empty((num_samples, 1), dtype=np.float32), 117 | 'terminals': np.empty((num_samples, 1), dtype=bool), 118 | } 119 | 120 | for i in range(num_samples): 121 | action = env.action_space.sample() 122 | observation, reward, terminal, info = env.step(action) 123 | for name, value in observation.items(): 124 | samples['observations'][name][i, :] = value 125 | samples['next_observations'][name][i, :] = value 126 | samples['actions'][i] = action 127 | samples['rewards'][i] = reward 128 | samples['terminals'][i] = terminal 129 | 130 | pool.add_path(samples) 131 | last_n_batch = pool.last_n_batch(num_samples) 132 | np.testing.assert_equal( 133 | { 134 | key: value 135 | for key, value in last_n_batch.items() 136 | if key not in 137 | ('episode_index_backwards', 'episode_index_forwards') 138 | }, 139 | samples) 140 | 141 | 142 | if __name__ == '__main__': 143 | unittest.main() 144 | -------------------------------------------------------------------------------- /softlearning/replay_pools/union_pool.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .replay_pool import ReplayPool 4 | 5 | 6 | class UnionPool(ReplayPool): 7 | def __init__(self, pools): 8 | pool_sizes = np.array([b.size for b in pools]) 9 | self._total_size = sum(pool_sizes) 10 | self._normalized_pool_sizes = pool_sizes / self._total_size 11 | 12 | self.pools = pools 13 | 14 | def add_sample(self, *args, **kwargs): 15 | raise NotImplementedError 16 | 17 | def terminate_episode(self): 18 | raise NotImplementedError 19 | 20 | @property 21 | def size(self): 22 | return self._total_size 23 | 24 | def add_path(self, **kwargs): 25 | raise NotImplementedError 26 | 27 | def random_batch(self, batch_size): 28 | 29 | # TODO: Hack 30 | partial_batch_sizes = self._normalized_pool_sizes * batch_size 31 | partial_batch_sizes = partial_batch_sizes.astype(int) 32 | partial_batch_sizes[0] = batch_size - sum(partial_batch_sizes[1:]) 33 | 34 | partial_batches = [ 35 | pool.random_batch(partial_batch_size) for pool, 36 | partial_batch_size in zip(self.pools, partial_batch_sizes) 37 | ] 38 | 39 | def all_values(key): 40 | return [partial_batch[key] for partial_batch in partial_batches] 41 | 42 | keys = partial_batches[0].keys() 43 | 44 | return {key: np.concatenate(all_values(key), axis=0) for key in keys} 45 | -------------------------------------------------------------------------------- /softlearning/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from softlearning.utils.serialization import ( 2 | serialize_softlearning_object, deserialize_softlearning_object) 3 | 4 | from .base_sampler import BaseSampler # noqa: unused-import 5 | from .dummy_sampler import DummySampler # noqa: unused-import 6 | from .simple_sampler import SimpleSampler # noqa: unused-import 7 | from .remote_sampler import RemoteSampler # noqa: unused-import 8 | from .utils import rollout, rollouts # noqa: unused-import 9 | 10 | 11 | def serialize(sampler): 12 | return serialize_softlearning_object(sampler) 13 | 14 | 15 | def 
deserialize(name, custom_objects=None):
16 |     """Returns a sampler function or class denoted by input string.
17 | 
18 |     Arguments:
19 |         name : String
20 | 
21 |     Returns:
22 |         Sampler function or class denoted by input string.
23 | 
24 |     For example:
25 |     >>> softlearning.samplers.get({'class_name': 'SimpleSampler', ...})
26 | 
27 |     >>> softlearning.samplers.get('abcd')
28 |     Traceback (most recent call last):
29 |     ...
30 |     ValueError: Unknown sampler: abcd
31 | 
32 |     Raises:
33 |         ValueError: `Unknown sampler` if the input string does not
34 |             denote any defined sampler.
35 |     """
36 |     return deserialize_softlearning_object(
37 |         name,
38 |         module_objects=globals(),
39 |         custom_objects=custom_objects,
40 |         printable_module_name='sampler')
41 | 
42 | 
43 | def get(identifier):
44 |     """Returns a sampler.
45 | 
46 |     Arguments:
47 |         identifier: function, string, or dict.
48 | 
49 |     Returns:
50 |         A sampler denoted by identifier.
51 | 
52 |     For example:
53 |     >>> softlearning.samplers.get({'class_name': 'SimpleSampler', ...})
54 | 
55 |     >>> softlearning.samplers.get('abcd')
56 |     Traceback (most recent call last):
57 |     ...
58 |     ValueError: Unknown sampler: abcd
59 | 
60 |     Raises:
61 |         ValueError: Input is an unknown function or string, i.e., the
62 |             identifier does not denote any defined sampler.
63 |     """
64 |     if identifier is None:
65 |         return None
66 |     if isinstance(identifier, str):
67 |         return deserialize(identifier)
68 |     elif isinstance(identifier, dict):
69 |         return deserialize(identifier)
70 |     elif callable(identifier):
71 |         return identifier
72 |     else:
73 |         raise TypeError(
74 |             "Could not interpret sampler function identifier:"
75 |             f" {repr(identifier)}.")
76 | 
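For illustration (not part of the package): the usual entry point that ties a sampler to an environment, policy, and pool is `rollout` from `softlearning.samplers.utils`, defined later in this tree; this mirrors its use in the policy tests above:

    from softlearning.environments.utils import get_environment
    from softlearning.policies.utils import get_uniform_policy
    from softlearning.samplers import rollout

    env = get_environment('gym', 'Swimmer', 'v3', {})
    policy = get_uniform_policy(env)
    path = rollout(env, policy, path_length=10, break_on_terminal=False)
    # path['observations'], path['actions'], path['rewards'], path['infos'], ...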
-------------------------------------------------------------------------------- /softlearning/samplers/base_sampler.py: --------------------------------------------------------------------------------
1 | from collections import deque, OrderedDict
2 | from itertools import islice
3 | 
4 | 
5 | class BaseSampler(object):
6 |     def __init__(self,
7 |                  max_path_length,
8 |                  environment=None,
9 |                  policy=None,
10 |                  pool=None,
11 |                  store_last_n_paths=10):
12 |         self._max_path_length = max_path_length
13 |         self._store_last_n_paths = store_last_n_paths
14 |         self._last_n_paths = deque(maxlen=store_last_n_paths)
15 | 
16 |         self.environment = environment
17 |         self.policy = policy
18 |         self.pool = pool
19 | 
20 |     def initialize(self, environment, policy, pool):
21 |         self.environment = environment
22 |         self.policy = policy
23 |         self.pool = pool
24 | 
25 |     def reset(self):
26 |         pass
27 | 
28 |     def set_policy(self, policy):
29 |         self.policy = policy
30 | 
31 |     def clear_last_n_paths(self):
32 |         self._last_n_paths.clear()
33 | 
34 |     def get_last_n_paths(self, n=None):
35 |         if n is None:
36 |             n = self._store_last_n_paths
37 | 
38 |         last_n_paths = tuple(islice(self._last_n_paths, None, n))
39 | 
40 |         return last_n_paths
41 | 
42 |     def sample(self):
43 |         raise NotImplementedError
44 | 
45 |     def terminate(self):
46 |         self.environment.close()
47 | 
48 |     def get_diagnostics(self):
49 |         diagnostics = OrderedDict({'pool-size': self.pool.size})
50 |         return diagnostics
51 | 
52 |     def __getstate__(self):
53 |         state = {
54 |             key: value for key, value in self.__dict__.items()
55 |             if key not in (
56 |                 'environment',
57 |                 'policy',
58 |                 'pool',
59 |                 '_last_n_paths',
60 |                 '_current_observation',
61 |                 '_current_path',
62 |                 '_is_first_step',
63 |             )
64 |         }
65 | 
66 |         return state
67 | 
68 |     def __setstate__(self, state):
69 |         self.__dict__.update(state)
70 | 
71 |         self.environment = None
72 |         self.policy = None
73 |         self.pool = None
74 |         # TODO(hartikainen): Maybe try restoring these from the pool?
75 |         self._last_n_paths = deque(maxlen=self._store_last_n_paths)
76 | 
-------------------------------------------------------------------------------- /softlearning/samplers/dummy_sampler.py: --------------------------------------------------------------------------------
1 | from .base_sampler import BaseSampler
2 | 
3 | 
4 | class DummySampler(BaseSampler):
5 |     def sample(self):
6 |         pass
7 | 
-------------------------------------------------------------------------------- /softlearning/samplers/goal_sampler.py: --------------------------------------------------------------------------------
1 | from .simple_sampler import SimpleSampler
2 | 
3 | 
4 | class GoalSampler(SimpleSampler):
5 |     @property
6 |     def _policy_input(self):
7 |         observation = super(GoalSampler, self)._policy_input
8 |         goal = {
9 |             key: self._current_observation[key]
10 |             for key in self.policy.goal_keys
11 |         }
12 | 
13 |         return (observation, goal)
14 | 
15 |     def _process_sample(self,
16 |                         observation,
17 |                         action,
18 |                         reward,
19 |                         terminal,
20 |                         next_observation,
21 |                         info):
22 |         full_observation = observation.copy()
23 |         observation = {
24 |             key: full_observation[key]
25 |             for key in self.policy.observation_keys
26 |         }
27 |         goal = {
28 |             key: full_observation[key]
29 |             for key in self.policy.goal_keys
30 |         }
31 |         processed_observation = {
32 |             'observations': observation,
33 |             'actions': action,
34 |             'rewards': [reward],
35 |             'terminals': [terminal],
36 |             'next_observations': next_observation,
37 |             'goals': goal,
38 |             'infos': info,
39 |         }
40 | 
41 |         return processed_observation
42 | 
-------------------------------------------------------------------------------- /softlearning/samplers/remote_sampler.py: --------------------------------------------------------------------------------
1 | import pickle
2 | from collections import OrderedDict
3 | 
4 | import ray
5 | import tensorflow as tf
6 | import numpy as np
7 | 
8 | 
9 | from .base_sampler import BaseSampler
10 | from .utils import rollout
11 | 
12 | 
13 | class RemoteSampler(BaseSampler):
14 |     def __init__(self, **kwargs):
15 |         raise NotImplementedError(
16 |             "TODO(hartikainen): There's a bug here that causes tf to end up in"
17 |             " a RecursionError. 
This should be fixed/refactored before usage.") 18 | super(RemoteSampler, self).__init__(**kwargs) 19 | 20 | self._remote_environment = None 21 | self._remote_path = None 22 | self._n_episodes = 0 23 | self._total_samples = 0 24 | self._last_path_return = 0 25 | self._max_path_return = -np.inf 26 | 27 | def _create_remote_environment(self, env, policy): 28 | env_pkl = pickle.dumps(env) 29 | policy_pkl = pickle.dumps(policy) 30 | 31 | if not ray.is_initialized(): 32 | ray.init() 33 | 34 | self._remote_environment = _RemoteEnv.remote(env_pkl, policy_pkl) 35 | 36 | # Block until the env and policy is ready 37 | initialized = ray.get(self._remote_environment.initialized.remote()) 38 | assert initialized, initialized 39 | 40 | def initialize(self, environment, policy, pool): 41 | super(RemoteSampler, self).initialize(environment, policy, pool) 42 | self._create_remote_environment(environment, policy) 43 | 44 | def wait_for_path(self, timeout=1): 45 | if self._remote_path is None: 46 | return [True] 47 | 48 | path_ready, _ = ray.wait([self._remote_path], timeout=timeout) 49 | return path_ready 50 | 51 | def sample(self, timeout=0): 52 | if self._remote_path is None: 53 | policy_params = self.policy.get_weights() 54 | self._remote_path = self._remote_environment.rollout.remote( 55 | policy_params, self._max_path_length) 56 | 57 | path_ready = self.wait_for_path(timeout=timeout) 58 | 59 | if len(path_ready) or not self.batch_ready(): 60 | path_samples = ray.get(self._remote_path) 61 | self._last_n_paths.appendleft(path_samples) 62 | 63 | self.pool.add_samples({ 64 | key: value 65 | for key, value in path_samples.items() 66 | if key != 'infos' 67 | }) 68 | 69 | self._remote_path = None 70 | self._total_samples += path_samples['rewards'].shape[0] 71 | self._last_path_return = np.sum(path_samples['rewards']) 72 | self._max_path_return = max(self._max_path_return, 73 | self._last_path_return) 74 | self._n_episodes += 1 75 | 76 | def get_diagnostics(self): 77 | diagnostics = OrderedDict({ 78 | 'max-path-return': self._max_path_return, 79 | 'last-path-return': self._last_path_return, 80 | 'pool-size': self.pool.size, 81 | 'episodes': self._n_episodes, 82 | 'total-samples': self._total_samples, 83 | }) 84 | 85 | return diagnostics 86 | 87 | def __getstate__(self): 88 | super_state = super(RemoteSampler, self).__getstate__() 89 | state = { 90 | key: value for key, value in super_state.items() 91 | if key not in ('_remote_environment', '_remote_path') 92 | } 93 | 94 | return state 95 | 96 | def __setstate__(self, state): 97 | super(RemoteSampler, self).__setstate__(state) 98 | self._remote_path = None 99 | 100 | 101 | @ray.remote 102 | class _RemoteEnv(object): 103 | def __init__(self, env_pkl, policy_pkl): 104 | gpu_options = tf.GPUOptions(allow_growth=True) 105 | self._session = tf.Session( 106 | config=tf.ConfigProto(gpu_options=gpu_options)) 107 | tf.compat.v1.keras.backend.set_session(self._session) 108 | 109 | self._env = pickle.loads(env_pkl) 110 | self._policy = pickle.loads(policy_pkl) 111 | 112 | if hasattr(self._env, 'initialize'): 113 | self._env.initialize() 114 | 115 | self._initialized = True 116 | 117 | def initialized(self): 118 | return self._initialized 119 | 120 | def rollout(self, policy_weights, path_length): 121 | self._policy.set_weights(policy_weights) 122 | path = rollout(self._env, self._policy, path_length) 123 | 124 | return path 125 | -------------------------------------------------------------------------------- /softlearning/samplers/remote_sampler_test.py: 
--------------------------------------------------------------------------------
1 | import pickle
2 | import unittest
3 | import pytest
4 | 
5 | from softlearning.environments.utils import get_environment
6 | from softlearning.samplers.remote_sampler import RemoteSampler
7 | from softlearning.replay_pools.simple_replay_pool import SimpleReplayPool
8 | from softlearning import policies
9 | 
10 | 
11 | @pytest.mark.skip(reason="RemoteSampler is currently broken.")
12 | class RemoteSamplerTest(unittest.TestCase):
13 |     def setUp(self):
14 |         self.env = get_environment('gym', 'Swimmer', 'v3', {})
15 |         self.policy = policies.ContinuousUniformPolicy(
16 |             action_range=(
17 |                 self.env.action_space.low,
18 |                 self.env.action_space.high,
19 |             ),
20 |             input_shapes=self.env.observation_shape,
21 |             output_shape=self.env.action_shape,
22 |             observation_keys=self.env.observation_keys)
23 |         self.pool = SimpleReplayPool(max_size=100, environment=self.env)
24 |         self.remote_sampler = RemoteSampler(max_path_length=10)
25 | 
26 |     def test_initialization(self):
27 |         self.assertEqual(self.pool.size, 0)
28 |         self.remote_sampler.initialize(self.env, self.policy, self.pool)
29 |         self.remote_sampler.sample(timeout=10)
30 |         self.assertEqual(self.pool.size, 10)
31 | 
32 |     def test_serialize_deserialize(self):
33 |         self.assertEqual(self.pool.size, 0)
34 | 
35 |         self.remote_sampler.initialize(self.env, self.policy, self.pool)
36 | 
37 |         self.remote_sampler.sample()
38 | 
39 |         deserialized = pickle.loads(pickle.dumps(self.remote_sampler))
40 |         deserialized.initialize(self.env, self.policy, self.pool)
41 | 
42 |         self.assertEqual(self.pool.size, 10)
43 | 
44 |         self.remote_sampler.sample(timeout=10)
45 |         self.assertEqual(self.pool.size, 20)
46 | 
47 |         deserialized = pickle.loads(pickle.dumps(self.remote_sampler))
48 |         deserialized.initialize(self.env, self.policy, self.pool)
49 | 
50 |         self.assertIsInstance(
51 |             deserialized.environment, type(self.remote_sampler.environment))
52 |         self.assertEqual(
53 |             self.remote_sampler._n_episodes, deserialized._n_episodes)
54 |         self.assertEqual(
55 |             self.remote_sampler._max_path_return,
56 |             deserialized._max_path_return)
57 |         self.assertEqual(
58 |             self.remote_sampler._last_path_return,
59 |             deserialized._last_path_return)
60 |         self.assertEqual(
61 |             len(self.remote_sampler._last_n_paths),
62 |             len(deserialized._last_n_paths))
63 | 
64 |         self.remote_sampler.sample(timeout=10)
65 |         deserialized.sample(timeout=10)
66 | 
67 |         self.assertEqual(
68 |             self.remote_sampler._n_episodes, deserialized._n_episodes)
69 |         self.assertNotEqual(
70 |             self.remote_sampler._last_path_return,
71 |             deserialized._last_path_return)
72 |         self.assertEqual(
73 |             len(self.remote_sampler._last_n_paths),
74 |             len(deserialized._last_n_paths))
75 | 
76 | 
77 | if __name__ == '__main__':
78 |     unittest.main()
79 | 
--------------------------------------------------------------------------------
/softlearning/samplers/simple_sampler.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | 
3 | import numpy as np
4 | import tree
5 | 
6 | from .base_sampler import BaseSampler
7 | 
8 | 
9 | class SimpleSampler(BaseSampler):
10 |     def __init__(self, **kwargs):
11 |         super(SimpleSampler, self).__init__(**kwargs)
12 | 
13 |         self._last_path_return = 0
14 |         self._max_path_return = -np.inf
15 |         self._n_episodes = 0
16 |         self._total_samples = 0
17 | 
18 |         self._is_first_step = True
19 | 
20 |     def reset(self):
21 |         if self.policy is not None:
22 |             self.policy.reset()
23 | 
24 |         self._path_length = 0
25 |         self._path_return = 0
26 |         self._current_path = []
27 |         self._current_observation = self.environment.reset()
28 | 
29 |     @property
30 |     def _policy_input(self):
31 |         return self._current_observation
32 | 
33 |     def _process_sample(self,
34 |                         observation,
35 |                         action,
36 |                         reward,
37 |                         terminal,
38 |                         next_observation,
39 |                         info):
40 |         processed_observation = {
41 |             'observations': observation,
42 |             'actions': action,
43 |             'rewards': np.atleast_1d(reward),
44 |             'terminals': np.atleast_1d(terminal),
45 |             'next_observations': next_observation,
46 |             'infos': info,
47 |         }
48 | 
49 |         return processed_observation
50 | 
51 |     def sample(self):
52 |         if self._is_first_step:
53 |             self.reset()
54 | 
55 |         action = self.policy.action(self._policy_input).numpy()
56 | 
57 |         next_observation, reward, terminal, info = self.environment.step(
58 |             action)
59 |         self._path_length += 1
60 |         self._path_return += reward
61 |         self._total_samples += 1
62 | 
63 |         processed_sample = self._process_sample(
64 |             observation=self._current_observation,
65 |             action=action,
66 |             reward=reward,
67 |             terminal=terminal,
68 |             next_observation=next_observation,
69 |             info=info,
70 |         )
71 | 
72 |         self._current_path.append(processed_sample)
73 | 
74 |         if terminal or self._path_length >= self._max_path_length:
75 |             last_path = tree.map_structure(
76 |                 lambda *x: np.stack(x, axis=0), *self._current_path)
77 | 
78 |             self.pool.add_path({
79 |                 key: value
80 |                 for key, value in last_path.items()
81 |                 if key != 'infos'
82 |             })
83 | 
84 |             self._last_n_paths.appendleft(last_path)
85 | 
86 |             self._max_path_return = max(self._max_path_return,
87 |                                         self._path_return)
88 |             self._last_path_return = self._path_return
89 |             self._n_episodes += 1
90 | 
91 |             self.pool.terminate_episode()
92 | 
93 |             self._is_first_step = True
94 |             # Reset is done at the beginning of the next episode; see above.
95 | 
96 |         else:
97 |             self._current_observation = next_observation
98 |             self._is_first_step = False
99 | 
100 |         return next_observation, reward, terminal, info
101 | 
102 |     def get_diagnostics(self):
103 |         diagnostics = super(SimpleSampler, self).get_diagnostics()
104 |         diagnostics.update({
105 |             'max-path-return': self._max_path_return,
106 |             'last-path-return': self._last_path_return,
107 |             'episodes': self._n_episodes,
108 |             'total-samples': self._total_samples,
109 |         })
110 | 
111 |         return diagnostics
112 | 
--------------------------------------------------------------------------------
/softlearning/samplers/utils.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | 
3 | import numpy as np
4 | 
5 | from softlearning import replay_pools
6 | from . import simple_sampler
7 | 
8 | 
9 | DEFAULT_PIXEL_RENDER_KWARGS = {
10 |     'mode': 'rgb_array',
11 |     'width': 100,
12 |     'height': 100,
13 | }
14 | 
15 | DEFAULT_HUMAN_RENDER_KWARGS = {
16 |     'mode': 'human',
17 |     'width': 500,
18 |     'height': 500,
19 | }
20 | 
21 | 
22 | def rollout(environment,
23 |             policy,
24 |             path_length,
25 |             replay_pool_class=replay_pools.SimpleReplayPool,
26 |             sampler_class=simple_sampler.SimpleSampler,
27 |             render_kwargs=None,
28 |             break_on_terminal=True):
29 |     pool = replay_pool_class(environment, max_size=path_length)
30 |     sampler = sampler_class(
31 |         environment=environment,
32 |         policy=policy,
33 |         pool=pool,
34 |         max_path_length=path_length)
35 | 
36 |     render_mode = (render_kwargs or {}).get('mode', None)
37 |     if render_mode == 'rgb_array':
38 |         render_kwargs = {
39 |             **DEFAULT_PIXEL_RENDER_KWARGS,
40 |             **render_kwargs
41 |         }
42 |     elif render_mode == 'human':
43 |         render_kwargs = {
44 |             **DEFAULT_HUMAN_RENDER_KWARGS,
45 |             **render_kwargs
46 |         }
47 |     else:
48 |         render_kwargs = None
49 | 
50 |     images = []
51 |     infos = defaultdict(list)
52 | 
53 |     t = 0
54 |     for t in range(path_length):
55 |         observation, reward, terminal, info = sampler.sample()
56 |         for key, value in info.items():
57 |             infos[key].append(value)
58 | 
59 |         if render_kwargs:
60 |             image = environment.render(**render_kwargs)
61 |             images.append(image)
62 | 
63 |         if terminal:
64 |             policy.reset()
65 |             if break_on_terminal: break
66 | 
67 |     assert pool._size == t + 1
68 | 
69 |     path = pool.batch_by_indices(np.arange(pool._size))
70 |     path['infos'] = infos
71 | 
72 |     if render_mode == 'rgb_array':
73 |         path['images'] = np.stack(images, axis=0)
74 | 
75 |     return path
76 | 
77 | 
78 | def rollouts(n_paths, *args, **kwargs):
79 |     paths = [rollout(*args, **kwargs) for _ in range(n_paths)]
80 |     return paths
81 | 
--------------------------------------------------------------------------------
/softlearning/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/scripts/__init__.py
--------------------------------------------------------------------------------
/softlearning/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rail-berkeley/softlearning/13cf187cc93d90f7c217ea2845067491c3c65464/softlearning/utils/__init__.py
--------------------------------------------------------------------------------
/softlearning/utils/dict.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 | 
3 | 
4 | def deep_update(d, *us):
5 |     d = d.copy()
6 | 
7 |     for u in us:
8 |         u = u.copy()
9 |         for k, v in u.items():
10 |             d[k] = (
11 |                 deep_update(d.get(k, {}), v)
12 |                 if isinstance(v, collections.abc.Mapping)
13 |                 else v)
14 | 
15 |     return d
16 | 
--------------------------------------------------------------------------------
/softlearning/utils/gcp.py:
--------------------------------------------------------------------------------
1 | import requests
2 | 
3 | 
4 | def instance_preempted():
5 |     try:
6 |         response = requests.get(
7 |             "http://metadata/computeMetadata/v1/instance/preempted",
8 |             headers={'Metadata-Flavor': 'Google'}
9 |         )
10 |         preempted = (response.status_code == 200
11 |                      and response.text != 'FALSE')
12 |     except Exception:
13 |         preempted = False
14 | 
15 |     return preempted
16 | 
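A quick illustration of `deep_update` (softlearning/utils/dict.py above); this is a minimal sketch, and the nested configuration dicts are made up for the example:

    from softlearning.utils.dict import deep_update

    defaults = {'algorithm': {'kwargs': {'lr': 3e-4, 'tau': 5e-3}}, 'seed': 1}
    overrides = {'algorithm': {'kwargs': {'lr': 1e-4}}}

    merged = deep_update(defaults, overrides)
    # Nested mappings are merged key by key rather than replaced wholesale:
    # merged == {'algorithm': {'kwargs': {'lr': 1e-4, 'tau': 5e-3}}, 'seed': 1}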
--------------------------------------------------------------------------------
/softlearning/utils/git.py:
--------------------------------------------------------------------------------
1 | from softlearning.utils.misc import PROJECT_PATH
2 | 
3 | 
4 | def get_git_rev(path=PROJECT_PATH, search_parent_directories=True):
5 |     try:
6 |         import git
7 |     except ImportError:
8 |         print(
9 |             "Warning: gitpython not installed."
10 |             " Unable to log git rev."
11 |             " Run `pip install gitpython` if you want git revs to be logged.")
12 |         return None
13 | 
14 |     try:
15 |         repo = git.Repo(
16 |             path, search_parent_directories=search_parent_directories)
17 |         if repo.head.is_detached:
18 |             git_rev = repo.head.object.name_rev
19 |         else:
20 |             git_rev = repo.active_branch.commit.name_rev
21 |     except git.InvalidGitRepositoryError:
22 |         git_rev = None
23 | 
24 |     return git_rev
25 | 
--------------------------------------------------------------------------------
/softlearning/utils/gym.py:
--------------------------------------------------------------------------------
1 | from gym import spaces
2 | 
3 | 
4 | DISCRETE_SPACES = (
5 |     spaces.Discrete,
6 |     spaces.MultiBinary,
7 |     spaces.MultiDiscrete,
8 | )
9 | CONTINUOUS_SPACES = (spaces.Box, )
10 | 
11 | 
12 | def is_continuous_space(space):
13 |     return isinstance(space, CONTINUOUS_SPACES)
14 | 
15 | 
16 | def is_discrete_space(space):
17 |     return isinstance(space, DISCRETE_SPACES)
18 | 
--------------------------------------------------------------------------------
/softlearning/utils/misc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | 
4 | import tensorflow as tf
5 | import numpy as np
6 | 
7 | 
8 | PROJECT_PATH = os.path.dirname(
9 |     os.path.realpath(os.path.join(__file__, '..', '..')))
10 | 
11 | 
12 | def set_seed(seed):
13 |     seed %= 4294967294
14 |     random.seed(seed)
15 |     np.random.seed(seed)
16 |     tf.random.set_seed(seed)
17 |     print(f"Using seed {seed}")
18 | 
19 | 
20 | def get_host_name():
21 |     try:
22 |         import socket
23 |         return socket.gethostname()
24 |     except Exception as e:
25 |         print(f"Failed to get host name: {e}")
26 |         return None
27 | 
--------------------------------------------------------------------------------
/softlearning/utils/numpy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def softmax(x):
5 |     max_x = np.max(x)
6 |     exp_x = np.exp(x - max_x)
7 |     return exp_x / np.sum(exp_x)
8 | 
--------------------------------------------------------------------------------
/softlearning/utils/random.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def spherical(size=None, ndim=2):
5 |     size = np.atleast_1d(size if size is not None else ())
6 |     random_normal = np.random.standard_normal((ndim, *size))
7 |     normalized = random_normal / np.linalg.norm(random_normal, axis=0)
8 |     return normalized
9 | 
--------------------------------------------------------------------------------
/softlearning/utils/tensorflow.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tree
3 | 
4 | 
5 | def set_gpu_memory_growth(growth):
6 |     gpus = tf.config.experimental.list_physical_devices('GPU')
7 |     if gpus:
8 |         try:
9 |             # Currently, memory growth needs to be the same across GPUs
10 |             for gpu in gpus:
11 |                 tf.config.experimental.set_memory_growth(gpu, growth)
12 |             logical_gpus = tf.config.experimental.list_logical_devices('GPU')
13 |             print(len(gpus), "Physical GPUs,", len(logical_gpus),
14 |                   "Logical GPUs")
15 |         except RuntimeError as e:
16 |             # Memory growth must be set before GPUs have been initialized
17 |             print(e)
18 | 
19 | 
20 | def apply_preprocessors(preprocessors, inputs):
21 |     tree.assert_same_structure(inputs, preprocessors)
22 |     preprocessed_inputs = tree.map_structure(
23 |         lambda preprocessor, input_: (
24 |             preprocessor(input_) if preprocessor is not None else input_),
25 |         preprocessors,
26 |         inputs,
27 |     )
28 | 
29 |     return preprocessed_inputs
30 | 
31 | 
32 | def cast_and_concat(x):
33 |     x = tree.map_structure(
34 |         lambda element: tf.cast(element, tf.float32), x)
35 |     x = tree.flatten(x)
36 |     x = tf.concat(x, axis=-1)
37 |     return x
38 | 
--------------------------------------------------------------------------------
/softlearning/utils/times.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | 
3 | 
4 | def datetimestamp(divider='-', datetime_divider='T'):
5 |     now = datetime.datetime.now()
6 |     return now.strftime(
7 |         '%Y{d}%m{d}%d{dtd}%H{d}%M{d}%S'
8 |         .format(d=divider, dtd=datetime_divider))
9 | 
10 | 
11 | def datestamp(divider='-'):
12 |     return datetime.date.today().isoformat().replace('-', divider)
13 | 
14 | 
15 | def timestamp(divider='-'):
16 |     now = datetime.datetime.now()
17 |     time_now = datetime.datetime.time(now)
18 |     return time_now.strftime(
19 |         '%H{d}%M{d}%S'.format(d=divider))
20 | 
--------------------------------------------------------------------------------
/softlearning/utils/tune.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | from pprint import pprint
4 | import re
5 | import shutil
6 | 
7 | 
8 | RESULT_FILE_REGEXES = (
9 |     "^result.json$",
10 |     "^progress.csv$",
11 |     "^events.out.tfevents.\\d+\\..+$",
12 | )
13 | 
14 | 
15 | PARAMS_FILE_REGEXES = (
16 |     "^params.json$",
17 |     "^params.pkl$",
18 | )
19 | 
20 | CHECKPOINT_DIRECTORY_REGEXES = (
21 |     "^checkpoint_\\d+$",
22 | )
23 | 
24 | 
25 | def is_result_file(filename):
26 |     return any(
27 |         re.match(result_file_regex, filename)
28 |         for result_file_regex in RESULT_FILE_REGEXES)
29 | 
30 | 
31 | def is_params_file(filename):
32 |     return any(
33 |         re.match(params_file_regex, filename)
34 |         for params_file_regex in PARAMS_FILE_REGEXES)
35 | 
36 | 
37 | def is_checkpoint_directory(dirname):
38 |     # TODO(hartikainen): might want to check the contents of this directory.
39 |     # e.g. check `.tune_metadata`, etc.
40 |     return any(
41 |         re.match(checkpoint_directory_regex, dirname)
42 |         for checkpoint_directory_regex in CHECKPOINT_DIRECTORY_REGEXES)
43 | 
44 | 
45 | def is_trial_directory(root_dir):
46 |     if not os.path.isdir(root_dir):
47 |         return False
48 | 
49 |     root, directories, files = next(os.walk(root_dir))
50 |     # json logger: params.json, result.json, params.pkl
51 |     # csv logger: progress.csv
52 |     # tf logger: events.out.tfevents.1562394433.ray-hopp-2-head-4ba37bcf
53 |     # log_syncxurz09ic.log
54 | 
55 |     result_files = [
56 |         filename
57 |         for filename in files
58 |         if is_result_file(filename)
59 |     ]
60 | 
61 |     params_files = [
62 |         filename
63 |         for filename in files
64 |         if is_params_file(filename)
65 |     ]
66 | 
67 |     # TODO(hartikainen): checkpoint_directories are currently unused here
68 |     checkpoint_directories = [
69 |         directory
70 |         for directory in directories
71 |         if is_checkpoint_directory(os.path.join(root, directory))
72 |     ]
73 | 
74 |     # TODO(hartikainen): might want to check if "^log_sync\\d{8}.log$" exists
75 | 
76 |     return bool(result_files and params_files)
77 | 
78 | 
79 | def is_experiment_directory(root_dir):
80 |     if not os.path.isdir(root_dir):
81 |         return False
82 | 
83 |     root, directories, files = next(os.walk(root_dir))
84 |     # 1) experiment_state.json exists -> is experiment
85 |     experiment_state_paths = glob.glob(
86 |         os.path.join(root, "experiment_state*.json"))
87 | 
88 |     if experiment_state_paths:
89 |         # TODO(hartikainen): This needs to be fixed. In general, a directory
90 |         # can have multiple experiment state files. Softlearning experiment
91 |         # directories shouldn't, though.
92 |         assert len(experiment_state_paths) == 1, experiment_state_paths
93 |         return True
94 | 
95 |     # 2) All the subfolders are trials -> is experiment
96 |     if directories and all(
97 |             is_trial_directory(os.path.join(root, directory))
98 |             for directory in directories):
99 |         return True
100 | 
101 |     return False
102 | 
103 | 
104 | def find_all_experiment_directories(root_dir):
105 |     """Given a directory path, recursively find all experiment directories in it.
106 | 
107 |     TODO(hartikainen): Should maybe have an option for recursive=False?
108 |     """
109 | 
110 |     root_dir = os.path.expanduser(root_dir)
111 | 
112 |     if is_experiment_directory(root_dir):
113 |         return (root_dir, )
114 | 
115 |     directories = next(os.walk(root_dir))[1]
116 |     all_experiment_directories = sum((
117 |         find_all_experiment_directories(os.path.join(root_dir, directory))
118 |         for directory in directories
119 |     ), ())
120 | 
121 |     return all_experiment_directories
122 | 
123 | 
124 | def find_all_trial_directories(experiment_dir):
125 |     """Given a path to an experiment, find all trial directories in it.
126 | 
127 |     Raises an error if the given experiment path is not actually an
128 |     experiment path.
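129 | 
130 |     For example (an illustrative sketch; actual directory names depend on
131 |     the Ray Tune run):
132 | 
133 |     >>> find_all_trial_directories('~/ray_results/my-experiment')
134 |     ['/home/user/ray_results/my-experiment/trial-0', ...]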
135 |     """
136 | 
137 |     assert is_experiment_directory(experiment_dir), experiment_dir
138 | 
139 |     experiment_dir = os.path.expanduser(experiment_dir)
140 |     directories = next(os.walk(experiment_dir))[1]
141 | 
142 |     all_trial_directories = [
143 |         os.path.join(experiment_dir, directory)
144 |         for directory in directories
145 |         if is_trial_directory(os.path.join(experiment_dir, directory))
146 |     ]
147 | 
148 |     return all_trial_directories
149 | 
--------------------------------------------------------------------------------
/softlearning/utils/video.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def _make_dir(filename):
7 |     folder = os.path.dirname(filename)
8 |     if not os.path.exists(folder):
9 |         os.makedirs(folder)
10 | 
11 | 
12 | def save_video(video_frames, filename, fps=60, video_format='mp4'):
13 |     assert fps == int(fps), fps
14 |     import skvideo.io
15 |     _make_dir(filename)
16 | 
17 |     skvideo.io.vwrite(
18 |         filename,
19 |         video_frames,
20 |         inputdict={
21 |             '-r': str(int(fps)),
22 |         },
23 |         outputdict={
24 |             '-f': video_format,
25 |             '-pix_fmt': 'yuv420p',  # '-pix_fmt=yuv420p' needed for osx https://github.com/scikit-video/scikit-video/issues/74
26 |         }
27 |     )
28 | 
29 | 
30 | def create_video_grid(col_and_row_frames):
31 |     video_grid_frames = np.concatenate([
32 |         np.concatenate(row_frames, axis=-2)
33 |         for row_frames in col_and_row_frames
34 |     ], axis=-3)
35 | 
36 |     return video_grid_frames
37 | 
--------------------------------------------------------------------------------
/softlearning/value_functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .vanilla import (  # noqa: unused-import
2 |     feedforward_Q_function,
3 |     double_feedforward_Q_function,
4 |     ensemble_feedforward_Q_function,
5 | )
6 | 
7 | from softlearning.utils.serialization import (
8 |     serialize_softlearning_object, deserialize_softlearning_object)
9 | 
10 | 
11 | def serialize(value_function):
12 |     return serialize_softlearning_object(value_function)
13 | 
14 | 
15 | def deserialize(name, custom_objects=None):
16 |     """Returns a value function or class denoted by input string.
17 | 
18 |     Arguments:
19 |         name: The name of the value function.
20 |         custom_objects: Optional dictionary mapping names to custom
21 |             objects considered during deserialization.
22 | 
23 |     Returns:
24 |         Value function or class denoted by input string.
25 | 
26 |     For example:
27 |     >>> softlearning.value_functions.get('double_feedforward_Q_function')
28 | 
29 |     >>> softlearning.value_functions.get('abcd')
30 |     Traceback (most recent call last):
31 |     ...
32 |     ValueError: Unknown value function: abcd
33 | 
34 |     Raises:
35 |         ValueError: `Unknown value function` if the input string does not
36 |             denote any defined value function.
37 |     """
38 |     return deserialize_softlearning_object(
39 |         name,
40 |         module_objects=globals(),
41 |         custom_objects=custom_objects,
42 |         printable_module_name='value function')
43 | 
44 | 
45 | def get(identifier):
46 |     """Returns a value function.
47 | 
48 |     Arguments:
49 |         identifier: function, string, or dict.
50 | 
51 |     Returns:
52 |         A value function denoted by identifier.
53 | 
54 |     For example:
55 | 
56 |     >>> softlearning.value_functions.get('double_feedforward_Q_function')
57 | 
58 |     >>> softlearning.value_functions.get('abcd')
59 |     Traceback (most recent call last):
60 |     ...
61 |     ValueError: Unknown value function: abcd
62 | 
63 |     Raises:
64 |         ValueError: Input is an unknown function or string, i.e., the
65 |             identifier does not denote any defined value function.
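66 | 
67 |     A `dict` identifier is passed to `deserialize` the same way as a
68 |     string identifier.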
69 |     """
70 |     if identifier is None:
71 |         return None
72 |     if isinstance(identifier, str):
73 |         return deserialize(identifier)
74 |     elif isinstance(identifier, dict):
75 |         return deserialize(identifier)
76 |     elif callable(identifier):
77 |         return identifier
78 |     else:
79 |         raise TypeError(
80 |             "Could not interpret value function identifier: "
81 |             f"{repr(identifier)}.")
82 | 
--------------------------------------------------------------------------------
/softlearning/value_functions/base_value_function.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from collections import OrderedDict
3 | 
4 | import tensorflow as tf
5 | import tree
6 | 
7 | 
8 | class BaseValueFunction:
9 |     def __init__(self, model, observation_keys, name='value_function'):
10 |         self._observation_keys = observation_keys
11 |         self.model = model
12 |         self._name = name
13 | 
14 |     @property
15 |     def name(self):
16 |         return self._name
17 | 
18 |     @property
19 |     def observation_keys(self):
20 |         return self._observation_keys
21 | 
22 |     def reset(self):
23 |         """Reset and clean the value function."""
24 | 
25 |     def get_weights(self, *args, **kwargs):
26 |         return self.model.get_weights(*args, **kwargs)
27 | 
28 |     def set_weights(self, *args, **kwargs):
29 |         return self.model.set_weights(*args, **kwargs)
30 | 
31 |     def save_weights(self, *args, **kwargs):
32 |         self.model.save_weights(*args, **kwargs)
33 | 
34 |     def load_weights(self, *args, **kwargs):
35 |         self.model.load_weights(*args, **kwargs)
36 | 
37 |     @property
38 |     def weights(self):
39 |         """Returns the list of all value function variables/weights.
40 | 
41 |         Returns:
42 |             A list of variables.
43 |         """
44 |         return self.trainable_weights + self.non_trainable_weights
45 | 
46 |     @property
47 |     def trainable_weights(self):
48 |         return self.model.trainable_weights
49 | 
50 |     @property
51 |     def non_trainable_weights(self):
52 |         return self.model.non_trainable_weights
53 | 
54 |     @property
55 |     def variables(self):
56 |         """Returns the list of all value function variables/weights.
57 | 
58 |         Alias of `self.weights`.
59 | 
60 |         Returns:
61 |             A list of variables.
62 |         """
63 |         return self.weights
64 | 
65 |     @property
66 |     def trainable_variables(self):
67 |         return self.trainable_weights
68 | 
69 |     @property
70 |     def non_trainable_variables(self):
71 |         return self.non_trainable_weights
72 | 
73 |     @abc.abstractmethod
74 |     def values(self, inputs):
75 |         """Compute values for given inputs (e.g. observations)."""
76 |         raise NotImplementedError
77 | 
78 |     def value(self, *args, **kwargs):
79 |         """Compute a value for a single input (e.g. a single observation)."""
80 |         args_, kwargs_ = tree.map_structure(
81 |             lambda x: x[None, ...], (args, kwargs))
82 |         values = self.values(*args_, **kwargs_)
83 |         value = tree.map_structure(lambda x: x[0], values)
84 |         return value
85 | 
86 |     def _filter_observations(self, observations):
87 |         if (isinstance(observations, dict)
88 |                 and self._observation_keys is not None):
89 |             observations = type(observations)((
90 |                 (key, observations[key])
91 |                 for key in self.observation_keys
92 |             ))
93 |         return observations
94 | 
95 |     def get_diagnostics(self, *inputs):
96 |         """Return loggable diagnostic information of the value function."""
97 |         diagnostics = OrderedDict()
98 |         return diagnostics
99 | 
100 |     def __getstate__(self):
101 |         state = self.__dict__.copy()
102 |         model = state.pop('model')
103 |         state.update({
104 |             'model_config': model.get_config(),
105 |             'model_weights': model.get_weights(),
106 |         })
107 |         return state
108 | 
109 |     def __setstate__(self, state):
110 |         model_config = state.pop('model_config')
111 |         model_weights = state.pop('model_weights')
112 |         model = tf.keras.Model.from_config(model_config)
113 |         model.set_weights(model_weights)
114 |         state['model'] = model
115 |         self.__dict__ = state
116 | 
117 | 
118 | class StateValueFunction(BaseValueFunction):
119 |     def values(self, observations, **kwargs):
120 |         """Compute values given observations."""
121 |         observations = self._filter_observations(observations)
122 |         values = self.model(observations, **kwargs)
123 |         return values
124 | 
125 | 
126 | class StateActionValueFunction(BaseValueFunction):
127 |     def values(self, observations, actions, **kwargs):
128 |         """Compute values given observations and actions."""
129 |         observations = self._filter_observations(observations)
130 |         values = self.model((observations, actions), **kwargs)
131 |         return values
132 | 
--------------------------------------------------------------------------------
/softlearning/value_functions/base_value_function_test.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from collections import OrderedDict
3 | 
4 | import numpy as np
5 | import tensorflow as tf
6 | import tree
7 | 
8 | from softlearning.value_functions.vanilla import feedforward_Q_function
9 | from softlearning.environments.utils import get_environment
10 | 
11 | 
12 | class ValueFunctionTest(tf.test.TestCase):
13 |     def setUp(self):
14 |         self.env = get_environment('gym', 'Swimmer', 'v3', {})
15 |         self.hidden_layer_sizes = (8, 8)
16 | 
17 |         observation_shapes = OrderedDict((
18 |             (key, value) for key, value in self.env.observation_shape.items()
19 |         ))
20 |         action_shape = self.env.action_shape
21 |         input_shapes = (observation_shapes, action_shape)
22 |         self.value_function = feedforward_Q_function(
23 |             input_shapes=input_shapes,
24 |             hidden_layer_sizes=self.hidden_layer_sizes,
25 |         )
26 | 
27 |     def test_values(self):
28 |         _ = self.env.reset()
29 |         action1_np = self.env.action_space.sample()
30 |         observation1_np = self.env.step(action1_np)[0]
31 |         action2_np = self.env.action_space.sample()
32 |         observation2_np = self.env.step(action2_np)[0]
33 | 
34 |         observations_np = type(observation1_np)((
35 |             (key, np.stack((
36 |                 observation1_np[key], observation2_np[key]
37 |             ), axis=0).astype(np.float32))
38 |             for key in observation1_np.keys()
39 |         ))
40 | 
41 |         actions_np = np.stack((
42 |             action1_np, action2_np
43 |         ), axis=0).astype(np.float32)
44 | 
45 |         observations_tf = tree.map_structure(
46 |             lambda x: tf.constant(x, dtype=x.dtype), observations_np)
47 |         actions_tf = tree.map_structure(
48 |             lambda x: tf.constant(x, dtype=x.dtype), actions_np)
49 | 
50 |         for observations, actions in (
51 |                 (observations_np, actions_np),
52 |                 (observations_tf, actions_tf)):
53 |             values = self.value_function.values(observations, actions)
54 | 
55 |             tf.debugging.assert_shapes(((values, (2, 1)),))
56 | 
57 |     def test_trainable_variables(self):
58 |         self.assertEqual(
59 |             len(self.value_function.trainable_variables),
60 |             2 * (len(self.hidden_layer_sizes) + 1))
61 | 
62 |     def test_get_diagnostics(self):
63 |         _ = self.env.reset()
64 |         action1 = self.env.action_space.sample()
65 |         observation1 = self.env.step(action1)[0]
66 |         action2 = self.env.action_space.sample()
67 |         observation2 = self.env.step(action2)[0]
68 | 
69 |         observations = type(observation1)((
70 |             (key, np.stack((
71 |                 observation1[key], observation2[key]
72 |             ), axis=0).astype(np.float32))
73 |             for key in observation1.keys()
74 |         ))
75 | 
76 |         actions = np.stack((
77 |             action1, action2
78 |         ), axis=0).astype(np.float32)
79 | 
80 |         diagnostics = self.value_function.get_diagnostics(
81 |             observations, actions)
82 | 
83 |         self.assertIsInstance(diagnostics, OrderedDict)
84 |         self.assertEqual(tuple(diagnostics.keys()), ())
85 | 
86 |         for value in diagnostics.values():
87 |             self.assertTrue(np.isscalar(value))
88 | 
89 |     def test_serialize_deserialize(self):
90 |         _ = self.env.reset()
91 |         action1_np = self.env.action_space.sample()
92 |         observation1_np = self.env.step(action1_np)[0]
93 |         action2_np = self.env.action_space.sample()
94 |         observation2_np = self.env.step(action2_np)[0]
95 | 
96 |         observations = type(observation1_np)((
97 |             (key, np.stack((
98 |                 observation1_np[key], observation2_np[key]
99 |             ), axis=0).astype(np.float32))
100 |             for key in observation1_np.keys()
101 |         ))
102 | 
103 |         actions = np.stack((
104 |             action1_np, action2_np
105 |         ), axis=0).astype(np.float32)
106 | 
107 |         weights_1 = self.value_function.get_weights()
108 | 
109 |         values_1 = self.value_function.values(observations, actions).numpy()
110 | 
111 |         serialized = pickle.dumps(self.value_function)
112 |         deserialized = pickle.loads(serialized)
113 | 
114 |         weights_2 = deserialized.get_weights()
115 |         values_2 = deserialized.values(observations, actions).numpy()
116 | 
117 |         for weight_1, weight_2 in zip(weights_1, weights_2):
118 |             np.testing.assert_array_equal(weight_1, weight_2)
119 | 
120 |         np.testing.assert_array_equal(values_1, values_2)
121 | 
122 | 
123 | if __name__ == '__main__':
124 |     tf.test.main()
125 | 
--------------------------------------------------------------------------------
/softlearning/value_functions/vanilla.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tree
3 | 
4 | from softlearning.models.feedforward import feedforward_model
5 | from softlearning.models.utils import create_inputs
6 | from softlearning.utils.tensorflow import apply_preprocessors
7 | from softlearning import preprocessors as preprocessors_lib
8 | from softlearning.utils.tensorflow import cast_and_concat
9 | 
10 | from .base_value_function import StateActionValueFunction
11 | 
12 | 
13 | def create_ensemble_value_function(N, value_fn, *args, **kwargs):
14 |     # TODO(hartikainen): The ensemble Q-function should support the same
15 |     # interface as the regular ones. Implement the double min-thing
16 |     # as a Keras layer.
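17 |     # NOTE: downstream algorithms typically reduce this tuple with an
18 |     # elementwise minimum over the Q-values (the clipped double-Q trick).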
19 |     value_fns = tuple(value_fn(*args, **kwargs) for _ in range(N))
20 |     return value_fns
21 | 
22 | 
23 | def double_feedforward_Q_function(*args, **kwargs):
24 |     return create_ensemble_value_function(
25 |         2, feedforward_Q_function, *args, **kwargs)
26 | 
27 | 
28 | def ensemble_feedforward_Q_function(N, *args, **kwargs):
29 |     return create_ensemble_value_function(
30 |         N, feedforward_Q_function, *args, **kwargs)
31 | 
32 | 
33 | def feedforward_Q_function(input_shapes,
34 |                            *args,
35 |                            preprocessors=None,
36 |                            observation_keys=None,
37 |                            name='feedforward_Q',
38 |                            **kwargs):
39 |     inputs = create_inputs(input_shapes)
40 | 
41 |     if preprocessors is None:
42 |         preprocessors = tree.map_structure(lambda _: None, inputs)
43 | 
44 |     preprocessors = tree.map_structure_up_to(
45 |         inputs, preprocessors_lib.deserialize, preprocessors)
46 | 
47 |     preprocessed_inputs = apply_preprocessors(preprocessors, inputs)
48 | 
49 |     # NOTE(hartikainen): `feedforward_model` would do the `cast_and_concat`
50 |     # step for us, but tf2.2 broke the sequential multi-input handling; see:
51 |     # https://github.com/tensorflow/tensorflow/issues/37061.
52 |     out = tf.keras.layers.Lambda(cast_and_concat)(preprocessed_inputs)
53 |     Q_model_body = feedforward_model(
54 |         *args,
55 |         output_shape=[1],
56 |         name=name,
57 |         **kwargs
58 |     )
59 | 
60 |     Q_model = tf.keras.Model(inputs, Q_model_body(out), name=name)
61 | 
62 |     Q_function = StateActionValueFunction(
63 |         model=Q_model, observation_keys=observation_keys, name=name)
64 | 
65 |     return Q_function
66 | 
--------------------------------------------------------------------------------
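A usage sketch tying the value-function pieces together; this is a non-authoritative example that assumes only what the repo itself shows (`get_environment` from softlearning/environments/utils.py, and the `hidden_layer_sizes` keyword forwarded to `feedforward_model`, as exercised in base_value_function_test.py above):

    from softlearning.environments.utils import get_environment
    from softlearning.value_functions.vanilla import double_feedforward_Q_function

    env = get_environment('gym', 'Swimmer', 'v3', {})
    input_shapes = (env.observation_shape, env.action_shape)

    # Two independently initialized Q-functions, each a (256, 256) MLP.
    Qs = double_feedforward_Q_function(
        input_shapes=input_shapes,
        hidden_layer_sizes=(256, 256))

    # Each element supports the StateActionValueFunction interface, e.g.:
    # values = tuple(Q.values(observations, actions) for Q in Qs)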