├── .github └── workflows │ ├── robobase.yml │ └── robobase_integration.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── README.md ├── extra_install_scripts └── install_coppeliasim.sh ├── robobase ├── __init__.py ├── cfgs │ ├── __init__.py │ ├── env │ │ ├── bigym.yaml │ │ ├── bigym │ │ │ ├── cupboards_close_all.yaml │ │ │ ├── cupboards_open_all.yaml │ │ │ ├── dishwasher_close.yaml │ │ │ ├── dishwasher_close_trays.yaml │ │ │ ├── dishwasher_load_cups.yaml │ │ │ ├── dishwasher_load_cutlery.yaml │ │ │ ├── dishwasher_load_plates.yaml │ │ │ ├── dishwasher_open.yaml │ │ │ ├── dishwasher_open_trays.yaml │ │ │ ├── dishwasher_unload_cups.yaml │ │ │ ├── dishwasher_unload_cups_long.yaml │ │ │ ├── dishwasher_unload_cutlery.yaml │ │ │ ├── dishwasher_unload_cutlery_long.yaml │ │ │ ├── dishwasher_unload_plates.yaml │ │ │ ├── dishwasher_unload_plates_long.yaml │ │ │ ├── drawer_top_close.yaml │ │ │ ├── drawer_top_open.yaml │ │ │ ├── drawers_close_all.yaml │ │ │ ├── drawers_open_all.yaml │ │ │ ├── flip_cup.yaml │ │ │ ├── flip_cutlery.yaml │ │ │ ├── move_plate.yaml │ │ │ ├── move_two_plates.yaml │ │ │ ├── pick_box.yaml │ │ │ ├── put_cups.yaml │ │ │ ├── reach_target_dual.yaml │ │ │ ├── reach_target_multi_modal.yaml │ │ │ ├── reach_target_single.yaml │ │ │ ├── sandwich_flip.yaml │ │ │ ├── sandwich_remove.yaml │ │ │ ├── sandwich_toast.yaml │ │ │ ├── saucepan_to_hob.yaml │ │ │ ├── stack_blocks.yaml │ │ │ ├── store_box.yaml │ │ │ ├── store_groceries_lower.yaml │ │ │ ├── store_groceries_upper.yaml │ │ │ ├── store_kitchenware.yaml │ │ │ ├── take_cups.yaml │ │ │ ├── wall_cupboard_close.yaml │ │ │ └── wall_cupboard_open.yaml │ │ ├── d4rl.yaml │ │ ├── d4rl │ │ │ ├── ant.yaml │ │ │ ├── antmaze.yaml │ │ │ ├── halfcheetah.yaml │ │ │ ├── hopper.yaml │ │ │ └── walker2d.yaml │ │ ├── dmc.yaml │ │ ├── dmc │ │ │ ├── acrobot_swingup.yaml │ │ │ ├── cartpole_balance.yaml │ │ │ ├── cartpole_balance_sparse.yaml │ │ │ ├── cartpole_swingup.yaml │ │ │ ├── cartpole_swingup_sparse.yaml │ │ │ ├── cheetah_run.yaml │ │ │ ├── cup_catch.yaml │ │ │ ├── easy.yaml │ │ │ ├── finger_spin.yaml │ │ │ ├── finger_turn_easy.yaml │ │ │ ├── finger_turn_hard.yaml │ │ │ ├── hard.yaml │ │ │ ├── hopper_hop.yaml │ │ │ ├── hopper_stand.yaml │ │ │ ├── humanoid_run.yaml │ │ │ ├── humanoid_stand.yaml │ │ │ ├── humanoid_walk.yaml │ │ │ ├── lift_brick.yaml │ │ │ ├── medium.yaml │ │ │ ├── pendulum_swingup.yaml │ │ │ ├── quadruped_run.yaml │ │ │ ├── quadruped_walk.yaml │ │ │ ├── reach_duplo.yaml │ │ │ ├── reacher_easy.yaml │ │ │ ├── reacher_hard.yaml │ │ │ ├── stacker_stack_2.yaml │ │ │ ├── walker_run.yaml │ │ │ ├── walker_stand.yaml │ │ │ └── walker_walk.yaml │ │ ├── rlbench.yaml │ │ └── rlbench │ │ │ ├── basketball_in_hoop.yaml │ │ │ ├── easy.yaml │ │ │ ├── episode_length │ │ │ ├── cqn.yaml │ │ │ └── default.yaml │ │ │ ├── insert_usb_in_computer.yaml │ │ │ ├── lamp_on.yaml │ │ │ ├── meat_on_grill.yaml │ │ │ ├── open_door.yaml │ │ │ ├── open_drawer.yaml │ │ │ ├── open_microwave.yaml │ │ │ ├── open_oven.yaml │ │ │ ├── phone_on_base.yaml │ │ │ ├── pick_up_cup.yaml │ │ │ ├── press_switch.yaml │ │ │ ├── put_books_on_bookshelf.yaml │ │ │ ├── put_item_in_drawer.yaml │ │ │ ├── put_money_in_safe.yaml │ │ │ ├── put_rubbish_in_bin.yaml │ │ │ ├── reach_target.yaml │ │ │ ├── slide_block_to_target.yaml │ │ │ ├── stack_wine.yaml │ │ │ ├── sweep_to_dustpan.yaml │ │ │ ├── take_lid_off_saucepan.yaml │ │ │ ├── take_plate_off_colored_dish_rack.yaml │ │ │ ├── toilet_seat_up.yaml │ │ │ └── turn_tap.yaml │ ├── intrinsic_reward_module │ │ ├── icm.yaml │ │ └── rnd.yaml │ 
├── launch │ │ ├── act_pixel_bigym.yaml │ │ ├── act_pixel_rlbench.yaml │ │ ├── alix_pixel_dmc.yaml │ │ ├── cqn_pixel_dmc.yaml │ │ ├── cqn_pixel_rlbench_demo_driven.yaml │ │ ├── cqn_state_dmc.yaml │ │ ├── cqn_state_rlbench_demo_driven.yaml │ │ ├── dp_pixel_bigym.yaml │ │ ├── dreamerv3_pixel_dmc.yaml │ │ ├── drm_pixel_dmc.yaml │ │ ├── drqv2.yaml │ │ ├── drqv2_dist_critic.yaml │ │ ├── drqv2_pixel_dmc.yaml │ │ ├── drqv2plus_pixel_rlbench_demo_driven.yaml │ │ ├── edp_d4rl.yaml │ │ ├── iql_drqv2.yaml │ │ ├── mwm.yaml │ │ ├── mwm_pixel_dmc.yaml │ │ └── sac_lix_pixel_dmc.yaml │ ├── method │ │ ├── act.yaml │ │ ├── alix.yaml │ │ ├── bc.yaml │ │ ├── cqn.yaml │ │ ├── diffusion.yaml │ │ ├── dreamerv3.yaml │ │ ├── drm.yaml │ │ ├── drqv2.yaml │ │ ├── edp.yaml │ │ ├── iql_drqv2.yaml │ │ ├── mwm.yaml │ │ ├── sac_lix.yaml │ │ └── value_based.yaml │ └── robobase_config.yaml ├── envs │ ├── __init__.py │ ├── bigym.py │ ├── d4rl.py │ ├── dmc.py │ ├── env.py │ ├── rlbench.py │ ├── utils │ │ └── bigym_utils.py │ └── wrappers │ │ ├── __init__.py │ │ ├── action_sequence.py │ │ ├── append_demo_info.py │ │ ├── concat_dim.py │ │ ├── frame_stack.py │ │ ├── onehot_time.py │ │ ├── rescale_from_tanh.py │ │ ├── reward_modifiers.py │ │ └── transpose_image_chw.py ├── intrinsic_reward_module │ ├── __init__.py │ ├── core.py │ ├── icm.py │ ├── rnd.py │ └── utils.py ├── logger.py ├── method │ ├── __init__.py │ ├── act.py │ ├── actor_critic.py │ ├── alix.py │ ├── bc.py │ ├── core.py │ ├── cqn.py │ ├── diffusion.py │ ├── dreamerv3.py │ ├── drm.py │ ├── drqv2.py │ ├── edp.py │ ├── iql_drqv2.py │ ├── mwm.py │ ├── sac_lix.py │ ├── utils.py │ └── value_based.py ├── models │ ├── __init__.py │ ├── act │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── position_encoding.py │ │ ├── transformer.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── misc.py │ │ │ └── resnet_film.py │ ├── core.py │ ├── decoder.py │ ├── diffusion_models.py │ ├── encoder.py │ ├── fully_connected.py │ ├── fusion.py │ ├── lix_utils │ │ ├── __init__.py │ │ ├── analysis_custom_autograd_functions.py │ │ ├── analysis_layers.py │ │ ├── analysis_modules.py │ │ ├── analysis_optimizers.py │ │ └── analysis_utils.py │ ├── model_based │ │ ├── distributions.py │ │ ├── dynamics_model.py │ │ └── utils.py │ ├── multi_view_transformer.py │ └── utils.py ├── replay_buffer │ ├── __init__.py │ ├── prioritized_replay_buffer.py │ ├── replay_buffer.py │ ├── sum_tree.py │ ├── uniform_replay_buffer.py │ └── utils.py ├── utils.py ├── video.py └── workspace.py ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── integration │ └── test_training.py └── unit │ ├── __init__.py │ ├── envs │ ├── test_d4rl.py │ └── test_rlbench.py │ ├── intrinsic_reward_module │ └── test_intrinsic_rewards.py │ ├── method │ ├── base.py │ ├── test_il_methods.py │ ├── test_model_based_rl_methods.py │ └── test_rl_methods.py │ ├── models │ └── test_models.py │ ├── replay_buffer │ ├── test_prioritized_replay_buffer.py │ ├── test_sequential_replay_buffer.py │ ├── test_sum_tree.py │ └── test_uniform_replay_buffer.py │ ├── test_workspace.py │ ├── utils │ └── test_add_demo_to_replay_buffer.py │ └── wrappers │ ├── __init__.py │ ├── test_action_sequence.py │ ├── test_concat_dim.py │ ├── test_frame_stack.py │ ├── test_onehot_time.py │ ├── test_rescale_from_tanh.py │ ├── test_reward_modifiers.py │ └── utils.py └── train.py /.github/workflows/robobase.yml: -------------------------------------------------------------------------------- 1 | name: RoboBase 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 
branches: 9 | - '**' 10 | schedule: 11 | - cron: '0 0 1 * *' 12 | 13 | jobs: 14 | clean: 15 | runs-on: [self-hosted, linux, x86, gpu] 16 | steps: 17 | - name: Prune Docker Images 18 | run: docker image prune -af 19 | 20 | test: 21 | needs: clean 22 | runs-on: [self-hosted, linux, x86, gpu] 23 | container: 24 | image: nvidia/cuda:11.7.1-base-ubuntu22.04 25 | options: --privileged --net=host --gpus=all -e=DISPLAY -e=SHELL=/bin/bash -e=NVIDIA_DRIVER_CAPABILITIES=all 26 | volumes: 27 | - /usr/share/vulkan/icd.d:/usr/share/vulkan/icd.d 28 | - ${{ secrets.HOME }}/.ssh:/root/.ssh_original 29 | 30 | steps: 31 | - name: Set up environment variables 32 | run: | 33 | export XDG_RUNTIME_DIR=/run/user/$(id -u) 34 | export XAUTHORITY=/run/user/$(id -u)/gdm/Xauthority 35 | 36 | - name: Install dependencies 37 | run: | 38 | apt-get update 39 | apt-get install -y wget python3 python3-pip python3.10-venv xvfb git 40 | python3 -m venv githubenv 41 | . githubenv/bin/activate 42 | apt-get install -y wget python3 python3-pip xvfb libgl1-mesa-glx libosmesa6 libxkbcommon-x11-0 libegl1-mesa-dev libgles2-mesa-dev git curl libffi-dev libffi7 libglib2.0-0 'libxcb.*-dev' libxcb-xinerama0 libx11-xcb-dev libglu1-mesa-dev libxrender-dev libxi-dev libxkbcommon-dev libxkbcommon-x11-dev libxcb-cursor0 libglfw3 libosmesa6-dev patchelf 43 | pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 44 | pip install -e ".[dev]" 45 | git config --global --add safe.directory $GITHUB_WORKSPACE 46 | pre-commit run --all-files 47 | 48 | - name: Install CoppeliaSim 49 | run: | 50 | DEBIAN_FRONTEND=noninteractive apt-get install -y xvfb qtbase5-dev qtdeclarative5-dev libqt5webkit5-dev libsqlite3-dev qtchooser qt5-qmake qtbase5-dev-tools qttools5-dev-tools 51 | chmod u+x extra_install_scripts/install_coppeliasim.sh 52 | ./extra_install_scripts/install_coppeliasim.sh 53 | export COPPELIASIM_ROOT=${HOME}/.local/bin/CoppeliaSim 54 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$COPPELIASIM_ROOT 55 | export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT 56 | 57 | - name: Install Mujoco210 58 | run: | 59 | wget --no-check-certificate https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz 60 | mkdir ${HOME}/.mujoco 61 | tar -xvzf mujoco210-linux-x86_64.tar.gz -C ${HOME}/.mujoco 62 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${HOME}/.mujoco/mujoco210/bin 63 | 64 | - name: Install Python Packages 65 | run: | 66 | pip uninstall -y rlbench pyrep bigym d4rl 67 | pip --default-timeout=60 install -e ".[dev,dmc,rlbench,bigym,d4rl]" 68 | pip install pytest-xvfb 69 | export MKL_SERVICE_FORCE_INTEL=1 70 | 71 | - name: Run Tests 72 | run: pytest tests/unit --junitxml=test-reports/pytest.xml 73 | 74 | env: 75 | DISPLAY: :0 76 | SHELL: /bin/bash 77 | NVIDIA_DRIVER_CAPABILITIES: all 78 | -------------------------------------------------------------------------------- /.github/workflows/robobase_integration.yml: -------------------------------------------------------------------------------- 1 | name: RoboBase_Integration 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 3 * *' 6 | 7 | jobs: 8 | clean: 9 | runs-on: [self-hosted, linux, x86, gpu] 10 | steps: 11 | - name: Prune Docker Images 12 | run: docker image prune -af 13 | 14 | test: 15 | needs: clean 16 | runs-on: [self-hosted, linux, x86, gpu] 17 | container: 18 | image: nvidia/cuda:11.7.1-base-ubuntu22.04 19 | options: --privileged --net=host --gpus=all -e=DISPLAY -e=SHELL=/bin/bash -e=NVIDIA_DRIVER_CAPABILITIES=all 20 | volumes: 21 | - 
/usr/share/vulkan/icd.d:/usr/share/vulkan/icd.d 22 | - ${{ secrets.HOME }}/.ssh:/root/.ssh 23 | 24 | steps: 25 | - name: Set up environment variables 26 | run: | 27 | export XDG_RUNTIME_DIR=/run/user/$(id -u) 28 | export XAUTHORITY=/run/user/$(id -u)/gdm/Xauthority 29 | 30 | - name: Install dependencies 31 | run: | 32 | apt-get update 33 | apt-get install -y wget python3 python3-pip python3.10-venv xvfb git 34 | python3 -m venv githubenv 35 | . githubenv/bin/activate 36 | apt-get install -y wget python3 python3-pip xvfb libgl1-mesa-glx libosmesa6 libxkbcommon-x11-0 libegl1-mesa-dev libgles2-mesa-dev git curl libffi-dev libffi7 libglib2.0-0 'libxcb.*-dev' libxcb-xinerama0 libx11-xcb-dev libglu1-mesa-dev libxrender-dev libxi-dev libxkbcommon-dev libxkbcommon-x11-dev libxcb-cursor0 37 | pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 38 | pip install -e ".[dev]" 39 | git config --global --add safe.directory $GITHUB_WORKSPACE 40 | ssh-keyscan -t rsa github.com >> $HOME/.ssh/known_hosts 41 | pip install -e ".[dmc]" 42 | Xvfb :99 & 43 | export DISPLAY=:99 44 | pytest tests/integration --junitxml=test-reports/pytest.xml 45 | 46 | env: 47 | DISPLAY: :0 48 | SHELL: /bin/bash 49 | NVIDIA_DRIVER_CAPABILITIES: all 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # PyCharm 28 | .idea 29 | 30 | # VSCode 31 | .vscode 32 | 33 | # Project specific 34 | exp_local/ 35 | wandb/ 36 | eval_video/ 37 | *.csv 38 | events.out.* 39 | .DS_Store 40 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: check-yaml 6 | exclude: bamboo-specs/bamboo.yaml 7 | - id: sort-simple-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | 11 | - repo: https://github.com/psf/black 12 | rev: 23.3.0 13 | hooks: 14 | - id: black 15 | 16 | - repo: https://github.com/charliermarsh/ruff-pre-commit 17 | rev: v0.0.272 18 | hooks: 19 | - id: ruff 20 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ### Added 11 | 12 | - None. 13 | 14 | ### Changed 15 | 16 | - None. 17 | 18 | ### Fixed 19 | 20 | - None. 21 | 22 | ## [1.0.0] 23 | 24 | ### Added 25 | 26 | - None. 27 | 28 | ### Changed 29 | 30 | - None. 31 | 32 | ### Fixed 33 | 34 | - None. 
35 | -------------------------------------------------------------------------------- /extra_install_scripts/install_coppeliasim.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install CoppeliaSim 4.1.0 for Ubuntu 20.04 3 | # Refer to PyRep README for other versions 4 | export COPPELIASIM_ROOT=${HOME}/.local/bin/CoppeliaSim 5 | wget https://www.coppeliarobotics.com/files/V4_1_0/CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz 6 | mkdir -p $COPPELIASIM_ROOT && tar -xf CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz -C $COPPELIASIM_ROOT --strip-components 1 7 | rm -rf CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz 8 | ## Add environment variables into bashrc (or zshrc) 9 | echo "export COPPELIASIM_ROOT=$COPPELIASIM_ROOT 10 | export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:\$COPPELIASIM_ROOT 11 | export QT_QPA_PLATFORM_PLUGIN_PATH=\$COPPELIASIM_ROOT" >> ~/.bashrc 12 | source ~/.bashrc 13 | -------------------------------------------------------------------------------- /robobase/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" 2 | -------------------------------------------------------------------------------- /robobase/cfgs/__init__.py: -------------------------------------------------------------------------------- 1 | """Needed here otherwise using hydra config search doesn't work.""" 2 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | env_name: bigym 5 | episode_length: 3000 6 | cameras: ["head", "right_wrist", "left_wrist"] 7 | action_mode: absolute 8 | floating: true 9 | dataset_root: "" 10 | demo_down_sample_rate: 20 11 | render_mode: rgb_array 12 | enable_all_floating_dof: false 13 | 14 | demos: !!float .inf 15 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/cupboards_close_all.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: cupboards_close_all 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 15500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/cupboards_open_all.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: cupboards_open_all 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 22500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_close.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_close 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 7500 11 | demo_down_sample_rate: 20 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_close_trays.yaml: -------------------------------------------------------------------------------- 
1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_close_trays 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 8000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_load_cups.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_load_cups 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 7500 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_load_cutlery.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_load_cutlery 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 7000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_load_plates.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_load_plates 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 14000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_open.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_open 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 7500 11 | demo_down_sample_rate: 20 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_open_trays.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_open_trays 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 9500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_unload_cups.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_unload_cups 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 10000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_unload_cups_long.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_unload_cups_long 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 18000 11 | demo_down_sample_rate: 25 12 | 
enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_unload_cutlery.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_unload_cutlery 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 15500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_unload_cutlery_long.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_unload_cutlery_long 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 18000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_unload_plates.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_unload_plates 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 20000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/dishwasher_unload_plates_long.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: dishwasher_unload_plates_long 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 26000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/drawer_top_close.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: drawer_top_close 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 3000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/drawer_top_open.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: drawer_top_open 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 5000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/drawers_close_all.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: drawers_close_all 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 5000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/drawers_open_all.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: drawers_open_all 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 12000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/flip_cup.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: flip_cup 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 5500 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/flip_cutlery.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: flip_cutlery 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 12500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/move_plate.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: move_plate 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 3000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: false 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/move_two_plates.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: move_two_plates 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 5500 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: false 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/pick_box.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: pick_box 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 13500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/put_cups.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: put_cups 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 8500 11 | demo_down_sample_rate: 20 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/reach_target_dual.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: reach_target_dual 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 1000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: false 13 | 
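The stddev_schedule values used throughout these task configs, e.g. linear(1.0,0.1,500000), follow the schedule mini-language popularised by DrQ-v2-style RL codebases: the exploration-noise standard deviation decays linearly from an initial value to a final value over a number of environment steps. The repository's actual parser lives in its Python sources and is not shown in this listing; the sketch below is an illustrative re-implementation under that assumption, and the function name "schedule" is hypothetical rather than RoboBase's API.

import re

def schedule(schdl: str, step: int) -> float:
    """Evaluate a spec such as 'linear(1.0,0.1,500000)' at a given step.

    Illustrative sketch only (not RoboBase's actual code):
    - a bare float string (e.g. '0.2') is a constant schedule;
    - 'linear(init, final, duration)' interpolates from init to final
      over duration steps, then stays at final.
    """
    try:
        return float(schdl)  # constant schedule
    except ValueError:
        pass
    match = re.match(r"linear\((.+),(.+),(.+)\)", schdl)
    if match is not None:
        init, final, duration = (float(g) for g in match.groups())
        mix = min(max(step / duration, 0.0), 1.0)
        return (1.0 - mix) * init + mix * final
    raise NotImplementedError(f"Unknown schedule spec: {schdl}")

# Halfway through the decay: schedule("linear(1.0,0.1,500000)", 250000) == 0.55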
-------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/reach_target_multi_modal.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: reach_target_multi_modal 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 1000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: false 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/reach_target_single.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: reach_target_single 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 1000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: false 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/sandwich_flip.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: sandwich_flip 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 15500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/sandwich_remove.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: sandwich_remove 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 13500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/sandwich_toast.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: sandwich_toast 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 16500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/saucepan_to_hob.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: saucepan_to_hob 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 11000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/stack_blocks.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: stack_blocks 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 28500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/store_box.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: 
store_box 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 15000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/store_groceries_lower.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: store_groceries_lower 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 32000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/store_groceries_upper.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: store_groceries_upper 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 19000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/store_kitchenware.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: store_kitchenware 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 20000 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/take_cups.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: take_cups 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 10500 11 | demo_down_sample_rate: 25 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/wall_cupboard_close.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: wall_cupboard_close 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 3000 11 | demo_down_sample_rate: 10 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/bigym/wall_cupboard_open.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - bigym 5 | - _self_ 6 | 7 | env: 8 | task_name: wall_cupboard_open 9 | stddev_schedule: linear(1.0,0.1,500000) 10 | episode_length: 6000 11 | demo_down_sample_rate: 20 12 | enable_all_floating_dof: true 13 | -------------------------------------------------------------------------------- /robobase/cfgs/env/d4rl.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | env_name: d4rl 5 | episode_length: 999 6 | random_traj: false # if false, use the top K (K=demos) trajectories; if true, sample trajectories at random 7 | stddev_schedule: linear(1.0,0.1,500000) 8 | 9 | # demos: .inf # Use all available demos 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/d4rl/ant.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - d4rl 5 | - _self_ 6 | 7 | env: 8 | # choose between [ant-random-v0/v2, ant-medium-v0/v2, ant-expert-v0/v2, ant-medium-replay-v0/v2, ant-medium-expert-v0/v2] 9 | task_name: ant-medium-v2 10 | 11 | # demos: 1203 # Full size = 1203 12 | -------------------------------------------------------------------------------- /robobase/cfgs/env/d4rl/antmaze.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - d4rl 5 | - _self_ 6 | 7 | env: 8 | # choose between [antmaze-umaze-v0, antmaze-umaze-diverse-v0, antmaze-medium-diverse-v0, antmaze-medium-play-v0, antmaze-large-diverse-v0, antmaze-large-play-v0] 9 | task_name: antmaze-medium-diverse-v0 10 | 11 | # demos: 2924 # Full size = 2924 12 | -------------------------------------------------------------------------------- /robobase/cfgs/env/d4rl/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - d4rl 5 | - _self_ 6 | 7 | env: 8 | # choose between [halfcheetah-random-v0/v2, halfcheetah-medium-v0/v2, halfcheetah-expert-v0/v2, halfcheetah-medium-replay-v0/v2, halfcheetah-medium-expert-v0/v2] 9 | task_name: halfcheetah-medium-v2 10 | 11 | # demos: 1000 # Full size = 1000 12 | -------------------------------------------------------------------------------- /robobase/cfgs/env/d4rl/hopper.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - d4rl 5 | - _self_ 6 | 7 | env: 8 | # choose between [hopper-random-v0/v2, hopper-medium-v0/v2, hopper-expert-v0/v2, hopper-medium-replay-v0/v2, hopper-medium-expert-v0/v2] 9 | task_name: hopper-medium-v2 10 | 11 | # demos: 2187 # Full size = 2187 12 | -------------------------------------------------------------------------------- /robobase/cfgs/env/d4rl/walker2d.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - d4rl 5 | - _self_ 6 | 7 | env: 8 | # choose between [walker2d-random-v0/v2, walker2d-medium-v0/v2, walker2d-expert-v0/v2, walker2d-medium-replay-v0/v2, walker2d-medium-expert-v0/v2] 9 | task_name: walker2d-medium-v2 10 | 11 | # demos: 1191 # Full size = 1191 12 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | env_name: dmc 5 | episode_length: 1000 # This is the default for DMC. 
6 | 7 | action_repeat: 2 8 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/acrobot_swingup.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: acrobot_swingup 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/cartpole_balance.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: cartpole_balance 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/cartpole_balance_sparse.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: cartpole_balance_sparse 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/cartpole_swingup.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: cartpole_swingup 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/cartpole_swingup_sparse.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: cartpole_swingup_sparse 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/cheetah_run.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: cheetah_run 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/cup_catch.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: cup_catch 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/easy.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | stddev_schedule: 'linear(1.0,0.1,100000)' 5 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/finger_spin.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: finger_spin 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/finger_turn_easy.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: finger_turn_easy 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/finger_turn_hard.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: finger_turn_hard 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/hard.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | stddev_schedule: 'linear(1.0,0.1,2000000)' 5 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/hopper_hop.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: hopper_hop 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/hopper_stand.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: hopper_stand 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/humanoid_run.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/hard 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: humanoid_run 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/humanoid_stand.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/hard 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: humanoid_stand 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/humanoid_walk.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/hard 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: humanoid_walk 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/lift_brick.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: lift_brick 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/medium.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | stddev_schedule: linear(1.0,0.1,500000) 5 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/pendulum_swingup.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | 4 | defaults: 5 | - dmc/easy 6 | - dmc 7 | - _self_ 8 | 9 | env: 10 | task_name: pendulum_swingup 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/quadruped_run.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: quadruped_run 10 | -------------------------------------------------------------------------------- 
/robobase/cfgs/env/dmc/quadruped_walk.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: quadruped_walk 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/reach_duplo.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: reach_duplo 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/reacher_easy.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: reacher_easy 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/reacher_hard.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: reacher_hard 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/stacker_stack_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: stacker_stack_2 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/walker_run.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/medium 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: walker_run 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/walker_stand.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: walker_stand 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/dmc/walker_walk.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - dmc/easy 5 | - dmc 6 | - _self_ 7 | 8 | env: 9 | task_name: walker_walk 10 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/episode_length/default # change this if you want to use a different set of episode lengths 5 | 6 | visual_observation_shape: [128, 128] 7 | 8 | env: 9 | env_name: rlbench 10 | episode_length: ??? 
11 | dataset_root: '' 12 | action_mode: JOINT_POSITION 13 | cameras: ["front", "wrist", "left_shoulder", "right_shoulder"] 14 | renderer: opengl3 15 | arm_max_velocity: 1.0 16 | arm_max_acceleration: 4.0 17 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/basketball_in_hoop.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: basketball_in_hoop 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/easy.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | stddev_schedule: 'linear(1.0,0.1,100000)' 5 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/episode_length/cqn.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | tasks: 5 | basketball_in_hoop: 6 | episode_length: 125 7 | insert_usb_in_computer: 8 | episode_length: 100 9 | lamp_on: 10 | episode_length: 100 11 | meat_on_grill: 12 | episode_length: 150 13 | open_door: 14 | episode_length: 125 15 | open_drawer: 16 | episode_length: 100 17 | open_microwave: 18 | episode_length: 125 19 | open_oven: 20 | episode_length: 225 21 | phone_on_base: 22 | episode_length: 175 23 | pick_up_cup: 24 | episode_length: 100 25 | press_switch: 26 | episode_length: 100 27 | put_books_on_bookshelf: 28 | episode_length: 175 29 | put_item_in_drawer: 30 | episode_length: 350 31 | put_money_in_safe: 32 | episode_length: 150 33 | put_rubbish_in_bin: 34 | episode_length: 150 35 | reach_target: 36 | episode_length: 50 37 | slide_block_to_target: 38 | episode_length: 150 39 | stack_wine: 40 | episode_length: 150 41 | sweep_to_dustpan: 42 | episode_length: 100 43 | take_lid_off_saucepan: 44 | episode_length: 100 45 | take_plate_off_colored_dish_rack: 46 | episode_length: 150 47 | toilet_seat_up: 48 | episode_length: 150 49 | turn_tap: 50 | episode_length: 125 51 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/episode_length/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | tasks: 5 | basketball_in_hoop: 6 | episode_length: 275 7 | insert_usb_in_computer: 8 | episode_length: 175 9 | lamp_on: 10 | episode_length: 150 11 | meat_on_grill: 12 | episode_length: 250 13 | open_door: 14 | episode_length: 250 15 | open_drawer: 16 | episode_length: 150 17 | open_microwave: 18 | episode_length: 200 19 | open_oven: 20 | episode_length: 500 21 | phone_on_base: 22 | episode_length: 300 23 | pick_up_cup: 24 | episode_length: 175 25 | press_switch: 26 | episode_length: 225 27 | put_books_on_bookshelf: 28 | episode_length: 350 29 | put_item_in_drawer: 30 | episode_length: 400 31 | put_money_in_safe: 32 | episode_length: 250 33 | put_rubbish_in_bin: 34 | episode_length: 300 35 | reach_target: 36 | episode_length: 100 37 | slide_block_to_target: 38 | episode_length: 175 39 | stack_wine: 40 | episode_length: 225 41 | sweep_to_dustpan: 42 | episode_length: 150 43 | take_lid_off_saucepan: 44 | episode_length: 125 45 | take_plate_off_colored_dish_rack: 46 | episode_length: 275 47 | toilet_seat_up: 48 | 
episode_length: 275 49 | turn_tap: 50 | episode_length: 225 51 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/insert_usb_in_computer.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: insert_usb_in_computer 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/lamp_on.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: lamp_on 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/meat_on_grill.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: meat_on_grill 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/open_door.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: open_door 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/open_drawer.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: open_drawer 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/open_microwave.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: open_microwave 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/open_oven.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: open_oven 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/phone_on_base.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: phone_on_base 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/pick_up_cup.yaml: -------------------------------------------------------------------------------- 1 | # @package 
_global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: pick_up_cup 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/press_switch.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: press_switch 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/put_books_on_bookshelf.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: put_books_on_bookshelf 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/put_item_in_drawer.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: put_item_in_drawer 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/put_money_in_safe.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: put_money_in_safe 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/put_rubbish_in_bin.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: put_rubbish_in_bin 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/reach_target.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: reach_target 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/slide_block_to_target.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: slide_block_to_target 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/stack_wine.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: stack_wine 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | 
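Each RLBench task config above resolves its episode length through a nested OmegaConf interpolation: the inner ${env.task_name} resolves first, and its value then indexes the env.tasks table provided by the rlbench/episode_length/default (or cqn) config group. This indirection is why rlbench.yaml can declare episode_length as the mandatory placeholder ??? and let the selected task and episode-length group fill it in. A self-contained sketch of that resolution, using keys and values taken from the configs above (the snippet is illustrative, not repository code):

from omegaconf import OmegaConf

# Minimal stand-in for the merged Hydra config: the tasks table mirrors
# rlbench/episode_length/default.yaml, task_name mirrors stack_wine.yaml.
cfg = OmegaConf.create(
    {
        "env": {
            "task_name": "stack_wine",
            "tasks": {"stack_wine": {"episode_length": 225}},
            # inner ${env.task_name} resolves first, then indexes env.tasks
            "episode_length": "${env.tasks.${env.task_name}.episode_length}",
        }
    }
)
assert cfg.env.episode_length == 225  # resolved lazily on access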
-------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/sweep_to_dustpan.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: sweep_to_dustpan 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/take_lid_off_saucepan.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: take_lid_off_saucepan 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/take_plate_off_colored_dish_rack.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: take_plate_off_colored_dish_rack 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/toilet_seat_up.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: toilet_seat_up 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/env/rlbench/turn_tap.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - rlbench/easy 5 | - rlbench 6 | - _self_ 7 | 8 | env: 9 | task_name: turn_tap 10 | episode_length: ${env.tasks.${env.task_name}.episode_length} 11 | -------------------------------------------------------------------------------- /robobase/cfgs/intrinsic_reward_module/icm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | intrinsic_reward_module: 4 | _target_: robobase.intrinsic_reward_module.ICM 5 | beta: 0.05 6 | kappa: 0.000025 7 | -------------------------------------------------------------------------------- /robobase/cfgs/intrinsic_reward_module/rnd.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | intrinsic_reward_module: 4 | _target_: robobase.intrinsic_reward_module.RND 5 | beta: 0.05 6 | kappa: 0.000025 7 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/act_pixel_bigym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null 5 | - ../method: act 6 | 7 | demos: 30 8 | num_pretrain_steps: 100000 9 | num_train_frames: 0 10 | eval_every_steps: 2500 11 | num_eval_episodes: 25 12 | batch_size: 256 13 | save_snapshot: true 14 | snapshot_every_n: 0 15 | replay_size_before_train: 500 16 | 17 | pixels: true 18 | frame_stack: 1 19 | 20 | is_imitation_learning: true 21 | 22 | action_repeat: 1 23 | action_sequence: 16 24 | execution_length: 1 25 | temporal_ensemble: true 26 | use_standardization: 
false # Demo-based standardization for action space 27 | use_min_max_normalization: true # Demo-based min-max normalization for action space 28 | min_max_margin: 0 29 | norm_obs: true 30 | 31 | update_every_steps: 1 32 | 33 | replay: 34 | nstep: 1 35 | 36 | hydra: 37 | run: 38 | dir: ./exp_local/pixel_act/bigym_${env.task_name}_${now:%Y%m%d%H%M%S} 39 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/act_pixel_rlbench.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: rlbench/take_lid_off_saucepan 5 | - ../method: act 6 | 7 | demos: 50 8 | num_pretrain_steps: 500000 9 | num_train_frames: 0 10 | eval_every_steps: 2000 11 | num_eval_episodes: 10 12 | batch_size: 256 13 | save_snapshot: true 14 | snapshot_every_n: 0 15 | 16 | pixels: true 17 | frame_stack: 1 18 | 19 | is_imitation_learning: true 20 | 21 | action_repeat: 1 22 | action_sequence: 20 23 | execution_length: 1 24 | temporal_ensemble: true 25 | use_standardization: false # Demo-based standardization for action space 26 | use_min_max_normalization: true # Demo-based min-max normalization for action space 27 | min_max_margin: 0.2 28 | 29 | update_every_steps: 1 30 | 31 | replay: 32 | nstep: 1 33 | 34 | 35 | hydra: 36 | run: 37 | dir: ./exp_local/pixel_act/rlbench_${env.task_name}_${now:%Y%m%d%H%M%S} 38 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/alix_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: dmc/cartpole_swingup 5 | - ../method: alix 6 | 7 | pixels: true 8 | frame_stack: 3 9 | action_repeat: 2 10 | num_train_envs: 1 11 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/cqn_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null # Please specify environment to use in command line 5 | - ../method: cqn 6 | 7 | pixels: true 8 | frame_stack: 3 9 | action_repeat: 2 10 | 11 | update_every_steps: 2 12 | replay: 13 | nstep: 3 14 | size: 500000 15 | 16 | method: 17 | use_dueling: true 18 | v_min: 0.0 19 | v_max: 200.0 20 | atoms: 51 21 | 22 | hydra: 23 | run: 24 | dir: ./exp_local/pixel_cqn/dmc_${env.task_name}_${now:%Y%m%d%H%M%S} 25 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/cqn_pixel_rlbench_demo_driven.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../method: cqn 5 | - ../env: rlbench/take_lid_off_saucepan 6 | - override ../env: rlbench/episode_length/cqn 7 | 8 | pixels: true 9 | visual_observation_shape: [84, 84] 10 | frame_stack: 8 11 | action_repeat: 1 12 | 13 | env: 14 | arm_max_velocity: 2.0 15 | arm_max_acceleration: 8.0 16 | 17 | demos: 100 18 | use_min_max_normalization: true 19 | min_max_margin: 0.2 20 | batch_size: 256 21 | demo_batch_size: 256 22 | num_pretrain_steps: 0 23 | num_explore_steps: 0 24 | replay_size_before_train: 500 25 | use_self_imitation: true 26 | num_train_frames: 30100 27 | log_every: 200 28 | 29 | num_eval_episodes: 50 30 | eval_every_steps: 2500 31 | 32 | update_every_steps: 1 33 | replay: 34 | nstep: 1 35 | size: 50000 36 | demo_size: 50000 37 | num_workers: 4 38 | 39 | method: 40 | use_dueling: 
true 41 | v_min: -2.0 # -1.0 is too tight with n_step > 1 42 | v_max: 2.0 # 1.0 is too tight with n_step > 1 43 | atoms: 51 44 | use_target_network_for_rollout: true 45 | bc_lambda: 1.0 46 | bc_margin: 0.01 47 | weight_decay: 0.1 48 | stddev_schedule: 0.01 49 | critic_lr: 5e-5 50 | encoder_lr: 5e-5 51 | critic_lambda: 0.1 52 | always_bootstrap: true # Always do bootstrap; could be useful for episodic task 53 | 54 | advantage_model: 55 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 56 | linear_bias: false 57 | 58 | value_model: 59 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 60 | linear_bias: false 61 | 62 | encoder_model: 63 | kernel_size: 4 64 | padding: 1 65 | channels_multiplier: 2 66 | num_downsample_convs: 4 67 | num_post_downsample_convs: 0 68 | norm: img_ch_layer 69 | 70 | hydra: 71 | run: 72 | dir: ./exp_local/pixel_cqn/rlbench_${env.task_name}_${now:%Y%m%d%H%M%S} 73 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/cqn_state_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null # Please specify environment to use in command line 5 | - ../method: cqn 6 | 7 | pixels: false 8 | frame_stack: 1 9 | action_repeat: 1 10 | 11 | update_every_steps: 1 12 | replay: 13 | nstep: 3 # Use 1 for walker_walk 14 | 15 | method: 16 | use_dueling: false 17 | v_min: 0.0 18 | v_max: 200.0 19 | atoms: 51 20 | 21 | hydra: 22 | run: 23 | dir: ./exp_local/state_cqn/dmc_${env.task_name}_${now:%Y%m%d%H%M%S} 24 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/cqn_state_rlbench_demo_driven.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../method: cqn 5 | - ../env: rlbench/reach_target 6 | - override ../env: rlbench/episode_length/cqn 7 | 8 | pixels: false 9 | frame_stack: 1 10 | action_repeat: 1 11 | 12 | env: 13 | arm_max_velocity: 2.0 14 | arm_max_acceleration: 8.0 15 | 16 | demos: 20 17 | use_min_max_normalization: true 18 | min_max_margin: 0.2 19 | batch_size: 256 20 | demo_batch_size: 256 21 | num_pretrain_steps: 0 22 | num_explore_steps: 0 23 | replay_size_before_train: 250 24 | log_every: 200 25 | 26 | num_eval_episodes: 25 27 | eval_every_steps: 500 28 | num_train_frames: 10100 29 | 30 | update_every_steps: 1 31 | replay: 32 | nstep: 1 33 | size: 50000 34 | demo_size: 50000 35 | num_workers: 4 36 | 37 | method: 38 | use_dueling: true 39 | v_min: -1.0 40 | v_max: 1.0 41 | atoms: 51 42 | use_target_network_for_rollout: true 43 | bc_lambda: 1.0 44 | bc_margin: 0.01 45 | critic_lambda: 0.1 46 | stddev_schedule: 0.01 47 | weight_decay: 0.1 48 | always_bootstrap: true 49 | 50 | advantage_model: 51 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 52 | 53 | value_model: 54 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 55 | 56 | 57 | hydra: 58 | run: 59 | dir: ./exp_local/state_cqn/rlbench_${env.task_name}_${now:%Y%m%d%H%M%S} 60 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/dp_pixel_bigym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null 5 | - ../method: diffusion 6 | 7 | demos: 30 8 | num_pretrain_steps: 100000 9 | num_train_frames: 0 10 | eval_every_steps: 20000 11 | num_eval_episodes: 5 12 | 
batch_size: 256 13 | save_snapshot: true 14 | snapshot_every_n: 0 15 | replay_size_before_train: 500 16 | 17 | pixels: true 18 | frame_stack: 1 19 | 20 | is_imitation_learning: true 21 | 22 | action_repeat: 1 23 | action_sequence: 16 24 | execution_length: 1 25 | temporal_ensemble: true 26 | use_standardization: false # Demo-based standardization for action space 27 | use_min_max_normalization: true # Demo-based min-max normalization for action space 28 | min_max_margin: 0 29 | norm_obs: true 30 | 31 | update_every_steps: 1 32 | # logging settings 33 | wandb: # Weights & Biases 34 | use: true 35 | project: dp_bigym 36 | name: null 37 | 38 | 39 | replay: 40 | nstep: 1 41 | 42 | hydra: 43 | run: 44 | dir: ./exp_local/pixel_dp/bigym_${env.task_name}_${now:%Y%m%d%H%M%S} 45 | 46 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/dreamerv3_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: dmc/cartpole_swingup 5 | - ../method: dreamerv3 6 | 7 | pixels: true 8 | visual_observation_shape: [64, 64] 9 | frame_stack: 1 10 | batch_size: 16 11 | replay_size_before_train: 2500 12 | update_every_steps: 2 13 | num_explore_steps: 2500 14 | frame_stack_on_channel: false 15 | eval_every_steps: 5000 16 | 17 | replay: 18 | sequential: true 19 | num_workers: 4 20 | gamma: 0.997 21 | size: 500000 22 | transition_seq_len: 64 # the length of transition seq for each sample 23 | 24 | hydra: 25 | run: 26 | dir: ./exp_local/dreamerv3/${env.task_name}_${now:%Y%m%d%H%M} 27 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/drm_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: dmc/cartpole_swingup 5 | - ../method: drm 6 | 7 | pixels: true 8 | frame_stack: 3 9 | action_repeat: 2 10 | num_train_envs: 1 11 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/drqv2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null # Please specify environment to use in command line 5 | - ../method: drqv2 6 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/drqv2_dist_critic.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null # Please specify environment to use in command line 5 | - ../method: drqv2 6 | 7 | method: 8 | distributional_critic: true 9 | critic_model: 10 | _target_: robobase.models.MLPWithBottleneckFeatures 11 | _partial_: true 12 | input_shapes: ??? 
13 | output_shape: [251, 1] # This refers to the number of atoms for distributional critic 14 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 15 | bottleneck_size: 50 16 | norm_after_bottleneck: true 17 | tanh_after_bottleneck: true 18 | mlp_nodes: [1024, 1024] 19 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/drqv2_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: dmc/cartpole_swingup 5 | - ../method: drqv2 6 | 7 | pixels: true 8 | frame_stack: 3 9 | action_repeat: 2 10 | num_train_envs: 1 11 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/drqv2plus_pixel_rlbench_demo_driven.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | # DrQ-v2+ baseline introduced in CQN paper 3 | 4 | defaults: 5 | - ../method: drqv2 6 | - ../env: rlbench/take_lid_off_saucepan 7 | - override ../env: rlbench/episode_length/cqn 8 | 9 | pixels: true 10 | visual_observation_shape: [84, 84] 11 | frame_stack: 8 12 | action_repeat: 1 13 | 14 | env: 15 | arm_max_velocity: 2.0 16 | arm_max_acceleration: 8.0 17 | 18 | demos: 100 19 | use_min_max_normalization: true 20 | min_max_margin: 0.2 21 | batch_size: 256 22 | demo_batch_size: 256 23 | num_pretrain_steps: 0 24 | num_explore_steps: 0 25 | replay_size_before_train: 500 26 | use_self_imitation: true 27 | num_train_frames: 30100 28 | log_every: 150 29 | 30 | num_eval_episodes: 50 31 | eval_every_steps: 2500 32 | 33 | update_every_steps: 1 34 | replay: 35 | nstep: 1 36 | size: 50000 37 | demo_size: 50000 38 | num_workers: 4 39 | 40 | method: 41 | bc_lambda: 1.0 42 | stddev_schedule: 0.01 43 | weight_decay: 0.1 44 | always_bootstrap: true 45 | distributional_critic: true 46 | distributional_critic_limit: 2 47 | distributional_critic_atoms: 101 48 | distributional_critic_transform: false 49 | 50 | critic_model: 51 | output_shape: [101, 1] 52 | 53 | encoder_model: 54 | kernel_size: 4 55 | padding: 1 56 | channels_multiplier: 2 57 | num_downsample_convs: 4 58 | num_post_downsample_convs: 0 59 | 60 | hydra: 61 | run: 62 | dir: ./exp_local/pixel_drqv2+/rlbench_${env.task_name}_${now:%Y%m%d%H%M%S} 63 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/edp_d4rl.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null # Please specify environment to use in command line 5 | - ../method: edp 6 | 7 | num_pretrain_steps: 500000 8 | 9 | # main config 10 | no_data_collection: false 11 | demos: .inf 12 | eval_every_frames: 10000 13 | num_train_frames: 0 14 | use_standardization: false 15 | 16 | method: 17 | encoder_model: null 18 | view_fusion_model: null 19 | actor_update_method: "iql" 20 | diff_coeff: 1.0 21 | guide_coeff: 1.0 22 | awr_temperature: 1.0 23 | 24 | # diffusion param 25 | solver_type: "DDPM" 26 | num_diffusion_iters: 100 27 | beta_schedule: "linear" 28 | 29 | wandb: 30 | project: diff-rl 31 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/iql_drqv2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null # Please specify environment to use in command line 5 | - ../method: iql_drqv2 6 | 
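# The launch configs that set `../env: null` (like the one above) expect the
# environment to be chosen on the command line. A hedged sketch of composing
# such a config programmatically with Hydra's compose API -- the config path
# and override names here are assumptions based on the tree, not verified:
#   from hydra import compose, initialize
#   with initialize(version_base=None, config_path="robobase/cfgs"):
#       cfg = compose(config_name="robobase_config",
#                     overrides=["launch=iql_drqv2", "env=dmc/cartpole_swingup"])
#   print(cfg.method._target_)  # -> robobase.method.iql_drqv2.IQLDrQV2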
-------------------------------------------------------------------------------- /robobase/cfgs/launch/mwm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: null 5 | - ../method: mwm 6 | 7 | pixels: true 8 | visual_observation_shape: [64, 64] 9 | frame_stack: 1 10 | batch_size: 16 11 | replay_size_before_train: 5000 12 | repeat_final_frame: false # Could be useful for sparse, episodic task 13 | update_every_steps: 5 14 | num_explore_steps: 5000 15 | frame_stack_on_channel: false 16 | 17 | replay: 18 | sequential: true 19 | num_workers: 16 20 | transition_seq_len: 50 # the length of transition seq for each sample 21 | 22 | hydra: 23 | run: 24 | dir: ./exp_local/mwm/${env.task_name}_${now:%Y%m%d%H%M} 25 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/mwm_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: dmc/cartpole_swingup 5 | - ../method: mwm 6 | 7 | pixels: true 8 | visual_observation_shape: [64, 64] 9 | frame_stack: 1 10 | batch_size: 16 11 | replay_size_before_train: 2500 12 | update_every_steps: 5 13 | num_explore_steps: 2500 14 | frame_stack_on_channel: false 15 | 16 | replay: 17 | sequential: true 18 | num_workers: 16 19 | gamma: 0.997 20 | size: 500000 21 | transition_seq_len: 64 # the length of transition seq for each sample 22 | 23 | hydra: 24 | run: 25 | dir: ./exp_local/mwm/${env.task_name}_${now:%Y%m%d%H%M} 26 | -------------------------------------------------------------------------------- /robobase/cfgs/launch/sac_lix_pixel_dmc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - ../env: dmc/cartpole_swingup 5 | - ../method: sac_lix 6 | 7 | pixels: true 8 | frame_stack: 3 9 | action_repeat: 2 10 | num_train_envs: 1 11 | -------------------------------------------------------------------------------- /robobase/cfgs/method/act.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | method: 3 | _target_: robobase.method.act.ActBCAgent 4 | is_rl: false 5 | device: ${device} 6 | lr: 1e-4 7 | lr_backbone: 1e-5 8 | weight_decay: 1e-4 9 | num_train_steps: ${num_pretrain_steps} 10 | adaptive_lr: false 11 | actor_grad_clip: null 12 | # This would expect language embeddings using openai-clip 13 | # TODO: add an environment wrapper for an extra observation with language embeddings 14 | # But this would be environment-specific 15 | use_lang_cond: False 16 | 17 | actor_model: 18 | _target_: robobase.models.multi_view_transformer.MultiViewTransformerEncoderDecoderACT 19 | _partial_: true 20 | input_shape: ??? 21 | hidden_dim: 512 22 | enc_layers: 4 23 | # Note: Although the original ACT implementation has 7 for `n_decoder_layers`, there is a bug in the code 24 | # that means only the first layer is used. Here we match the original implementation by setting this to 1. 25 | # See this issue https://github.com/tonyzhaozh/act/issues/25#issue-2258740521. 26 | dec_layers: 1 27 | dim_feedforward: 3200 28 | dropout: 0.1 29 | nheads: 8 30 | num_queries: ${action_sequence} 31 | pre_norm: false 32 | state_dim: ??? 33 | action_dim: ??? 34 | use_lang_cond: ${method.use_lang_cond} 35 | 36 | encoder_model: 37 | _target_: robobase.method.act.ImageEncoderACT 38 | _partial_: true 39 | input_shape: ??? 
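    # `???` marks a mandatory value that must be supplied at instantiation
    # time, and `_partial_: true` makes hydra.utils.instantiate return a
    # functools.partial rather than a constructed object. A sketch of how a
    # node like this is typically consumed (variable names hypothetical):
    #   model_fn = hydra.utils.instantiate(cfg.method.encoder_model)
    #   encoder = model_fn(input_shape=obs_shape)  # fills in the ??? field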
40 | hidden_dim: ${method.actor_model.hidden_dim} 41 | position_embedding: "sine" 42 | lr_backbone: ${method.lr_backbone} 43 | masks: False 44 | backbone: "resnet18" 45 | dilation: False 46 | use_lang_cond: ${method.use_lang_cond} 47 | -------------------------------------------------------------------------------- /robobase/cfgs/method/alix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.alix.ALIX 5 | is_rl: true 6 | num_explore_steps: ${num_explore_steps} 7 | actor_lr: 1e-4 8 | critic_lr: 1e-4 9 | view_fusion_lr: 1e-4 10 | encoder_lr: 1e-4 11 | weight_decay: 0.0 12 | num_critics: 2 13 | critic_target_tau: 0.01 14 | stddev_schedule: ${env.stddev_schedule} 15 | stddev_clip: 0.3 16 | actor_grad_clip: null 17 | critic_grad_clip: null 18 | bc_lambda: 0.0 19 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 20 | action_sequence_network_type: rnn # rnn, mlp 21 | critic_updates_shared_vision_encoder: true 22 | distributional_critic: false 23 | distributional_critic_limit: 20 # v_min / v_max for dist_critic 24 | distributional_critic_atoms: 251 25 | distributional_critic_transform: true # hyperbolic/parabolic transformation to value 26 | 27 | actor_model: 28 | _target_: robobase.models.MLPWithBottleneckFeaturesAndSequenceOutput 29 | _partial_: true 30 | input_shapes: ??? 31 | output_shape: ??? 32 | num_envs: ??? 33 | num_rnn_layers: 1 34 | rnn_hidden_size: 128 35 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 36 | bottleneck_size: 50 37 | norm_after_bottleneck: true 38 | tanh_after_bottleneck: true 39 | mlp_nodes: [1024, 1024] 40 | output_sequence_network_type: rnn 41 | output_sequence_length: ${action_sequence} 42 | 43 | critic_model: 44 | _target_: robobase.models.MLPWithBottleneckFeatures 45 | _partial_: true 46 | input_shapes: ??? 47 | output_shape: 1 48 | num_envs: ??? 49 | num_rnn_layers: 1 50 | rnn_hidden_size: 128 51 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 52 | bottleneck_size: 50 53 | norm_after_bottleneck: true 54 | tanh_after_bottleneck: true 55 | mlp_nodes: [1024, 1024] 56 | 57 | encoder_model: 58 | _target_: robobase.models.lix_utils.analysis_modules.EncoderAllFeatTiedRegularizedCNNMultiViewDownsampleWithStrides 59 | _partial_: true 60 | input_shape: ??? 61 | num_downsample_convs: 1 62 | num_post_downsample_convs: 3 63 | channels: 32 64 | kernel_size: 3 65 | 66 | view_fusion_model: 67 | _target_: robobase.models.FusionMultiCamFeature 68 | _partial_: true 69 | input_shape: ??? 70 | mode: flatten 71 | -------------------------------------------------------------------------------- /robobase/cfgs/method/bc.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.bc.BC 5 | is_rl: false 6 | lr: 1e-4 7 | num_train_steps: ${num_pretrain_steps} 8 | adaptive_lr: true 9 | actor_grad_clip: null 10 | 11 | actor_model: 12 | _target_: robobase.models.MLPWithBottleneckFeaturesAndSequenceOutput 13 | _partial_: true 14 | input_shapes: ??? 15 | output_shape: ??? 16 | num_envs: ??? 
17 | num_rnn_layers: 1 18 | rnn_hidden_size: 128 19 | keys_to_bottleneck: [] 20 | bottleneck_size: 50 21 | norm_after_bottleneck: true 22 | tanh_after_bottleneck: true 23 | mlp_nodes: [1024, 1024] 24 | output_sequence_network_type: rnn 25 | output_sequence_length: ${action_sequence} 26 | 27 | encoder_model: 28 | _target_: robobase.models.ResNetEncoder 29 | _partial_: true 30 | input_shape: ??? 31 | model: resnet18 32 | 33 | view_fusion_model: 34 | _target_: robobase.models.FusionMultiCamFeature 35 | _partial_: true 36 | input_shape: ??? 37 | mode: flatten 38 | -------------------------------------------------------------------------------- /robobase/cfgs/method/cqn.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.cqn.CQN 5 | num_explore_steps: ${num_explore_steps} 6 | is_rl: true 7 | critic_lr: 1e-4 8 | view_fusion_lr: 1e-4 9 | encoder_lr: 1e-4 10 | weight_decay: 0.0 11 | num_critics: 1 # Not supported 12 | critic_target_tau: 0.02 13 | stddev_schedule: 0.1 14 | critic_grad_clip: null 15 | use_dueling: true 16 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 17 | bc_lambda: 0.0 18 | bc_margin: 0.0 # If > 0.0, encourage expert action's Q(s, a*) be higher than other actions' Q(s, a) 19 | use_target_network_for_rollout: false 20 | num_update_steps: 1 # If > 1, take N updates inside a single `update` call 21 | use_augmentation: true 22 | use_torch_compile: false 23 | levels: 3 24 | bins: 5 25 | v_min: 0 # Set to r_min / (1 - gamma) for infinite-length tasks or r_min for episodic task 26 | v_max: 200.0 # Set to r_max / (1 - gamma), e.g., DMC -> 200 (with action repeat 2) or to r_max, e.g., RLBench -> 1 27 | atoms: 51 # Worth trying 101 28 | critic_lambda: 1.0 29 | centralized_critic: false 30 | 31 | advantage_model: 32 | _target_: robobase.models.MLPWithBottleneckFeatures 33 | _partial_: true 34 | input_shapes: ??? 35 | output_shape: ??? 36 | num_envs: ??? 37 | num_rnn_layers: 1 38 | rnn_hidden_size: 128 39 | keys_to_bottleneck: [fused_view_feats] 40 | bottleneck_size: 64 41 | norm_after_bottleneck: true 42 | tanh_after_bottleneck: true 43 | mlp_nodes: [512, 512] 44 | activation: silu 45 | norm: layer 46 | linear_bias: false 47 | 48 | value_model: 49 | _target_: robobase.models.MLPWithBottleneckFeatures 50 | _partial_: true 51 | input_shapes: ??? 52 | output_shape: ??? 53 | num_envs: ??? 54 | num_rnn_layers: 1 55 | rnn_hidden_size: 128 56 | keys_to_bottleneck: [fused_view_feats] 57 | bottleneck_size: 64 58 | norm_after_bottleneck: true 59 | tanh_after_bottleneck: true 60 | mlp_nodes: [512, 512] 61 | activation: silu 62 | norm: layer 63 | linear_bias: false 64 | 65 | encoder_model: 66 | _target_: robobase.models.EncoderCNNMultiViewDownsampleWithStrides 67 | _partial_: true 68 | input_shape: ??? 69 | num_downsample_convs: 1 70 | num_post_downsample_convs: 3 71 | channels: 32 72 | kernel_size: 3 73 | channels_multiplier: 1 74 | padding: 0 75 | activation: silu 76 | norm: layer_for_cnn 77 | 78 | view_fusion_model: 79 | _target_: robobase.models.FusionMultiCamFeature 80 | _partial_: true 81 | input_shape: ??? 
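    # The `levels`/`bins` settings above drive CQN's coarse-to-fine action
    # discretization: each level zooms into the bin chosen at the previous
    # one, giving bins**levels (5**3 = 125 here) effective values per action
    # dimension. An illustrative sketch of the zooming idea, not the
    # library's actual code:
    #   def coarse_to_fine(bin_idxs, bins=5, low=-1.0, high=1.0):
    #       for b in bin_idxs:              # one greedy bin index per level
    #           width = (high - low) / bins
    #           low, high = low + b * width, low + (b + 1) * width
    #       return (low + high) / 2
    #   coarse_to_fine([2, 2, 2])           # -> 0.0, the centre bin each time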
82 | mode: flatten 83 | -------------------------------------------------------------------------------- /robobase/cfgs/method/diffusion.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | action_sequence: 20 3 | 4 | method: 5 | _target_: robobase.method.diffusion.Diffusion 6 | is_rl: false 7 | lr: 1e-4 8 | num_train_steps: ${num_pretrain_steps} 9 | num_diffusion_iters: 50 10 | adaptive_lr: true 11 | actor_grad_clip: null 12 | 13 | actor_model: 14 | _target_: robobase.models.diffusion_models.ConditionalUnet1D # film_unet 15 | _partial_: true 16 | input_shapes: ??? 17 | output_shape: ??? 18 | sequence_length: ${action_sequence} 19 | diffusion_step_embed_dim: 256 20 | down_dims: [256, 512, 1024] 21 | kernel_size: 5 22 | n_groups: 8 23 | 24 | encoder_model: 25 | _target_: robobase.models.ResNetEncoder 26 | _partial_: true 27 | input_shape: ??? 28 | model: resnet18 29 | 30 | view_fusion_model: 31 | _target_: robobase.models.FusionMultiCamFeature 32 | _partial_: true 33 | input_shape: ??? 34 | mode: flatten 35 | -------------------------------------------------------------------------------- /robobase/cfgs/method/drm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.drm.DrM 5 | is_rl: true 6 | num_explore_steps: ${num_explore_steps} 7 | actor_lr: 1e-4 8 | critic_lr: 1e-4 9 | view_fusion_lr: 1e-4 10 | encoder_lr: 1e-4 11 | value_lr: 1e-4 12 | weight_decay: 0.0 13 | num_critics: 2 14 | critic_target_tau: 0.01 15 | stddev_schedule: ${env.stddev_schedule} 16 | stddev_clip: 0.3 17 | use_augmentation: true 18 | actor_grad_clip: null 19 | critic_grad_clip: null 20 | bc_lambda: 0.0 21 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 22 | action_sequence_network_type: rnn # rnn, mlp 23 | critic_updates_shared_vision_encoder: true 24 | distributional_critic: false 25 | distributional_critic_limit: 20 # v_min / v_max for dist_critic 26 | distributional_critic_atoms: 251 27 | distributional_critic_transform: true # hyperbolic/parabolic transformation to value 28 | t_dormant_ratio: 0.025 29 | dormant_ratio_threshold: 0.2 30 | awaken_exploration_temperature: 0.1 31 | exploitation_temperature: 0.02 32 | exploitation_lam_max: 0.6 33 | exploitation_expectile: 0.9 34 | perturbation_frames: 200000 35 | minimum_perturb_factor: 0.2 36 | maximum_perturb_factor: 0.9 37 | 38 | actor_model: 39 | _target_: robobase.models.MLPWithBottleneckFeaturesAndSequenceOutput 40 | _partial_: true 41 | input_shapes: ??? 42 | output_shape: ??? 43 | num_envs: ??? 44 | num_rnn_layers: 1 45 | rnn_hidden_size: 128 46 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 47 | bottleneck_size: 50 48 | norm_after_bottleneck: true 49 | tanh_after_bottleneck: true 50 | mlp_nodes: [1024, 1024] 51 | output_sequence_network_type: rnn 52 | output_sequence_length: ${action_sequence} 53 | 54 | critic_model: 55 | _target_: robobase.models.MLPWithBottleneckFeatures 56 | _partial_: true 57 | input_shapes: ??? 58 | output_shape: 1 59 | num_envs: ??? 60 | num_rnn_layers: 1 61 | rnn_hidden_size: 128 62 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 63 | bottleneck_size: 50 64 | norm_after_bottleneck: true 65 | tanh_after_bottleneck: true 66 | mlp_nodes: [1024, 1024] 67 | 68 | value_model: 69 | _target_: robobase.models.MLPWithBottleneckFeatures 70 | _partial_: true 71 | input_shapes: ??? 72 | output_shape: 1 73 | num_envs: ??? 
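    # `t_dormant_ratio` and `dormant_ratio_threshold` above refer to DrM's
    # dormant-neuron statistic: a unit counts as dormant when its mean
    # absolute activation, normalised by the layer average, falls below a
    # small threshold. An illustrative sketch (an assumption, not
    # robobase's implementation):
    #   import torch
    #   def dormant_ratio(acts: torch.Tensor, tau: float = 0.025) -> float:
    #       score = acts.abs().mean(dim=0)          # acts: [batch, units]
    #       score = score / (score.mean() + 1e-9)   # normalise by layer mean
    #       return (score <= tau).float().mean().item()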
74 | num_rnn_layers: 1 75 | rnn_hidden_size: 128 76 | keys_to_bottleneck: [ fused_view_feats, low_dim_obs, time_obs ] 77 | bottleneck_size: 50 78 | norm_after_bottleneck: true 79 | tanh_after_bottleneck: true 80 | mlp_nodes: [ 1024, 1024 ] 81 | 82 | encoder_model: 83 | _target_: robobase.models.EncoderCNNMultiViewDownsampleWithStrides 84 | _partial_: true 85 | input_shape: ??? 86 | num_downsample_convs: 1 87 | num_post_downsample_convs: 3 88 | channels: 32 89 | kernel_size: 3 90 | 91 | view_fusion_model: 92 | _target_: robobase.models.FusionMultiCamFeature 93 | _partial_: true 94 | input_shape: ??? 95 | mode: flatten 96 | -------------------------------------------------------------------------------- /robobase/cfgs/method/drqv2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.drqv2.DrQV2 5 | is_rl: true 6 | num_explore_steps: ${num_explore_steps} 7 | actor_lr: 1e-4 8 | critic_lr: 1e-4 9 | view_fusion_lr: 1e-4 10 | encoder_lr: 1e-4 11 | weight_decay: 0.0 12 | num_critics: 2 13 | critic_target_tau: 0.01 14 | stddev_schedule: ${env.stddev_schedule} 15 | stddev_clip: 0.3 16 | use_augmentation: true 17 | actor_grad_clip: null 18 | critic_grad_clip: null 19 | bc_lambda: 0.0 20 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 21 | action_sequence_network_type: rnn # rnn, mlp 22 | critic_updates_shared_vision_encoder: true 23 | distributional_critic: false 24 | distributional_critic_limit: 20 # v_min / v_max for dist_critic 25 | distributional_critic_atoms: 251 26 | distributional_critic_transform: true # hyperbolic/parabolic transformation to value 27 | 28 | actor_model: 29 | _target_: robobase.models.MLPWithBottleneckFeaturesAndSequenceOutput 30 | _partial_: true 31 | input_shapes: ??? 32 | output_shape: ??? 33 | num_envs: ??? 34 | num_rnn_layers: 1 35 | rnn_hidden_size: 128 36 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 37 | bottleneck_size: 50 38 | norm_after_bottleneck: true 39 | tanh_after_bottleneck: true 40 | mlp_nodes: [1024, 1024] 41 | output_sequence_network_type: rnn 42 | output_sequence_length: ${action_sequence} 43 | 44 | critic_model: 45 | _target_: robobase.models.MLPWithBottleneckFeatures 46 | _partial_: true 47 | input_shapes: ??? 48 | output_shape: 1 49 | num_envs: ??? 50 | num_rnn_layers: 1 51 | rnn_hidden_size: 128 52 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 53 | bottleneck_size: 50 54 | norm_after_bottleneck: true 55 | tanh_after_bottleneck: true 56 | mlp_nodes: [1024, 1024] 57 | 58 | encoder_model: 59 | _target_: robobase.models.EncoderCNNMultiViewDownsampleWithStrides 60 | _partial_: true 61 | input_shape: ??? 62 | num_downsample_convs: 1 63 | num_post_downsample_convs: 3 64 | channels: 32 65 | kernel_size: 3 66 | 67 | view_fusion_model: 68 | _target_: robobase.models.FusionMultiCamFeature 69 | _partial_: true 70 | input_shape: ??? 
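    # `distributional_critic_transform` above squashes values with an
    # invertible hyperbolic/parabolic map before projecting onto the atoms.
    # One common choice is the R2D2/MuZero transform; whether robobase uses
    # exactly this form is an assumption:
    #   import torch
    #   def h(x, eps=1e-3):      # value -> transformed space
    #       return torch.sign(x) * ((x.abs() + 1).sqrt() - 1) + eps * x
    #   def h_inv(y, eps=1e-3):  # exact inverse of h
    #       return torch.sign(y) * (
    #           (((1 + 4 * eps * (y.abs() + 1 + eps)).sqrt() - 1)
    #            / (2 * eps)) ** 2 - 1)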
71 | mode: flatten 72 | -------------------------------------------------------------------------------- /robobase/cfgs/method/edp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.edp.DiffusionRL 5 | is_rl: true 6 | 7 | # actor-critic param 8 | num_explore_steps: ${num_explore_steps} 9 | actor_grad_clip: 1 10 | actor_lr: 1e-4 11 | critic_lr: 1e-4 12 | view_fusion_lr: 1e-4 13 | encoder_lr: 1e-4 14 | num_critics: 2 15 | critic_target_tau: 0.01 16 | critic_grad_clip: 1 17 | bc_lambda: 0.0 18 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 19 | action_sequence_network_type: rnn # rnn, mlp 20 | critic_updates_shared_vision_encoder: true 21 | distributional_critic: false 22 | distributional_critic_limit: 20 # v_min / v_max for dist_critic 23 | distributional_critic_atoms: 251 24 | distributional_critic_transform: true # hyperbolic/parabolic transformation to value 25 | actor_update_method: "iql" 26 | 27 | # diffusion param 28 | solver_type: "DDPM" 29 | num_diffusion_iters: 100 30 | beta_schedule: "linear" 31 | 32 | # iql param 33 | expectile: 0.7 34 | awr_temperature: 3.0 35 | 36 | # dql param 37 | dql_alpha: 2.0 38 | 39 | # edp 40 | diff_coeff: 1.0 41 | guide_coeff: 1.0 42 | learnable_std: false 43 | 44 | actor_model: 45 | _target_: robobase.models.diffusion_models.MLPWithBottleneckFeaturesForDiffusion 46 | _partial_: true 47 | input_shapes: ??? 48 | output_shape: ??? 49 | num_envs: ??? 50 | num_rnn_layers: 1 51 | rnn_hidden_size: 128 52 | keys_to_bottleneck: [actions, obs_features] 53 | bottleneck_size: 256 54 | norm_after_bottleneck: true 55 | tanh_after_bottleneck: true 56 | mlp_nodes: [1024, 1024] 57 | sequence_length: ${action_sequence} 58 | diffusion_step_embed_dim: 16 59 | 60 | critic_model: 61 | _target_: robobase.models.MLPWithBottleneckFeatures 62 | _partial_: true 63 | input_shapes: ??? 64 | output_shape: 1 65 | num_envs: ??? 66 | num_rnn_layers: 1 67 | rnn_hidden_size: 128 68 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 69 | bottleneck_size: 50 70 | norm_after_bottleneck: true 71 | tanh_after_bottleneck: true 72 | mlp_nodes: [1024, 1024] 73 | 74 | encoder_model: 75 | _target_: robobase.models.EncoderCNNMultiViewDownsampleWithStrides 76 | _partial_: true 77 | input_shape: ??? 78 | num_downsample_convs: 1 79 | num_post_downsample_convs: 3 80 | channels: 32 81 | kernel_size: 3 82 | 83 | view_fusion_model: 84 | _target_: robobase.models.FusionMultiCamFeature 85 | _partial_: true 86 | input_shape: ??? 
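    # The diffusion settings above (solver_type: DDPM, num_diffusion_iters:
    # 100, beta_schedule: linear) define the forward noising process. A
    # minimal sketch of a linear schedule -- the 1e-4 / 2e-2 endpoints are
    # conventional DDPM defaults, not values read from the repo:
    #   import torch
    #   T = 100
    #   betas = torch.linspace(1e-4, 2e-2, T)
    #   alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)
    #   # forward process: x_t = sqrt(acp[t]) * x_0 + sqrt(1 - acp[t]) * noise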
87 | mode: flatten 88 | -------------------------------------------------------------------------------- /robobase/cfgs/method/iql_drqv2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.iql_drqv2.IQLDrQV2 5 | is_rl: true 6 | num_explore_steps: ${num_explore_steps} 7 | actor_lr: 1e-4 8 | critic_lr: 1e-4 9 | view_fusion_lr: 1e-4 10 | encoder_lr: 1e-4 11 | weight_decay: 0.0 12 | num_critics: 2 13 | critic_target_tau: 0.01 14 | stddev_schedule: ${env.stddev_schedule} 15 | stddev_clip: 0.3 16 | use_augmentation: true 17 | actor_grad_clip: null 18 | critic_grad_clip: null 19 | bc_lambda: 0.0 20 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 21 | action_sequence_network_type: rnn # rnn, mlp 22 | expectile: 0.7 23 | critic_updates_shared_vision_encoder: true 24 | distributional_critic: false 25 | distributional_critic_limit: 20 # v_min / v_max for dist_critic 26 | distributional_critic_atoms: 251 27 | distributional_critic_transform: true # hyperbolic/parabolic transformation to value 28 | actor_loss_type: ddpg 29 | awr_temperature: 3.0 30 | 31 | actor_model: 32 | _target_: robobase.models.MLPWithBottleneckFeaturesAndSequenceOutput 33 | _partial_: true 34 | input_shapes: ??? 35 | output_shape: ??? 36 | num_envs: ??? 37 | num_rnn_layers: 1 38 | rnn_hidden_size: 128 39 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 40 | bottleneck_size: 50 41 | norm_after_bottleneck: true 42 | tanh_after_bottleneck: true 43 | mlp_nodes: [1024, 1024] 44 | output_sequence_network_type: rnn 45 | output_sequence_length: ${action_sequence} 46 | 47 | critic_model: 48 | _target_: robobase.models.MLPWithBottleneckFeatures 49 | _partial_: true 50 | input_shapes: ??? 51 | output_shape: 1 52 | num_envs: ??? 53 | num_rnn_layers: 1 54 | rnn_hidden_size: 128 55 | keys_to_bottleneck: [fused_view_feats, low_dim_obs, time_obs] 56 | bottleneck_size: 50 57 | norm_after_bottleneck: true 58 | tanh_after_bottleneck: true 59 | mlp_nodes: [1024, 1024] 60 | 61 | encoder_model: 62 | _target_: robobase.models.EncoderCNNMultiViewDownsampleWithStrides 63 | _partial_: true 64 | input_shape: ??? 65 | num_downsample_convs: 1 66 | num_post_downsample_convs: 3 67 | channels: 32 68 | kernel_size: 3 69 | 70 | view_fusion_model: 71 | _target_: robobase.models.FusionMultiCamFeature 72 | _partial_: true 73 | input_shape: ??? 
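    # `expectile: 0.7` above parameterises IQL's asymmetric value loss and
    # `awr_temperature: 3.0` scales the advantage-weighted actor objective.
    # A sketch of the standard expectile loss (not necessarily the exact
    # robobase implementation):
    #   import torch
    #   def expectile_loss(diff, expectile=0.7):  # diff = q - v
    #       weight = torch.where(diff > 0, expectile, 1 - expectile)
    #       return (weight * diff.pow(2)).mean()
    #   # AWR-style actor weight: exp(advantage / awr_temperature), clamped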
74 | mode: flatten 75 | -------------------------------------------------------------------------------- /robobase/cfgs/method/sac_lix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.sac_lix.SACLix 5 | is_rl: true 6 | num_explore_steps: ${num_explore_steps} 7 | actor_lr: 1e-4 8 | critic_lr: 1e-4 9 | view_fusion_lr: 1e-4 10 | encoder_lr: 1e-4 11 | weight_decay: 0.0 12 | num_critics: 2 13 | critic_target_tau: 0.01 14 | actor_grad_clip: null 15 | critic_grad_clip: null 16 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 17 | bc_lambda: 0.0 18 | action_sequence_network_type: rnn # rnn, mlp 19 | critic_updates_shared_vision_encoder: true 20 | alpha_lr: 1e-4 21 | init_temperature: 0.1 22 | distributional_critic: false 23 | distributional_critic_limit: 20 # v_min / v_max for dist_critic 24 | distributional_critic_atoms: 251 25 | distributional_critic_transform: true # hyperbolic/parabolic transformation to value 26 | 27 | actor_model: 28 | _target_: robobase.models.MLPWithBottleneckFeaturesAndSequenceOutput 29 | _partial_: true 30 | input_shapes: ??? 31 | output_shape: ??? 32 | num_envs: ??? 33 | num_rnn_layers: 1 34 | rnn_hidden_size: 128 35 | keys_to_bottleneck: [ fused_view_feats, low_dim_obs, time_obs ] 36 | bottleneck_size: 50 37 | norm_after_bottleneck: true 38 | tanh_after_bottleneck: true 39 | mlp_nodes: [ 1024, 1024 ] 40 | output_sequence_network_type: rnn 41 | output_sequence_length: ${action_sequence} 42 | 43 | critic_model: 44 | _target_: robobase.models.MLPWithBottleneckFeatures 45 | _partial_: true 46 | input_shapes: ??? 47 | output_shape: 1 48 | num_envs: ??? 49 | num_rnn_layers: 1 50 | rnn_hidden_size: 128 51 | keys_to_bottleneck: [ fused_view_feats, low_dim_obs, time_obs ] 52 | bottleneck_size: 50 53 | norm_after_bottleneck: true 54 | tanh_after_bottleneck: true 55 | mlp_nodes: [ 1024, 1024 ] 56 | 57 | encoder_model: 58 | _target_: robobase.models.lix_utils.analysis_modules.EncoderAllFeatTiedRegularizedCNNMultiViewDownsampleWithStrides 59 | _partial_: true 60 | input_shape: ??? 61 | num_downsample_convs: 1 62 | num_post_downsample_convs: 3 63 | channels: 32 64 | kernel_size: 3 65 | 66 | view_fusion_model: 67 | _target_: robobase.models.FusionMultiCamFeature 68 | _partial_: true 69 | input_shape: ??? 
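    # `alpha_lr` and `init_temperature` above suggest SAC's learned entropy
    # temperature. The standard formulation, as a sketch -- `action_dim` and
    # `log_prob` are hypothetical placeholders, and the exact robobase loss
    # may differ:
    #   import math, torch
    #   log_alpha = torch.tensor(math.log(0.1), requires_grad=True)
    #   target_entropy = -float(action_dim)
    #   alpha_loss = -(log_alpha * (log_prob + target_entropy).detach()).mean()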
70 | mode: flatten 71 | -------------------------------------------------------------------------------- /robobase/cfgs/method/value_based.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | method: 4 | _target_: robobase.method.value_based.ValueBased 5 | is_rl: true 6 | num_explore_steps: ${num_explore_steps} 7 | critic_lr: 1e-4 8 | view_fusion_lr: 1e-4 9 | encoder_lr: 1e-4 10 | weight_decay: 0.0 11 | num_critics: 1 # Not supported 12 | critic_target_tau: 0.01 13 | stddev_schedule: 0.1 14 | critic_grad_clip: null 15 | use_dueling: false 16 | always_bootstrap: false # Always do bootstrap; could be useful for episodic task 17 | bc_lambda: 0.0 18 | bc_margin: 0.1 # If > 0.0, encourage expert action's Q(s, a*) be higher than other actions' Q(s, a) 19 | use_target_network_for_rollout: false 20 | num_update_steps: 1 # If > 1, take N updates inside a single `update` call 21 | use_augmentation: true 22 | use_torch_compile: false # Only works for torch >= 2.2.0 23 | bins: 5 24 | 25 | advantage_model: 26 | _target_: robobase.models.MLPWithBottleneckFeatures 27 | _partial_: true 28 | input_shapes: ??? 29 | output_shape: ??? 30 | num_envs: ??? 31 | num_rnn_layers: 1 32 | rnn_hidden_size: 128 33 | keys_to_bottleneck: [fused_view_feats] 34 | bottleneck_size: 64 35 | norm_after_bottleneck: true 36 | tanh_after_bottleneck: true 37 | mlp_nodes: [512, 512] 38 | activation: silu 39 | norm: layer 40 | 41 | value_model: 42 | _target_: robobase.models.MLPWithBottleneckFeatures 43 | _partial_: true 44 | input_shapes: ??? 45 | output_shape: ??? 46 | num_envs: ??? 47 | num_rnn_layers: 1 48 | rnn_hidden_size: 128 49 | keys_to_bottleneck: [fused_view_feats] 50 | bottleneck_size: 64 51 | norm_after_bottleneck: true 52 | tanh_after_bottleneck: true 53 | mlp_nodes: [512, 512] 54 | activation: silu 55 | norm: layer 56 | 57 | encoder_model: 58 | _target_: robobase.models.EncoderCNNMultiViewDownsampleWithStrides 59 | _partial_: true 60 | input_shape: ??? 61 | num_downsample_convs: 1 62 | num_post_downsample_convs: 3 63 | channels: 32 64 | kernel_size: 3 65 | channels_multiplier: 1 66 | padding: 0 67 | activation: silu 68 | norm: layer_for_cnn 69 | 70 | view_fusion_model: 71 | _target_: robobase.models.FusionMultiCamFeature 72 | _partial_: true 73 | input_shape: ??? 
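    # When `use_dueling` is enabled, the advantage_model and value_model
    # above are combined with the standard dueling aggregation
    # (mean-subtracted advantages); a one-line sketch, not necessarily
    # robobase's exact code:
    #   q = value + advantage - advantage.mean(dim=-1, keepdim=True)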
74 | mode: flatten 75 | -------------------------------------------------------------------------------- /robobase/cfgs/robobase_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - env: null 4 | - method: null 5 | - intrinsic_reward_module: null 6 | - launch: null 7 | - override hydra/launcher: joblib 8 | 9 | # Universal settings 10 | create_train_env: true 11 | num_train_envs: 1 12 | replay_size_before_train: 2000 13 | num_pretrain_steps: 0 14 | num_train_frames: 1100000 15 | eval_every_steps: 10000 16 | num_eval_episodes: 10 17 | update_every_steps: 2 18 | num_explore_steps: 2000 19 | save_snapshot: false 20 | snapshot_every_n: 1000 21 | batch_size: 256 22 | is_imitation_learning: false 23 | 24 | # Demonstration settings 25 | demos: 0 26 | demo_batch_size: null # If set to > 0, introduce a separate buffer for demos 27 | use_self_imitation: false # When using a separate buffer for demos, if set to true, save successful (online) trajectories into the separate demo buffer 28 | 29 | # Observation settings 30 | pixels: false 31 | visual_observation_shape: [84, 84] 32 | frame_stack: 1 33 | frame_stack_on_channel: true 34 | use_onehot_time_and_no_bootstrap: false 35 | 36 | # Action settings 37 | action_repeat: 1 38 | action_sequence: 1 # ActionSequenceWrapper 39 | execution_length: 1 # If execution_length < action_sequence, we use receding horizon control 40 | temporal_ensemble: true # Temporal ensembling is only applicable to action sequence > 1 41 | temporal_ensemble_gain: 0.01 42 | use_standardization: false # Demo-based standardization for action space 43 | use_min_max_normalization: false # Demo-based min-max normalization for action space 44 | min_max_margin: 0.0 # If set to > 0, introduce margin for demo-driven min-max normalization 45 | norm_obs: false 46 | 47 | # Replay buffer settings 48 | replay: 49 | prioritization: false 50 | size: 1000000 51 | gamma: 0.99 52 | demo_size: null 53 | save_dir: null 54 | nstep: 3 55 | num_workers: 4 56 | pin_memory: true 57 | alpha: 0.7 # prioritization 58 | beta: 0.5 # prioritization 59 | sequential: false 60 | transition_seq_len: 1 # The length of transition sequence returned from sample() call. 
Only applicable if sequential is True 61 | 62 | # logging settings 63 | wandb: # Weights & Biases 64 | use: true 65 | project: act_bigym 66 | entity: younggyo 67 | name: act 68 | 69 | tb: # TensorBoard 70 | use: false 71 | log_dir: /tmp/robobase_tb_logs 72 | name: null 73 | 74 | # Misc 75 | experiment_name: exp 76 | seed: 1 77 | num_gpus: 1 78 | log_every: 1000 79 | log_train_video: false 80 | log_eval_video: true 81 | log_pretrain_every: 100 82 | save_csv: false 83 | 84 | hydra: 85 | run: 86 | dir: ./exp_local/${now:%Y.%m.%d}/${now:%H%M%S}_${hydra.job.override_dirname} 87 | sweep: 88 | dir: ./exp_local/${now:%Y.%m.%d}/${now:%H%M}_${hydra.job.override_dirname} 89 | subdir: ${hydra.job.num} 90 | -------------------------------------------------------------------------------- /robobase/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/robobase/envs/__init__.py -------------------------------------------------------------------------------- /robobase/envs/env.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import gymnasium as gym 3 | from omegaconf import DictConfig 4 | 5 | 6 | class Demo(list): 7 | def __init__(self, transition_tuples: List[tuple]): 8 | super().__init__(transition_tuples) 9 | 10 | 11 | class EnvFactory: 12 | def make_train_env(self, cfg: DictConfig) -> gym.vector.VectorEnv: 13 | pass 14 | 15 | def make_eval_env(self, cfg: DictConfig) -> gym.Env: 16 | pass 17 | 18 | def collect_or_fetch_demos(self, cfg: DictConfig, num_demos: int): 19 | """Collect demonstrations or fetch stored demonstrations. 20 | 21 | Args: 22 | cfg (DictConfig): Config 23 | num_demos (int): Number of demonstrations to fetch or collect 24 | """ 25 | raise NotImplementedError("This env does not support demo loading.") 26 | 27 | def post_collect_or_fetch_demos(self, cfg: DictConfig): 28 | """Post-process demonstrations after collecting or storing them. 29 | This is required for cases where such post-processing needs 30 | information from the environments, which is often not available when 31 | we call `collect_or_fetch_demos`. 32 | 33 | Args: 34 | cfg (DictConfig): Config 35 | """ 36 | raise NotImplementedError("This env does not support demo loading.") 37 | 38 | def load_demos_into_replay(self, cfg: DictConfig, buffer): 39 | """Load the collected or fetched demos into the replay buffer. 40 | 41 | Args: 42 | cfg (DictConfig): Config 43 | buffer (_type_): Replay buffer to save the demonstrations. 44 | """ 45 | raise NotImplementedError("This env does not support demo loading.") 46 | 47 | 48 | class DemoEnv(gym.Env): 49 | def __init__(self, demos: List[Demo], action_space, observation_space): 50 | """Init. 
51 | 52 | Args: 53 | demos: A list of demos 54 | """ 55 | self.action_space = action_space 56 | self.observation_space = observation_space 57 | self.is_demo_env = True 58 | self._active_demo = [] 59 | self._loaded_demos = demos 60 | 61 | def modify_actions(self): 62 | pass 63 | 64 | def render(self): 65 | raise NotImplementedError("Not supported for demo env.") 66 | 67 | def step(self, action): 68 | return self._active_demo.pop(0) 69 | 70 | def reset(self, seed=None, options=None): 71 | self._active_demo = self._loaded_demos.pop(0) 72 | return self._active_demo.pop(0) 73 | -------------------------------------------------------------------------------- /robobase/envs/utils/bigym_utils.py: -------------------------------------------------------------------------------- 1 | from bigym.envs.reach_target import ReachTarget, ReachTargetDual, ReachTargetSingle 2 | from bigym.envs.move_plates import MovePlate, MoveTwoPlates 3 | from bigym.envs.cupboards import ( 4 | CupboardsOpenAll, 5 | CupboardsCloseAll, 6 | WallCupboardOpen, 7 | WallCupboardClose, 8 | DrawerTopOpen, 9 | DrawerTopClose, 10 | DrawersAllOpen, 11 | DrawersAllClose, 12 | ) 13 | from bigym.envs.dishwasher import ( 14 | DishwasherOpen, 15 | DishwasherClose, 16 | DishwasherOpenTrays, 17 | DishwasherCloseTrays, 18 | ) 19 | from bigym.envs.dishwasher_cups import ( 20 | DishwasherLoadCups, 21 | DishwasherUnloadCups, 22 | DishwasherUnloadCupsLong, 23 | ) 24 | from bigym.envs.dishwasher_cutlery import ( 25 | DishwasherLoadCutlery, 26 | DishwasherUnloadCutlery, 27 | DishwasherUnloadCutleryLong, 28 | ) 29 | from bigym.envs.dishwasher_plates import ( 30 | DishwasherLoadPlates, 31 | DishwasherUnloadPlates, 32 | DishwasherUnloadPlatesLong, 33 | ) 34 | from bigym.envs.pick_and_place import ( 35 | PutCups, 36 | TakeCups, 37 | PickBox, 38 | SaucepanToHob, 39 | StoreKitchenware, 40 | ToastSandwich, 41 | FlipSandwich, 42 | RemoveSandwich, 43 | StoreBox, 44 | ) 45 | from bigym.envs.manipulation import FlipCup, FlipCutlery, StackBlocks 46 | from bigym.envs.groceries import GroceriesStoreLower, GroceriesStoreUpper 47 | 48 | TASK_MAP = dict( 49 | reach_target_single=ReachTargetSingle, # 2000, 10, enable_all_floating_dofs=False 50 | reach_target_multi_modal=ReachTarget, # 3000, 10, enable_all_floating_dofs=False 51 | reach_target_dual=ReachTargetDual, # 3000, 10, enable_all_floating_dofs=False 52 | stack_blocks=StackBlocks, # 28500, 25 53 | move_plate=MovePlate, # 3000, 10 54 | move_two_plates=MoveTwoPlates, # 5500, 10 55 | flip_cup=FlipCup, # 5500, 10 56 | flip_cutlery=FlipCutlery, # 12500, 25 57 | dishwasher_open=DishwasherOpen, # 7500, 20 58 | dishwasher_close=DishwasherClose, # 7500, 20 59 | dishwasher_open_trays=DishwasherOpenTrays, # 9500, 25 60 | dishwasher_close_trays=DishwasherCloseTrays, # 7500, 25 61 | dishwasher_load_cups=DishwasherLoadCups, # 7500, 10 62 | dishwasher_unload_cups=DishwasherUnloadCups, # 10000, 25 63 | dishwasher_unload_cups_long=DishwasherUnloadCupsLong, # 18000, 25 64 | dishwasher_load_cutlery=DishwasherLoadCutlery, # 7000, 10 65 | dishwasher_unload_cutlery=DishwasherUnloadCutlery, # 15500, 25 66 | dishwasher_unload_cutlery_long=DishwasherUnloadCutleryLong, # 18000, 25 67 | dishwasher_load_plates=DishwasherLoadPlates, # 14000, 25 68 | dishwasher_unload_plates=DishwasherUnloadPlates, # 20000, 25 69 | dishwasher_unload_plates_long=DishwasherUnloadPlatesLong, # 26000, 25 70 | drawer_top_open=DrawerTopOpen, # 5000, 10 71 | drawer_top_close=DrawerTopClose, # 3000, 10 72 | drawers_open_all=DrawersAllOpen, # 12000, 25 73 | 
drawers_close_all=DrawersAllClose, # 5000, 25 74 | wall_cupboard_open=WallCupboardOpen, # 6000, 20 75 | wall_cupboard_close=WallCupboardClose, # 3000, 10 76 | cupboards_open_all=CupboardsOpenAll, # 22500, 25 77 | cupboards_close_all=CupboardsCloseAll, # 15500, 25 78 | take_cups=TakeCups, # 10500, 25 79 | put_cups=PutCups, # 8500, 20 80 | pick_box=PickBox, # 13500, 25 81 | store_box=StoreBox, # 15000, 25 82 | saucepan_to_hob=SaucepanToHob, # 11000, 25 83 | store_kitchenware=StoreKitchenware, # 20000, 25 84 | sandwich_toast=ToastSandwich, # 16500, 25 85 | sandwich_flip=FlipSandwich, # 15500, 25 86 | sandwich_remove=RemoveSandwich, # 13500, 25 87 | store_groceries_lower=GroceriesStoreLower, # 32000, 25 88 | store_groceries_upper=GroceriesStoreUpper, # 19000, 25 89 | ) 90 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from robobase.envs.wrappers.concat_dim import ConcatDim 2 | from robobase.envs.wrappers.frame_stack import FrameStack 3 | from robobase.envs.wrappers.onehot_time import OnehotTime 4 | from robobase.envs.wrappers.rescale_from_tanh import ( 5 | RescaleFromTanh, 6 | RescaleFromTanhEEPose, 7 | RescaleFromTanhWithStandardization, 8 | RescaleFromTanhWithMinMax, 9 | ) 10 | from robobase.envs.wrappers.transpose_image_chw import TransposeImageCHW 11 | from robobase.envs.wrappers.action_sequence import ( 12 | ActionSequence, 13 | RecedingHorizonControl, 14 | ) 15 | from robobase.envs.wrappers.append_demo_info import AppendDemoInfo 16 | from robobase.envs.wrappers.reward_modifiers import ( 17 | ClipReward, 18 | ScaleReward, 19 | ShapeRewards, 20 | ) 21 | 22 | __all__ = [ 23 | "ConcatDim", 24 | "FrameStack", 25 | "OnehotTime", 26 | "RescaleFromTanh", 27 | "RescaleFromTanhEEPose", 28 | "RescaleFromTanhWithStandardization", 29 | "RescaleFromTanhWithMinMax", 30 | "TransposeImageCHW", 31 | "ScaleReward", 32 | "ShapeRewards", 33 | "ClipReward", 34 | "ActionSequence", 35 | "AppendDemoInfo", 36 | "RecedingHorizonControl", 37 | ] 38 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/append_demo_info.py: -------------------------------------------------------------------------------- 1 | """Append Demo info.""" 2 | import gymnasium as gym 3 | import numpy as np 4 | 5 | 6 | class AppendDemoInfo(gym.Wrapper, gym.utils.RecordConstructorArgs): 7 | """Append a demo flag to the info dict.""" 8 | 9 | def __init__(self, env: gym.Env): 10 | """Init. 
11 | 12 | Args: 13 | env: The environment to apply the wrapper 14 | """ 15 | gym.utils.RecordConstructorArgs.__init__(self) 16 | gym.Wrapper.__init__(self, env) 17 | self.is_vector_env = getattr(env, "is_vector_env", False) 18 | 19 | def _modify_info(self, info): 20 | if "demo" not in info: 21 | if self.is_vector_env: 22 | info["demo"] = np.zeros((self.num_envs,)) 23 | else: 24 | info["demo"] = 0 25 | return info 26 | 27 | def reset(self, *args, **kwargs): 28 | """See base.""" 29 | obs, info = self.env.reset(*args, **kwargs) 30 | return obs, self._modify_info(info) 31 | 32 | def step(self, action): 33 | """See base.""" 34 | *rest, info = self.env.step(action) 35 | return *rest, self._modify_info(info) 36 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/concat_dim.py: -------------------------------------------------------------------------------- 1 | """Concatenates dictionary of observations that share same shape.""" 2 | import numpy as np 3 | 4 | import gymnasium as gym 5 | from gymnasium.spaces import Box, Dict 6 | 7 | 8 | class ConcatDim(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): 9 | """Concatenates dictionary of observations that share same shape.""" 10 | 11 | def __init__( 12 | self, 13 | env: gym.Env, 14 | shape_length: int, 15 | dim: int, 16 | new_name: str, 17 | norm_obs: bool = False, 18 | obs_stats: dict = None, 19 | keys_to_ignore: list[str] = None, 20 | ): 21 | """Init. 22 | 23 | Args: 24 | env: The environment to apply the wrapper 25 | shape_length: The ndim we are interested in, e.g. images=3, low_dim=1. 26 | dim: The dimension along which the matching observations are concatenated. 27 | new_name: The name of the new observation. 28 | norm_obs: Whether to normalize observations. 29 | obs_stats: The obs statistics for normalizing observations. 30 | keys_to_ignore: A list of keys to not include in this combined observation, 31 | regardless if they meet shape_len. 32 | """ 33 | gym.utils.RecordConstructorArgs.__init__(self) 34 | gym.ObservationWrapper.__init__(self, env) 35 | self.is_vector_env = getattr(env, "is_vector_env", False) 36 | self._shape_length = shape_length + int(self.is_vector_env) 37 | self._dim = dim + int(self.is_vector_env) 38 | self._new_name = new_name 39 | self._keys_to_ignore = [] if keys_to_ignore is None else keys_to_ignore 40 | self._norm_obs = norm_obs 41 | self._obs_stats = obs_stats 42 | new_obs_dict = {} 43 | combined = [] 44 | for k, v in self.observation_space.items(): 45 | if len(v.shape) == self._shape_length and k not in self._keys_to_ignore: 46 | combined.append(v) 47 | else: 48 | new_obs_dict[k] = v 49 | new_min = np.concatenate(list(map(lambda s: s.low, combined)), self._dim) 50 | new_max = np.concatenate(list(map(lambda s: s.high, combined)), self._dim) 51 | new_obs_dict[new_name] = Box(new_min, new_max, dtype=np.float32) 52 | self.observation_space = Dict(new_obs_dict) 53 | 54 | def _transform_timestep(self, observation, final: bool = False): 55 | shape_len = self._shape_length - int(final) 56 | dim = self._dim - int(final) 57 | new_obs = {} 58 | combined = [] 59 | for k, v in observation.items(): 60 | # We allow normalizing observations in the ConcatDim wrapper 61 | # because all obs stats are stored with original key names and 62 | # ConcatDim will rename them to new keys. Doing it here would 63 | # be safer and cleaner. 
64 | if len(v.shape) == shape_len and k not in self._keys_to_ignore: 65 | if self._norm_obs and k in self._obs_stats: 66 | v = (v - self._obs_stats["mean"][k]) / self._obs_stats["std"][k] 67 | combined.append(v) 68 | else: 69 | new_obs[k] = v 70 | new_obs[self._new_name] = np.concatenate(combined, dim) 71 | return new_obs 72 | 73 | def observation(self, observation): 74 | """Concatenates matching entries of the observation dict. 75 | 76 | Args: 77 | observation: The observation dict to transform 78 | 79 | Returns: 80 | The observation with matching entries concatenated under the new key 81 | """ 82 | return self._transform_timestep(observation) 83 | 84 | def step(self, action): 85 | """Steps through the environment, concatenating matching observations. 86 | 87 | Args: 88 | action: The action to take 89 | 90 | Returns: 91 | The environment's step using the action. 92 | """ 93 | observations, *rest, info = super().step(action) 94 | if "final_observation" in info: 95 | for fidx in np.where(info["_final_observation"])[0]: 96 | info["final_observation"][fidx] = self._transform_timestep( 97 | info["final_observation"][fidx], final=True 98 | ) 99 | return self.observation(observations), *rest, info 100 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/frame_stack.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import numpy as np 4 | import gymnasium as gym 5 | from gymnasium import spaces 6 | 7 | 8 | class FrameStack(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): 9 | """Observation wrapper that stacks the observations in a rolling manner. 10 | 11 | For example, if the number of stacks is 4, then the returned observation contains 12 | the most recent 4 observations. For environment 'Pendulum-v1', the original 13 | observation is an array with shape [3], so if we stack 4 observations, the 14 | processed observation has shape 15 | [4, 3]. 16 | 17 | Note: 18 | - After :meth:`reset` is called, the frame buffer will be filled with the 19 | initial observation. I.e. the observation returned by :meth:`reset` will 20 | consist of `num_stack` many identical frames. 21 | 22 | Example: 23 | >>> import gymnasium as gym 24 | >>> from robobase.envs.wrappers import FrameStack 25 | >>> env = gym.make("CarRacing-v2") 26 | >>> env = FrameStack(env, 4) 27 | >>> env.observation_space 28 | Box(0, 255, (4, 96, 96, 3), uint8) 29 | >>> obs, _ = env.reset() 30 | >>> obs.shape 31 | (4, 96, 96, 3) 32 | """ 33 | 34 | def __init__( 35 | self, 36 | env: gym.Env, 37 | num_stack: int, 38 | ): 39 | """Observation wrapper that stacks the observations in a rolling manner.
40 | 41 | Args: 42 | env (Env): The environment to apply the wrapper 43 | num_stack (int): The number of frames to stack 44 | """ 45 | gym.utils.RecordConstructorArgs.__init__(self, num_stack=num_stack) 46 | gym.ObservationWrapper.__init__(self, env) 47 | self.is_vector_env = getattr(env, "is_vector_env", False) 48 | self.num_stack = num_stack 49 | self.frames = {} 50 | new_obs_dict = {} 51 | for name in self.observation_space.keys(): 52 | orig_space = env.observation_space[name] 53 | self._axis = axis = 0 54 | shape = (num_stack,) + orig_space.shape 55 | if self.is_vector_env: 56 | self._axis = axis = 1 57 | shape = orig_space.shape[:1] + (num_stack,) + orig_space.shape[1:] 58 | new_obs_dict[name] = spaces.Box( 59 | np.expand_dims(orig_space.low, axis).repeat(num_stack, axis), 60 | np.expand_dims(orig_space.high, axis).repeat(num_stack, axis), 61 | shape=shape, 62 | dtype=orig_space.dtype, 63 | ) 64 | self.frames[name] = np.zeros_like(new_obs_dict[name].sample()) 65 | self.observation_space = spaces.Dict(new_obs_dict) 66 | 67 | def _add_frame(self, observation): 68 | for name, value in observation.items(): 69 | if self.is_vector_env: 70 | self.frames[name] = np.concatenate( 71 | [self.frames[name][:, 1:], np.expand_dims(value, 1)], 1 72 | ) 73 | else: 74 | self.frames[name] = np.concatenate([self.frames[name][1:], [value]], 0) 75 | 76 | def _add_frame_at_idx(self, observation, idx: int = None): 77 | for name, value in observation.items(): 78 | self.frames[name][idx] = np.concatenate( 79 | [self.frames[name][idx, 1:], np.expand_dims(value, 0)], 0 80 | ) 81 | 82 | def observation(self, observation): 83 | return deepcopy(self.frames) 84 | 85 | def step(self, action): 86 | """Steps through the environment, appending the observation to the frame buffer. 87 | 88 | Args: 89 | action: The action to step through the environment with 90 | 91 | Returns: 92 | Stacked observations, reward, terminated, truncated, and information 93 | from the environment 94 | """ 95 | observation, reward, terminated, truncated, info = self.env.step(action) 96 | if "final_observation" in info: 97 | for fidx in np.where(info["_final_observation"])[0]: 98 | self._add_frame_at_idx(info["final_observation"][fidx], fidx) 99 | info["final_observation"][fidx] = { 100 | k: v[fidx] 101 | for k, v in self.observation(info["final_observation"]).items() 102 | } 103 | single_agent_obs = {k: v[fidx] for k, v in observation.items()} 104 | # Refill the finished env's frame buffer with its first 105 | # post-reset observation. 106 | for _ in range(self.num_stack): 107 | self._add_frame_at_idx(single_agent_obs, fidx) 108 | self._add_frame(observation) 109 | return self.observation(observation), reward, terminated, truncated, info 110 | 111 | def reset(self, **kwargs): 112 | """Reset the environment with kwargs.
113 | 114 | Args: 115 | **kwargs: The kwargs for the environment reset 116 | 117 | Returns: 118 | The stacked observations 119 | """ 120 | obs, info = self.env.reset(**kwargs) 121 | for _ in range(self.num_stack): self._add_frame(obs) 122 | return self.observation(obs), info 123 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/onehot_time.py: -------------------------------------------------------------------------------- 1 | """Wrapper for adding time aware observations to environment observation.""" 2 | import numpy as np 3 | 4 | import gymnasium as gym 5 | from gymnasium.spaces import Box, Dict 6 | 7 | 8 | class OnehotTime(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): 9 | """Augment the observation with the current time step in the episode. 10 | Example: 11 | >>> import gymnasium as gym 12 | >>> from robobase.envs.wrappers import OnehotTime 13 | >>> env = gym.make("CartPole-v1") 14 | >>> env = OnehotTime(env, episode_length=2) 15 | >>> env.reset(seed=42) 16 | (array([ 0.0273956 , -0.00611216, 0.03585979, 0.0197368 , 17 | 1. , 0. , 0. ]), {}) 18 | >>> _ = env.action_space.seed(42) 19 | >>> env.step(env.action_space.sample())[0] 20 | array([ 0.02727336, -0.20172954, 0.03625453, 0.32351476, 21 | 0. , 1. , 0. ]) 22 | """ 23 | 24 | PADDING = 2 25 | 26 | def __init__(self, env: gym.Env, episode_length: int): 27 | """Init. 28 | 29 | Args: 30 | env: The environment to apply the wrapper 31 | episode_length: The environment episode length 32 | """ 33 | gym.utils.RecordConstructorArgs.__init__(self) 34 | gym.ObservationWrapper.__init__(self, env) 35 | self._episode_length = episode_length 36 | self.is_vector_env = getattr(env, "is_vector_env", False) 37 | extra_dim = (self.num_envs,) if self.is_vector_env else () 38 | if isinstance(self.observation_space, Box): 39 | low = np.append( 40 | self.observation_space.low, 41 | extra_dim + tuple([0.0] * (episode_length + OnehotTime.PADDING)), 42 | ) 43 | high = np.append( 44 | self.observation_space.high, 45 | extra_dim + tuple([1.0] * (episode_length + OnehotTime.PADDING)), 46 | ) 47 | self.observation_space = Box(low, high, dtype=np.float32) 48 | elif isinstance(self.observation_space, Dict): 49 | self.observation_space["time"] = Box( 50 | 0, 1, extra_dim + (episode_length + OnehotTime.PADDING,), dtype=np.uint8 51 | ) 52 | else: 53 | raise ValueError("Unsupported space.") 54 | self._eye = np.eye(self._episode_length + OnehotTime.PADDING).astype(np.uint8) 55 | self._reset_t() 56 | 57 | def _reset_t(self): 58 | self._t = np.array([0], dtype=int) 59 | if self.is_vector_env: 60 | self._t = np.zeros( 61 | ( 62 | 1, 63 | self.num_envs, 64 | ), 65 | dtype=int, 66 | ) 67 | 68 | def _transform_timestep(self, observation, t: np.ndarray = None): 69 | t = self._t if t is None else t 70 | observation["time"] = self._eye[t][0] 71 | return observation 72 | 73 | def observation(self, observation): 74 | """Adds the current one-hot time step to the observation. 75 | 76 | Args: 77 | observation: The observation to add the time step to 78 | 79 | Returns: 80 | The observation with the time step appended 81 | """ 82 | return self._transform_timestep(observation) 83 | 84 | def step(self, action): 85 | """Steps through the environment, incrementing the time step. 86 | 87 | Args: 88 | action: The action to take 89 | 90 | Returns: 91 | The environment's step using the action.
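Note that terminations and truncations are merged with a logical OR and the
truncated flag is always returned as False, so episodes that hit the time
limit are ended as true terminations and are not bootstrapped.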
92 | """ 93 | self._t += 1 94 | observations, rewards, terminations, truncations, info = self.env.step(action) 95 | if "final_observation" in info: 96 | for fidx in np.where(info["_final_observation"])[0]: 97 | info["final_observation"][fidx] = self._transform_timestep( 98 | info["final_observation"][fidx], self._t[:, fidx] 99 | ) 100 | self._t[:, fidx] = 0 101 | observations = self._transform_timestep(observations) 102 | return ( 103 | observations, 104 | rewards, 105 | np.logical_or( 106 | terminations, truncations 107 | ), # Required to end the episode when truncated 108 | False, # Set to False not to bootstrap 109 | info, 110 | ) 111 | 112 | def reset(self, **kwargs): 113 | """Reset the environment setting the time to zero. 114 | 115 | Args: 116 | **kwargs: Kwargs to apply to env.reset() 117 | 118 | Returns: 119 | The reset environment 120 | """ 121 | self._reset_t() 122 | return super().reset(**kwargs) 123 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/reward_modifiers.py: -------------------------------------------------------------------------------- 1 | """Shape Rewards.""" 2 | import gymnasium as gym 3 | 4 | 5 | class ShapeRewards(gym.Wrapper, gym.utils.RecordConstructorArgs): 6 | """Shape Rewards.""" 7 | 8 | def __init__(self, env: gym.Env, reward_shaping_fn: callable): 9 | """General function to shape the rewards. 10 | 11 | Args: 12 | env: The environment to apply the wrapper 13 | reward_shaping_fn: The reward shaping function. 14 | """ 15 | gym.utils.RecordConstructorArgs.__init__( 16 | self, reward_shaping_fn=reward_shaping_fn 17 | ) 18 | gym.Wrapper.__init__(self, env) 19 | self.is_vector_env = getattr(env, "is_vector_env", False) 20 | self.fn = reward_shaping_fn 21 | 22 | def step(self, action): 23 | """Steps through the environment, incrementing the time step. 24 | 25 | Args: 26 | action: The action to take 27 | 28 | Returns: 29 | The environment's step using the action. 30 | """ 31 | observations, reward, *rest = self.env.step(action) 32 | return observations, self.fn(reward), *rest 33 | 34 | 35 | class ScaleReward(ShapeRewards): 36 | """Scale Rewars.""" 37 | 38 | def __init__(self, env: gym.Env, scale: float): 39 | """Scale the rewards. 40 | 41 | Args: 42 | env: The environment to apply the wrapper 43 | scale: The scale value 44 | """ 45 | super().__init__(env, lambda r: r * scale) 46 | 47 | 48 | class ClipReward(ShapeRewards): 49 | """Clip Rewards.""" 50 | 51 | def __init__(self, env: gym.Env, lower_bound: float, upper_bound: float): 52 | """Clip the rewards. 53 | 54 | Args: 55 | env: The environment to apply the wrapper 56 | lower_bound: The lower bound 57 | upper_bound: The upper bound 58 | """ 59 | super().__init__(env, lambda r: max(min(r, upper_bound), lower_bound)) 60 | -------------------------------------------------------------------------------- /robobase/envs/wrappers/transpose_image_chw.py: -------------------------------------------------------------------------------- 1 | """Wrapper for adding time aware observations to environment observation.""" 2 | import numpy as np 3 | 4 | import gymnasium as gym 5 | from gymnasium import spaces 6 | 7 | 8 | class TransposeImageCHW(gym.ObservationWrapper, gym.utils.RecordConstructorArgs): 9 | """Turn images from HWC to CHW.""" 10 | 11 | def __init__(self, env: gym.Env): 12 | """Init. 
13 | 14 | Args: 15 | env: The environment to apply the wrapper 16 | """ 17 | gym.utils.RecordConstructorArgs.__init__(self) 18 | gym.ObservationWrapper.__init__(self, env) 19 | self.is_vector_env = getattr(env, "is_vector_env", False) 20 | self._vision_ndim = 4 if self.is_vector_env else 3 21 | for k, v in self.observation_space.items(): 22 | if len(v.shape) == self._vision_ndim: 23 | self.observation_space[k] = spaces.Box( 24 | 0, 25 | 255, 26 | dtype=np.uint8, 27 | shape=(*v.shape[:-3], 3, v.shape[-3], v.shape[-2]), 28 | ) 29 | 30 | def observation(self, observation, final: bool = False): 31 | """Transposes image entries of the observation from HWC to CHW. 32 | 33 | Args: 34 | observation: The observation dict to transpose. 35 | final: Whether this is a final observation 36 | 37 | Returns: 38 | The observation with image entries transposed to CHW 39 | """ 40 | for k, v in observation.items(): 41 | if len(v.shape) == (self._vision_ndim - int(final)): 42 | observation[k] = v.transpose(*np.arange(0, v.ndim - 3), -1, -3, -2) 43 | return observation 44 | 45 | def step(self, action): 46 | """Steps through the environment, transposing image observations. 47 | 48 | Args: 49 | action: The action to step through the environment with 50 | 51 | Returns: 52 | Transposed observations, reward, terminated, truncated, and information 53 | from the environment 54 | """ 55 | observation, reward, terminated, truncated, info = self.env.step(action) 56 | if "final_observation" in info: 57 | for fidx in np.where(info["_final_observation"])[0]: 58 | info["final_observation"][fidx] = self.observation( 59 | info["final_observation"][fidx], True 60 | ) 61 | return self.observation(observation), reward, terminated, truncated, info 62 | -------------------------------------------------------------------------------- /robobase/intrinsic_reward_module/__init__.py: -------------------------------------------------------------------------------- 1 | from robobase.intrinsic_reward_module.rnd import RND 2 | from robobase.intrinsic_reward_module.icm import ICM 3 | 4 | __all__ = ["RND", "ICM"] 5 | -------------------------------------------------------------------------------- /robobase/intrinsic_reward_module/core.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import torch 3 | from gymnasium import spaces 4 | 5 | from robobase.method.utils import ( 6 | extract_many_from_spec, 7 | extract_from_spec, 8 | extract_many_from_batch, 9 | ) 10 | 11 | 12 | class IntrinsicRewardModule(ABC): 13 | """Base class of intrinsic reward module.""" 14 | 15 | def __init__( 16 | self, 17 | observation_space: spaces.Dict, 18 | action_space: spaces.Box, 19 | device: torch.device, 20 | beta: float = 0.05, 21 | kappa: float = 0.000025, 22 | ) -> None: 23 | """Init. 24 | 25 | Args: 26 | observation_space: The observation space of the environment. 27 | action_space: The action space of the environment. 28 | device: Device to run the model. 29 | beta: The initial weighting coefficient of the intrinsic rewards. 30 | kappa: The decay rate.
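At training step t the effective weight decays as
beta_t = beta * (1 - kappa) ** t (see RND.compute_irs).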
31 | """ 32 | self.observation_space = observation_space 33 | self.action_space = action_space 34 | self.device = device 35 | self.beta = beta 36 | self.kappa = kappa 37 | 38 | self.rgb_spaces = extract_many_from_spec( 39 | observation_space, r"rgb.*", missing_ok=True 40 | ) 41 | self.low_dim_space = extract_from_spec( 42 | observation_space, "low_dim_state", missing_ok=True 43 | ) 44 | self.use_pixels = len(self.rgb_spaces) > 0 45 | 46 | @abstractmethod 47 | def compute_irs( 48 | self, batch: dict[str, torch.Tensor], step: int = 0 49 | ) -> torch.Tensor: 50 | """Compute the intrinsic rewards for current samples. 51 | 52 | Args: 53 | batch: Batch of data. 54 | step: The global training step. 55 | 56 | Returns: 57 | The intrinsic rewards. 58 | """ 59 | 60 | @abstractmethod 61 | def update( 62 | self, 63 | batch: dict[str, torch.Tensor], 64 | ) -> None: 65 | """Update the intrinsic reward module if necessary. 66 | 67 | Args: 68 | batch: Batch of data. 69 | """ 70 | 71 | def _extract_obs(self, batch: dict[str, torch.Tensor], name_or_regex: str): 72 | if self.use_pixels: 73 | # dict of {"cam_name": (B, T, 3, H, W)} 74 | obs = extract_many_from_batch(batch, name_or_regex) 75 | # Fold views into time axis 76 | obs = torch.cat(list(obs.values()), 1) 77 | # Fold time into channel axis 78 | obs = obs.view(obs.shape[0], -1, *obs.shape[3:]) 79 | else: 80 | # Get last timestep 81 | obs = batch[name_or_regex][:, -1] 82 | return obs 83 | -------------------------------------------------------------------------------- /robobase/intrinsic_reward_module/rnd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from robobase.intrinsic_reward_module.core import IntrinsicRewardModule 6 | from robobase.intrinsic_reward_module.utils import Encoder 7 | 8 | 9 | class RND(IntrinsicRewardModule): 10 | """Exploration by Random Network Distillation (RND). 11 | 12 | Burda, Yuri, et al. "Exploration by random network distillation." 13 | https://arxiv.org/pdf/1810.12894.pdf 14 | 15 | If pixels are used, then any low-dimensional input will not be passed into RND. 16 | All pixel observations are concatenated on channel axis, and assumed same shape. 17 | """ 18 | 19 | def __init__( 20 | self, latent_dim: int = 128, lr: float = 0.001, *args, **kwargs 21 | ) -> None: 22 | """Init. 23 | 24 | Args: 25 | latent_dim: The dimension of encoding vectors. 26 | lr: The learning rate. 
27 | """ 28 | super().__init__(*args, **kwargs) 29 | if self.use_pixels: 30 | obs_shapes = [v.shape for v in self.rgb_spaces.values()] 31 | # Fuse num views and time into channel axis 32 | obs_shape = (len(obs_shapes) * np.prod(obs_shapes[0][:2]),) + obs_shapes[0][ 33 | 2: 34 | ] 35 | else: 36 | obs_shape = self.low_dim_space.shape[-1:] 37 | self.predictor = Encoder( 38 | obs_shape=obs_shape, 39 | latent_dim=latent_dim, 40 | ).to(self.device) 41 | self.target = Encoder( 42 | obs_shape=obs_shape, 43 | latent_dim=latent_dim, 44 | ).to(self.device) 45 | 46 | self.opt = torch.optim.Adam(self.predictor.parameters(), lr=lr) 47 | 48 | # freeze the network parameters 49 | for p in self.target.parameters(): 50 | p.requires_grad = False 51 | 52 | def compute_irs( 53 | self, batch: dict[str, torch.Tensor], step: int = 0 54 | ) -> torch.Tensor: 55 | """See Base.""" 56 | # compute the weighting coefficient of timestep t 57 | beta_t = self.beta * np.power(1.0 - self.kappa, step) 58 | next_obs = self._extract_obs( 59 | batch, r"rgb.*tp1" if self.use_pixels else "low_dim_state_tp1" 60 | ) 61 | 62 | with torch.no_grad(): 63 | src_feats = self.predictor(next_obs) 64 | tgt_feats = self.target(next_obs) 65 | dist = F.mse_loss(src_feats, tgt_feats, reduction="none").mean( 66 | dim=1, keepdim=True 67 | ) 68 | dist = (dist - dist.min()) / (dist.max() - dist.min() + 1e-11) 69 | intrinsic_rewards = dist 70 | 71 | return intrinsic_rewards * beta_t 72 | 73 | def update(self, batch: dict[str, torch.Tensor]) -> None: 74 | """See Base.""" 75 | obs = self._extract_obs( 76 | batch, r"rgb(?!.*?tp1)" if self.use_pixels else "low_dim_state" 77 | ) 78 | src_feats = self.predictor(obs) 79 | with torch.no_grad(): 80 | tgt_feats = self.target(obs) 81 | self.opt.zero_grad() 82 | loss = F.mse_loss(src_feats, tgt_feats) 83 | loss.backward() 84 | self.opt.step() 85 | -------------------------------------------------------------------------------- /robobase/intrinsic_reward_module/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Encoder(nn.Module): 6 | """Encoder for encoding observations.""" 7 | 8 | def __init__(self, obs_shape: tuple, latent_dim: int) -> None: 9 | """Init. 10 | 11 | Args: 12 | obs_shape: The data shape of observations. 13 | latent_dim: The dimension of encoding vectors. 14 | 15 | Returns: 16 | Encoder instance. 17 | """ 18 | super().__init__() 19 | # visual 20 | if len(obs_shape) == 3: 21 | self.trunk = nn.Sequential( 22 | nn.Conv2d(obs_shape[0], 32, kernel_size=3, stride=2, padding=1), 23 | nn.ELU(), 24 | nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1), 25 | nn.ELU(), 26 | nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1), 27 | nn.ELU(), 28 | nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1), 29 | nn.ELU(), 30 | nn.Flatten(), 31 | ) 32 | with torch.no_grad(): 33 | sample = torch.ones(size=tuple(obs_shape)) 34 | n_flatten = self.trunk(sample.unsqueeze(0)).shape[1] 35 | 36 | self.linear = nn.Linear(n_flatten, latent_dim) 37 | else: 38 | self.trunk = nn.Sequential(nn.Linear(obs_shape[0], 256), nn.ReLU()) 39 | self.linear = nn.Linear(256, latent_dim) 40 | 41 | def forward(self, obs: torch.Tensor) -> torch.Tensor: 42 | """Encode the input tensors. 43 | 44 | Args: 45 | obs: Observations. 46 | 47 | Returns: 48 | Encoding tensors. 
49 | """ 50 | if len(obs.shape) == 4: 51 | # RGB image 52 | obs = obs.float() / 255.0 - 0.5 53 | return self.linear(self.trunk(obs)) 54 | -------------------------------------------------------------------------------- /robobase/method/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/robobase/method/__init__.py -------------------------------------------------------------------------------- /robobase/method/alix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robobase.method.utils import extract_many_from_spec 3 | 4 | from robobase.models.lix_utils import analysis_optimizers 5 | from robobase.method.drqv2 import DrQV2 6 | from robobase.models.lix_utils.analysis_modules import LIXModule 7 | 8 | 9 | class ALIX(DrQV2): 10 | """Implementation of Adaptive Local SIgnal MiXing (A-LIX). 11 | 12 | Cetin et al. Stabilizing Off-Policy Deep Reinforcement Learning from Pixels 13 | """ 14 | 15 | def __init__(self, *args, **kwargs): 16 | kwargs["use_augmentation"] = False 17 | super().__init__(*args, **kwargs) 18 | 19 | def build_encoder(self): 20 | rgb_spaces = extract_many_from_spec( 21 | self.observation_space, r"rgb.*", missing_ok=True 22 | ) 23 | if len(rgb_spaces) > 0: 24 | rgb_shapes = [s.shape for s in rgb_spaces.values()] 25 | assert np.all( 26 | [sh == rgb_shapes[0] for sh in rgb_shapes] 27 | ), "Expected all RGB obs to be same shape." 28 | 29 | num_views = len(rgb_shapes) 30 | if self.frame_stack_on_channel: 31 | obs_shape = (np.prod(rgb_shapes[0][:2]), *rgb_shapes[0][2:]) 32 | else: 33 | # T is folded into batch 34 | obs_shape = rgb_shapes[0][1:] 35 | self.encoder = self.encoder_model(input_shape=(num_views, *obs_shape)) 36 | if not isinstance(self.encoder, LIXModule): 37 | raise ValueError("Encoder must be of type LIXModule.") 38 | self.encoder.to(self.device) 39 | self.encoder_opt = ( 40 | analysis_optimizers.custom_parameterized_aug_optimizer_builder( 41 | encoder_lr=self.encoder_lr, lr=2e-3, betas=[0.5, 0.999] 42 | )(self.encoder) 43 | ) 44 | -------------------------------------------------------------------------------- /robobase/method/core.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Iterator, TypeAlias, Optional 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | from gymnasium import spaces 8 | 9 | from robobase.intrinsic_reward_module.core import IntrinsicRewardModule 10 | from robobase.replay_buffer.replay_buffer import ReplayBuffer 11 | 12 | 13 | BatchedActionSequence: TypeAlias = np.ndarray[ 14 | tuple[int, int, int], np.dtype[np.float32] 15 | ] 16 | Metrics: TypeAlias = dict[str, np.ndarray] 17 | 18 | 19 | class Method(nn.Module, ABC): 20 | def __init__( 21 | self, 22 | observation_space: spaces.Dict, 23 | action_space: spaces.Box, 24 | device: torch.device, 25 | num_train_envs: int, 26 | replay_alpha: float, 27 | replay_beta: float, 28 | frame_stack_on_channel: bool, 29 | intrinsic_reward_module: Optional[IntrinsicRewardModule] = None, 30 | is_rl: bool = False, 31 | ): 32 | super().__init__() 33 | self.observation_space = observation_space 34 | self.action_space = action_space 35 | self.device = device 36 | self.num_train_envs = num_train_envs 37 | self.replay_alpha = replay_alpha 38 | self.replay_beta = replay_beta 39 | 
self.frame_stack_on_channel = frame_stack_on_channel 40 | self.num_eval_envs = 1 # Fixed to 1 for now 41 | self.intrinsic_reward_module = intrinsic_reward_module 42 | self._eval_env_running = False 43 | self.logging = False 44 | self.is_rl = is_rl 45 | 46 | @property 47 | def random_explore_action(self) -> torch.Tensor: 48 | # All actions live in -1 to 1, regardless of environment. 49 | min_action = -1 50 | max_action = 1 51 | return (min_action - max_action) * torch.rand( 52 | size=(self.num_train_envs,) + self.action_space.shape 53 | ) + max_action 54 | 55 | @abstractmethod 56 | def act( 57 | self, observations: dict[str, torch.Tensor], step: int, eval_mode: bool 58 | ) -> BatchedActionSequence: 59 | pass 60 | 61 | @abstractmethod 62 | def update( 63 | self, 64 | replay_iter: Iterator[dict[str, torch.Tensor]], 65 | step: int, 66 | replay_buffer: ReplayBuffer = None, 67 | ) -> Metrics: 68 | pass 69 | 70 | @abstractmethod 71 | def reset(self, step: int, agents_to_reset: list[int]): 72 | pass 73 | 74 | @property 75 | def eval_env_running(self): 76 | return self._eval_env_running 77 | 78 | def set_eval_env_running(self, value: bool): 79 | self._eval_env_running = value 80 | 81 | 82 | class ImitationLearningMethod(Method, ABC): 83 | pass 84 | 85 | 86 | class OffPolicyMethod(Method, ABC): 87 | pass 88 | 89 | 90 | class OnPolicyMethod(Method, ABC): 91 | """TODO: Leave open for future development.""" 92 | 93 | pass 94 | 95 | 96 | class ModelBasedMethod(Method, ABC): 97 | def __init__(self, *args, **kwargs): 98 | super().__init__(*args, **kwargs) 99 | -------------------------------------------------------------------------------- /robobase/models/__init__.py: -------------------------------------------------------------------------------- 1 | from robobase.models.encoder import * # noqa: F403 2 | from robobase.models.decoder import * # noqa: F403 3 | from robobase.models.fusion import * # noqa: F403 4 | from robobase.models.fully_connected import * # noqa: F403 5 | -------------------------------------------------------------------------------- /robobase/models/act/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/robobase/models/act/__init__.py -------------------------------------------------------------------------------- /robobase/models/act/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Various positional encodings for the transformer. 4 | """ 5 | import math 6 | import torch 7 | from torch import nn 8 | 9 | from robobase.models.act.utils.misc import NestedTensor 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images. 
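Concretely, each (optionally normalized) x/y coordinate is divided by
temperature ** (2 * (i // 2) / num_pos_feats) and mapped through interleaved
sine/cosine channels, yielding num_pos_feats features per spatial axis.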
16 | """ 17 | 18 | def __init__( 19 | self, num_pos_feats=64, temperature=10000, normalize=False, scale=None 20 | ): 21 | super().__init__() 22 | self.num_pos_feats = num_pos_feats 23 | self.temperature = temperature 24 | self.normalize = normalize 25 | if scale is not None and normalize is False: 26 | raise ValueError("normalize should be True if scale is passed") 27 | if scale is None: 28 | scale = 2 * math.pi 29 | self.scale = scale 30 | 31 | def forward(self, tensor): 32 | x = tensor 33 | # mask = tensor_list.mask 34 | # assert mask is not None 35 | # not_mask = ~mask 36 | 37 | not_mask = torch.ones_like(x[0, [0]]) 38 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 39 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 40 | if self.normalize: 41 | eps = 1e-6 42 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 43 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 44 | 45 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 46 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 47 | 48 | pos_x = x_embed[:, :, :, None] / dim_t 49 | pos_y = y_embed[:, :, :, None] / dim_t 50 | pos_x = torch.stack( 51 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 52 | ).flatten(3) 53 | pos_y = torch.stack( 54 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 55 | ).flatten(3) 56 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 57 | return pos 58 | 59 | 60 | class PositionEmbeddingLearned(nn.Module): 61 | """ 62 | Absolute pos embedding, learned. 63 | """ 64 | 65 | def __init__(self, num_pos_feats=256): 66 | super().__init__() 67 | self.row_embed = nn.Embedding(50, num_pos_feats) 68 | self.col_embed = nn.Embedding(50, num_pos_feats) 69 | self.reset_parameters() 70 | 71 | def reset_parameters(self): 72 | nn.init.uniform_(self.row_embed.weight) 73 | nn.init.uniform_(self.col_embed.weight) 74 | 75 | def forward(self, tensor_list: NestedTensor): 76 | x = tensor_list.tensors 77 | h, w = x.shape[-2:] 78 | i = torch.arange(w, device=x.device) 79 | j = torch.arange(h, device=x.device) 80 | x_emb = self.col_embed(i) 81 | y_emb = self.row_embed(j) 82 | pos = ( 83 | torch.cat( 84 | [ 85 | x_emb.unsqueeze(0).repeat(h, 1, 1), 86 | y_emb.unsqueeze(1).repeat(1, w, 1), 87 | ], 88 | dim=-1, 89 | ) 90 | .permute(2, 0, 1) 91 | .unsqueeze(0) 92 | .repeat(x.shape[0], 1, 1, 1) 93 | ) 94 | return pos 95 | 96 | 97 | def build_position_encoding(hidden_dim, pos_emb): 98 | N_steps = hidden_dim // 2 99 | if pos_emb in ("v2", "sine"): 100 | # TODO find a better way of exposing other arguments 101 | position_embedding = PositionEmbeddingSine(N_steps, normalize=True) 102 | elif pos_emb in ("v3", "learned"): 103 | position_embedding = PositionEmbeddingLearned(N_steps) 104 | else: 105 | raise ValueError(f"not supported {pos_emb}") 106 | 107 | return position_embedding 108 | -------------------------------------------------------------------------------- /robobase/models/act/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/robobase/models/act/utils/__init__.py -------------------------------------------------------------------------------- /robobase/models/core.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Optional 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from 
robobase.models.utils import ImgChLayerNorm, layernorm_for_cnn, identity_cls 8 | 9 | 10 | class RoboBaseModule(nn.Module, ABC): 11 | @property 12 | @abstractmethod 13 | def output_shape(self): 14 | raise NotImplementedError() 15 | 16 | def calculate_loss(self, *args, **kwargs) -> Optional[torch.Tensor]: 17 | return None 18 | 19 | 20 | def get_activation_fn_from_str(act: str) -> type[nn.Module]: 21 | if act == "relu": 22 | return nn.ReLU 23 | elif act == "lrelu": 24 | return nn.LeakyReLU 25 | elif act == "elu": 26 | return nn.ELU 27 | elif act == "tanh": 28 | return nn.Tanh 29 | elif act == "prelu": 30 | return nn.PReLU 31 | elif act == "silu": 32 | return nn.SiLU 33 | elif act == "gelu": 34 | return nn.GELU 35 | elif act == "glu": 36 | return nn.GLU 37 | else: 38 | raise ValueError("%s not recognized." % act) 39 | 40 | 41 | def get_normalization_fn_from_str(norm: str) -> type[nn.Module]: 42 | if norm == "layer": 43 | return nn.LayerNorm 44 | elif norm == "layer_for_cnn": 45 | return layernorm_for_cnn 46 | elif norm == "img_ch_layer": 47 | return ImgChLayerNorm 48 | elif norm == "group": 49 | return nn.GroupNorm 50 | elif norm == "batch1d": 51 | return nn.BatchNorm1d 52 | elif norm == "batch2d": 53 | return nn.BatchNorm2d 54 | elif norm == "identity": 55 | return identity_cls 56 | else: 57 | raise ValueError("%s not recognized." % norm) 58 | -------------------------------------------------------------------------------- /robobase/models/decoder.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | import math 3 | from typing import Tuple 4 | 5 | import numpy as np 6 | import torch 7 | from torch import nn as nn 8 | 9 | from robobase import utils 10 | from robobase.models.core import ( 11 | RoboBaseModule, 12 | get_activation_fn_from_str, 13 | get_normalization_fn_from_str, 14 | ) 15 | 16 | 17 | class DecoderModule(RoboBaseModule, ABC): 18 | def __init__(self, input_shape: tuple[int]): 19 | super().__init__() 20 | self.input_shape = input_shape 21 | assert len(input_shape) == 1, f"Expected input shape (C), but got {input_shape}" 22 | 23 | 24 | class DecoderCNNMultiView(DecoderModule): 25 | def __init__( 26 | self, 27 | input_shape: Tuple[int], 28 | output_shape: Tuple[int, int, int, int], 29 | min_res: int = 4, 30 | channels: int = 32, 31 | kernel_size: int = 4, 32 | channels_multiplier: int = 1, 33 | activation: str = "relu", 34 | norm: str = "identity", 35 | ): 36 | super().__init__(input_shape) 37 | num_cameras = output_shape[0] 38 | num_layers = int(np.log2(output_shape[2]) - np.log2(min_res)) 39 | assert output_shape[2] == output_shape[3], "Only support square images" 40 | assert ( 41 | output_shape[2] > 0 and (output_shape[2] & (output_shape[2] - 1)) == 0 42 | ), f"{output_shape[2]} is not a power of 2" 43 | linear_output_channels = ( 44 | (min_res**2) * channels * (channels_multiplier ** (num_layers - 1)) 45 | ) 46 | self.activation_fn = get_activation_fn_from_str(activation) 47 | self.norm_fn = get_normalization_fn_from_str(norm) 48 | self.linears_per_cam = nn.ModuleList() 49 | self.convs_per_cam = nn.ModuleList() 50 | 51 | for _ in range(num_cameras): 52 | self.linears_per_cam.append( 53 | nn.Linear(input_shape[0], linear_output_channels) 54 | ) 55 | net = [] 56 | input_channels = linear_output_channels // (min_res**2) 57 | for i in range(num_layers): 58 | is_last_layer = i == num_layers - 1 59 | if is_last_layer: 60 | output_channels = output_shape[1] 61 | else: 62 | output_channels = input_channels // channels_multiplier 63 |
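# Worked example of the "same" padding computed below: with the default
# kernel_size=4, stride=2 and dilation=1, val = 1 * (4 - 1) - 2 + 1 = 2,
# giving pad=1 and output_padding=0, so each transposed conv exactly
# doubles the spatial resolution.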
padding, output_padding = self.calculate_same_pad(kernel_size, 2, 1) 64 | net.append( 65 | nn.ConvTranspose2d( 66 | input_channels, 67 | output_channels, 68 | kernel_size, 69 | stride=2, 70 | padding=padding, 71 | output_padding=output_padding, 72 | bias=is_last_layer, 73 | ) 74 | ) 75 | if not is_last_layer: 76 | net.append(self.norm_fn(output_channels)) 77 | net.append(self.activation_fn()) 78 | input_channels = output_channels 79 | self.convs_per_cam.append(nn.Sequential(*net)) 80 | self._min_res = min_res 81 | self._output_shape = output_shape 82 | self.apply(utils.weight_init) 83 | 84 | @property 85 | def output_shape(self): 86 | return self._output_shape 87 | 88 | def calculate_same_pad(self, kernel, stride, dilation): 89 | val = dilation * (kernel - 1) - stride + 1 90 | pad = math.ceil(val / 2) 91 | outpad = pad * 2 - val 92 | return pad, outpad 93 | 94 | def initialize_output_layer(self, initialize_fn): 95 | """Initialize the output layer with the specified initialization function. 96 | 97 | Could be useful when a user wants to specify the initialization scheme for 98 | the output layer (e.g., zero initialization). 99 | """ 100 | for conv_per_cam in self.convs_per_cam: 101 | conv_per_cam[-1].apply(initialize_fn) 102 | 103 | def forward(self, features): 104 | assert ( 105 | self.input_shape == features.shape[1:] 106 | ), f"expected input shape {self.input_shape} but got {features.shape[1:]}" 107 | 108 | num_cameras = self.output_shape[0] 109 | outs = [] 110 | for i in range(num_cameras): 111 | x = self.linears_per_cam[i](features) 112 | x = x.reshape( 113 | [-1, x.shape[-1] // (self._min_res**2), self._min_res, self._min_res] 114 | ) 115 | x = self.convs_per_cam[i](x) 116 | outs.append(x) 117 | fused = torch.stack(outs, 1) 118 | assert ( 119 | fused.shape[1:] == self.output_shape 120 | ), f"Expected output {self.output_shape}, but got {fused.shape[1:]}" 121 | return fused 122 | -------------------------------------------------------------------------------- /robobase/models/lix_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/robobase/models/lix_utils/__init__.py -------------------------------------------------------------------------------- /robobase/models/lix_utils/analysis_custom_autograd_functions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch as th 3 | 4 | import torch.nn.functional as F 5 | 6 | from robobase.models.lix_utils.analysis_utils import ( 7 | get_local_patches_kernel, 8 | extract_local_patches, 9 | ) 10 | 11 | 12 | class LearnS(torch.autograd.Function): 13 | """Uses the neighborhood around each feature gradient position to calculate the 14 | spatial divergence of the gradients, and uses it to update the param S.""" 15 | 16 | @staticmethod 17 | def forward(ctx, input, param, N, target_capped_ratio, eps): 18 | """ 19 | input : Tensor 20 | representation to be processed (used for the gradient analysis). 21 | param : Tensor 22 | ALIX parameter S to be optimized.
23 | N : int 24 | filter size used to approximate the spatial divergence as a 25 | convolution (to calculate the ND scores), should be odd, >= 3 26 | target_capped_ratio : float 27 | target ND scores used to adaptively tune S 28 | eps : float 29 | small stabilization constant for the ND scores 30 | """ 31 | ctx.save_for_backward(param) 32 | ctx.N = N 33 | ctx.target_capped_ratio = target_capped_ratio 34 | ctx.eps = eps 35 | return input 36 | 37 | @staticmethod 38 | def backward(ctx, dy): 39 | N = ctx.N 40 | target_capped_ratio = ctx.target_capped_ratio 41 | eps = ctx.eps 42 | dy_mean_B = dy.mean(0, keepdim=True) 43 | ave_dy_abs = th.abs(dy_mean_B) 44 | pad_Hl = (N - 1) // 2 45 | pad_Hr = (N - 1) - pad_Hl 46 | pad_Wl = (N - 1) // 2 47 | pad_Wr = (N - 1) - pad_Wl 48 | pad = (pad_Wl, pad_Wr, pad_Hl, pad_Hr) 49 | padded_ave_dy = F.pad(dy_mean_B, pad, mode="replicate") 50 | loc_patches_k = get_local_patches_kernel(kernel_size=N, device=dy.device) 51 | 52 | local_patches_dy = extract_local_patches( 53 | input=padded_ave_dy, kernel=loc_patches_k, stride=1, padding=0 54 | ) 55 | ave_dy_sq = ave_dy_abs.pow(2) 56 | patch_normalizer = (N * N) - 1 57 | 58 | unbiased_sq_signal = ( 59 | local_patches_dy.pow(2).sum(dim=2) - ave_dy_sq 60 | ) / patch_normalizer # expected squared signal 61 | unbiased_sq_noise_signal = (local_patches_dy - dy_mean_B.unsqueeze(2)).pow( 62 | 2 63 | ).sum( 64 | 2 65 | ) / patch_normalizer # 1 x C x H x W expected squared noise 66 | 67 | unbiased_sqn2sig = unbiased_sq_noise_signal / (unbiased_sq_signal + eps) 68 | 69 | unbiased_sqn2sig_lp1 = th.log(1 + unbiased_sqn2sig).mean() 70 | param_grad = target_capped_ratio - unbiased_sqn2sig_lp1 71 | 72 | return dy, param_grad, None, None, None 73 | -------------------------------------------------------------------------------- /robobase/models/lix_utils/analysis_layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from robobase.models.lix_utils.analysis_custom_autograd_functions import LearnS 8 | 9 | 10 | class LocalSignalMixing(nn.Module): 11 | def __init__( 12 | self, 13 | pad, 14 | fixed_batch=False, 15 | ): 16 | """LIX regularization layer 17 | 18 | pad : float 19 | maximum regularization shift (maximum S) 20 | fixed_batch : bool 21 | compute independent regularization for each sample (slower) 22 | """ 23 | super().__init__() 24 | # +1 to avoid that the sampled values at the borders get smoothed with 0 25 | self.pad = int(math.ceil(pad)) + 1 26 | self.base_normalization_ratio = (2 * pad + 1) / (2 * self.pad + 1) 27 | self.fixed_batch = fixed_batch 28 | 29 | def get_random_shift(self, n, c, h, w, x): 30 | if self.fixed_batch: 31 | return torch.rand(size=(1, 1, 1, 2), device=x.device, dtype=x.dtype) 32 | else: 33 | return torch.rand(size=(n, 1, 1, 2), device=x.device, dtype=x.dtype) 34 | 35 | def forward(self, x, max_normalized_shift=1.0): 36 | """ 37 | x : Tensor 38 | input features 39 | max_normalized_shift : float 40 | current regularization shift in relative terms (current S) 41 | """ 42 | if self.training: 43 | max_normalized_shift = max_normalized_shift * self.base_normalization_ratio 44 | n, c, h, w = x.size() 45 | assert h == w 46 | padding = tuple([self.pad] * 4) 47 | x = F.pad(x, padding, "replicate") 48 | arange = torch.arange(h, device=x.device, dtype=x.dtype) # 0 to h - 1 49 | arange = arange.unsqueeze(0).repeat(h, 1).unsqueeze(2) 50 | base_grid = torch.cat([arange,
arange.transpose(1, 0)], dim=2) 51 | base_grid = base_grid.unsqueeze(0).repeat(n, 1, 1, 1) # 2d grid 52 | shift = self.get_random_shift(n, c, h, w, x) 53 | shift_offset = (1 - max_normalized_shift) / 2 54 | shift = (shift * max_normalized_shift) + shift_offset 55 | shift *= ( 56 | 2 * self.pad + 1 57 | ) # can start up to idx 2*pad + 1 - ignoring the left pad 58 | grid = base_grid + shift 59 | # normalize in [-1, 1] 60 | grid = grid * 2.0 / (h + 2 * self.pad) - 1 61 | return F.grid_sample(x, grid, padding_mode="zeros", align_corners=False) 62 | else: 63 | return x 64 | 65 | def get_grid(self, x, max_normalized_shift=1.0): 66 | max_normalized_shift = max_normalized_shift * self.base_normalization_ratio 67 | n, c, h, w = x.size() 68 | assert h == w 69 | padding = tuple([self.pad] * 4) 70 | x = F.pad(x, padding, "replicate") 71 | arange = torch.arange(h, device=x.device, dtype=x.dtype) # 0 to h - 1 72 | arange = arange.unsqueeze(0).repeat(h, 1).unsqueeze(2) 73 | base_grid = torch.cat([arange, arange.transpose(1, 0)], dim=2) 74 | base_grid = base_grid.unsqueeze(0).repeat(n, 1, 1, 1) # 2d grid 75 | shift = self.get_random_shift(n, c, h, w, x) 76 | shift_offset = (1 - max_normalized_shift) / 2 77 | shift = (shift * max_normalized_shift) + shift_offset 78 | shift *= 2 * self.pad + 1 79 | grid = base_grid + shift 80 | # normalize in [-1, 1] 81 | grid = grid * 2.0 / (h + 2 * self.pad) - 1 82 | return grid 83 | 84 | 85 | class ParameterizedReg(nn.Module): 86 | """Augmentation/regularization wrapper where the strength is parameterized 87 | and tuned with a custom autograd function. 88 | 89 | aug : nn.Module 90 | augmentation/regularization layer 91 | parameter_init : float 92 | initial strength value 93 | param_grad_fn : str 94 | custom autograd function to tune the parameter 95 | param_grad_fn_args : list 96 | arguments for the custom autograd function 97 | """ 98 | 99 | def __init__(self, aug, parameter_init, param_grad_fn, param_grad_fn_args): 100 | super().__init__() 101 | self.aug = aug 102 | self.P = nn.Parameter(data=torch.tensor(parameter_init)) 103 | self.param_grad_fn_name = param_grad_fn 104 | if param_grad_fn == "alix_param_grad": 105 | self.param_grad_fn = LearnS.apply 106 | else: 107 | raise NotImplementedError 108 | self.param_grad_fn_args = param_grad_fn_args 109 | 110 | def forward(self, x): 111 | with torch.no_grad(): 112 | self.P.copy_(torch.clamp(self.P, min=0, max=1)) 113 | out = self.aug(x, self.P.detach()) 114 | out = self.param_grad_fn(out, self.P, *self.param_grad_fn_args) 115 | return out 116 | 117 | def forward_no_learn(self, x): 118 | with torch.no_grad(): 119 | self.P.copy_(torch.clamp(self.P, min=0, max=1)) 120 | out = self.aug(x, self.P.detach()) 121 | return out 122 | 123 | def forward_no_aug(self, x): 124 | with torch.no_grad(): 125 | self.P.copy_(torch.clamp(self.P, min=0, max=1)) 126 | out = x 127 | out = self.param_grad_fn(out, self.P, *self.param_grad_fn_args) 128 | return out 129 | 130 | 131 | class NonLearnableParameterizedRegWrapper(nn.Module): 132 | def __init__(self, aug): 133 | super().__init__() 134 | self.aug = aug 135 | assert isinstance(aug, ParameterizedReg) 136 | 137 | def forward(self, x): 138 | return self.aug.forward_no_learn(x) 139 | -------------------------------------------------------------------------------- /robobase/models/lix_utils/analysis_modules.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import Tuple 3 | 4 | import numpy as np 5 | import torch.nn as nn
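# Design note: the encoder below shares a single ParameterizedReg instance
# across all conv blocks. Intermediate blocks wrap it in
# NonLearnableParameterizedRegWrapper, which applies the mixing without
# running the LearnS hook, so the shift parameter S is tuned only from the
# final layer's feature gradients.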
6 | 7 | from robobase.models.lix_utils.analysis_layers import ( 8 | NonLearnableParameterizedRegWrapper, 9 | ) 10 | from robobase.models.lix_utils import analysis_layers 11 | from robobase.models import EncoderCNNMultiViewDownsampleWithStrides 12 | from robobase.utils import weight_init 13 | 14 | 15 | class LIXModule(ABC): 16 | pass 17 | 18 | 19 | class EncoderAllFeatTiedRegularizedCNNMultiViewDownsampleWithStrides( 20 | EncoderCNNMultiViewDownsampleWithStrides, LIXModule 21 | ): 22 | """Encoder with the same regularization applied after every layer, and with the 23 | regularization parameter tuned only with the final layer's feature gradients.""" 24 | 25 | def __init__( 26 | self, 27 | input_shape: Tuple[int, int, int, int], 28 | num_downsample_convs: int, 29 | num_post_downsample_convs: int = 3, 30 | channels: int = 32, 31 | kernel_size: int = 3, 32 | ): 33 | super().__init__( 34 | input_shape, 35 | num_downsample_convs, 36 | num_post_downsample_convs, 37 | channels, 38 | kernel_size, 39 | ) 40 | assert ( 41 | len(input_shape) == 4 42 | ), f"Expected shape (V, C, H, W), but got {input_shape}" 43 | self._input_shape = input_shape 44 | num_cameras = input_shape[0] 45 | self.convs_per_cam = nn.ModuleList() 46 | final_channels = 0 47 | self.aug = analysis_layers.ParameterizedReg( 48 | analysis_layers.LocalSignalMixing(2, True), 49 | 0.5, 50 | param_grad_fn="alix_param_grad", 51 | param_grad_fn_args=[3, 0.535, 1e-20], 52 | ) 53 | for _ in range(num_cameras): 54 | resolution = np.array(input_shape[2:]) 55 | net = [] 56 | ch = input_shape[1] 57 | for _ in range(num_downsample_convs): 58 | net.append( 59 | nn.Conv2d( 60 | ch, 61 | channels, 62 | kernel_size=kernel_size, 63 | stride=2, 64 | ) 65 | ) 66 | net.append(nn.ReLU()) 67 | net.append(NonLearnableParameterizedRegWrapper(self.aug)) 68 | ch = channels 69 | resolution = np.ceil(resolution / 2) - (kernel_size // 2) 70 | for _ in range(num_post_downsample_convs): 71 | net.append( 72 | nn.Conv2d( 73 | ch, 74 | channels, 75 | kernel_size=kernel_size, 76 | stride=1, 77 | ) 78 | ) 79 | net.append(nn.ReLU()) 80 | net.append(NonLearnableParameterizedRegWrapper(self.aug)) 81 | ch = channels 82 | resolution -= (kernel_size // 2) * 2 83 | net.append(self.aug) 84 | self.convs_per_cam.append(nn.Sequential(*net)) 85 | final_channels = int(channels * resolution.prod()) 86 | self._output_shape = (num_cameras, final_channels) 87 | self.apply(weight_init) 88 | -------------------------------------------------------------------------------- /robobase/models/lix_utils/analysis_optimizers.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | 3 | from robobase.models.lix_utils.analysis_layers import ParameterizedReg 4 | 5 | 6 | def custom_parameterized_aug_optimizer_builder(encoder_lr, **kwargs): 7 | """Apply different optimizer parameters for S""" 8 | 9 | def make_optimizer( 10 | encoder, 11 | ): 12 | assert isinstance(encoder.aug, ParameterizedReg) 13 | encoder_params = list(encoder.parameters()) 14 | encoder_aug_parameters = list(encoder.aug.parameters()) 15 | encoder_non_aug_parameters = [ 16 | p 17 | for p in encoder_params 18 | if all([p is not aug_p for aug_p in encoder_aug_parameters]) 19 | ] 20 | return th.optim.Adam( 21 | [ 22 | {"params": encoder_non_aug_parameters}, 23 | {"params": encoder_aug_parameters, **kwargs}, 24 | ], 25 | lr=encoder_lr, 26 | ) 27 | 28 | return make_optimizer 29 | -------------------------------------------------------------------------------- 
/robobase/models/lix_utils/analysis_utils.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn.functional as F 3 | 4 | 5 | def get_local_patches_kernel(kernel_size, device): 6 | patch_dim = kernel_size**2 7 | k = th.eye(patch_dim, device=device).view(patch_dim, 1, kernel_size, kernel_size) 8 | return k 9 | 10 | 11 | def extract_local_patches(input, kernel, N=None, padding=0, stride=1): 12 | b, c, _, _ = input.size() 13 | if kernel is None: 14 | kernel = get_local_patches_kernel(kernel_size=N, device=input.device) 15 | flinput = input.flatten(0, 1).unsqueeze(1) 16 | patches = F.conv2d(flinput, kernel, padding=padding, stride=stride) 17 | _, _, h, w = patches.size() 18 | return patches.view(b, c, -1, h, w) 19 | -------------------------------------------------------------------------------- /robobase/replay_buffer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/robobase/replay_buffer/__init__.py -------------------------------------------------------------------------------- /robobase/replay_buffer/prioritized_replay_buffer.py: -------------------------------------------------------------------------------- 1 | """An implementation of Prioritized Experience Replay (PER). 2 | 3 | This implementation is based on the paper "Prioritized Experience Replay" 4 | by Tom Schaul et al. (2015). 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | from __future__ import annotations 11 | 12 | from multiprocessing import Array, Value 13 | from typing_extensions import override 14 | 15 | import numpy as np 16 | 17 | from robobase.replay_buffer.replay_buffer import ReplayElement 18 | from robobase.replay_buffer.sum_tree import SumTree 19 | from robobase.replay_buffer.uniform_replay_buffer import UniformReplayBuffer 20 | 21 | PRIORITY = "priority" 22 | SAMPLING_PROBABILITIES = "sampling_probabilities" 23 | 24 | 25 | class PrioritizedReplayBuffer(UniformReplayBuffer): 26 | """An out-of-graph Replay Buffer for Prioritized Experience Replay. 27 | 28 | See uniform_replay_buffer.py for details. 29 | """ 30 | 31 | def __init__(self, *args, **kwargs): 32 | """Initializes OutOfGraphPrioritizedReplayBuffer.""" 33 | super(PrioritizedReplayBuffer, self).__init__(*args, **kwargs) 34 | self._sum_tree = SumTree(self._replay_capacity) 35 | self._num_to_sample = self.batch_size * (self._num_workers + 1) 36 | self._last_sampled_idx = Array("i", self._num_to_sample) 37 | self._times_samples = Value("i", 0) 38 | 39 | def get_storage_signature( 40 | self, 41 | ) -> tuple[dict[str, ReplayElement], dict[str, ReplayElement]]: 42 | """Returns a default list of elements to be stored in this replay memory. 43 | 44 | Note - Derived classes may return a different signature. 45 | 46 | Returns: 47 | dict of ReplayElements defining the type of the contents stored. 
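This subclass extends the base signature with a scalar float32
PRIORITY element.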
48 | """ 49 | storage_elements, obs_elements = super( 50 | PrioritizedReplayBuffer, self 51 | ).get_storage_signature() 52 | storage_elements[PRIORITY] = ReplayElement(PRIORITY, (), np.float32) 53 | return storage_elements, obs_elements 54 | 55 | @override 56 | def add( 57 | self, 58 | observation: dict, 59 | action: np.ndarray, 60 | reward: float, 61 | terminal: bool, 62 | truncated: bool, 63 | priority: float = None, 64 | **kwargs, 65 | ): 66 | kwargs[PRIORITY] = priority 67 | if priority is None: 68 | priority = self._sum_tree.max_recorded_priority 69 | kwargs[PRIORITY] = priority 70 | self._sum_tree.set(self.add_count, priority) 71 | super(PrioritizedReplayBuffer, self).add( 72 | observation, action, reward, terminal, truncated, **kwargs 73 | ) 74 | 75 | def get_priority(self, indices): 76 | """Fetches the priorities correspond to a batch of memory indices. 77 | 78 | For any memory location not yet used, the corresponding priority is 0. 79 | 80 | Args: 81 | indices: np.array with dtype int32, of indices in range 82 | [0, replay_capacity). 83 | 84 | Returns: 85 | priorities: float, the corresponding priorities. 86 | """ 87 | assert isinstance(indices, np.ndarray), "Indices must be an array." 88 | assert indices.shape, "Indices must be an array." 89 | assert indices.dtype == np.int32, "Indices must be int32s, " "given: {}".format( 90 | indices.dtype 91 | ) 92 | batch_size = len(indices) 93 | priority_batch = np.empty((batch_size), dtype=np.float32) 94 | for i, memory_index in enumerate(indices): 95 | priority_batch[i] = self._sum_tree.get(memory_index) 96 | return priority_batch 97 | 98 | def sample_single(self, index=None): 99 | replay_sample = super().sample_single(index) 100 | if replay_sample is not None: 101 | replay_sample[SAMPLING_PROBABILITIES] = self._sum_tree.get(index) 102 | return replay_sample 103 | 104 | @override 105 | def sample(self, batch_size=None, indices=None): 106 | batch_size = self._batch_size if batch_size is None else batch_size 107 | if indices is not None and len(indices) != batch_size: 108 | raise ValueError( 109 | f"indices was of size {len(indices)}, but batch size was {batch_size}" 110 | ) 111 | if indices is None: 112 | indices = self._sum_tree.stratified_sample(batch_size) 113 | samples = [self.sample_single(indices[i]) for i in range(batch_size)] 114 | batch = {} 115 | for k in samples[0].keys(): 116 | batch[k] = np.stack([sample[k] for sample in samples]) 117 | return batch 118 | 119 | def set_priority(self, indices, priorities): 120 | """Sets the priority of the given elements according to Schaul et al. 121 | 122 | Args: 123 | indices: np.array with dtype int32, of indices in range 124 | [0, replay_capacity). 125 | priorities: float, the corresponding priorities. 126 | """ 127 | assert ( 128 | indices.dtype == np.int32 129 | ), "Indices must be integers, " "given: {}".format(indices.dtype) 130 | for index, priority in zip(indices, priorities): 131 | self._sum_tree.set(index, priority) 132 | 133 | def __iter__(self): 134 | while True: 135 | # Because not globally sampling, we can get repeat samples in a batch. 136 | # By sampling across the 10 highest priorities, this reduces that chance. 
137 | sample = self.sample_single( 138 | self._sum_tree.stratified_sample(10)[np.random.randint(10)] 139 | ) 140 | if sample is None: 141 | continue 142 | yield sample 143 | -------------------------------------------------------------------------------- /robobase/replay_buffer/replay_buffer.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import numpy as np 4 | from torch.utils.data import IterableDataset 5 | 6 | 7 | class ReplayElement(object): 8 | def __init__(self, name, shape, type, is_observation=False): 9 | self.name = name 10 | self.shape = shape 11 | self.type = type 12 | self.is_observation = is_observation 13 | 14 | 15 | class ReplayBuffer(IterableDataset): 16 | def replay_capacity(self): 17 | pass 18 | 19 | def batch_size(self): 20 | pass 21 | 22 | def get_storage_signature(self) -> tuple[list[ReplayElement], list[ReplayElement]]: 23 | pass 24 | 25 | def add( 26 | self, 27 | observation: dict, 28 | action: np.ndarray, 29 | reward: float, 30 | terminal: bool, 31 | truncated: bool, 32 | **kwargs, 33 | ): 34 | """Adds a transition to the replay memory. 35 | 36 | Since the next_observation in the transition will be the observation added 37 | next, there is no need to pass it. 38 | If the replay memory is at capacity the oldest transition will be discarded. 39 | 40 | Args: 41 | observation: current observation before action is applied. 42 | action: the action in the transition. 43 | reward: the reward received in the transition. 44 | terminal: Whether the transition was terminal or not. 45 | truncated: Whether the transition was truncated or not. 46 | kwargs: extra elements of the transition 47 | """ 48 | pass 49 | 50 | def add_final(self, final_observation: dict): 51 | """Adds the final transition to the replay memory. 52 | 53 | Final transition only contains final observation, but no action, rewards and 54 | info as the episode has terminated. 55 | 56 | Args: 57 | final_observation: final observation of the episode 58 | """ 59 | pass 60 | 61 | def is_empty(self): 62 | pass 63 | 64 | def is_full(self): 65 | pass 66 | 67 | def sample(self, batch_size=None, indices=None): 68 | """Sample transitions from replay buffer. 69 | 70 | Args: 71 | batch_size (int, optional): the batch size. Defaults to None. 72 | indices (list[int], optional): a list of global transition indices. 73 | Defaults to None. 74 | 75 | Returns: 76 | batch: a batch of transitions. 77 | """ 78 | pass 79 | 80 | def shutdown(self): 81 | pass 82 | -------------------------------------------------------------------------------- /robobase/replay_buffer/utils.py: -------------------------------------------------------------------------------- 1 | """Collection of replay buffer utils.""" 2 | from multiprocessing import Condition, Lock 3 | 4 | 5 | class ReadWriteLock: 6 | """Allows many simultaneous read locks, but only one write lock.""" 7 | 8 | def __init__(self): 9 | self._read_ready = Condition(Lock()) 10 | self._readers = 0 11 | 12 | def acquire_read(self): 13 | """Acquire a read lock. 
Blocks only if a thread has acquired the write lock.""" 14 | self._read_ready.acquire() 15 | try: 16 | self._readers += 1 17 | finally: 18 | self._read_ready.release() 19 | 20 | def release_read(self): 21 | """Release a read lock.""" 22 | self._read_ready.acquire() 23 | try: 24 | self._readers -= 1 25 | if not self._readers: 26 | self._read_ready.notify_all() 27 | finally: 28 | self._read_ready.release() 29 | 30 | def acquire_write(self): 31 | """Acquire a write lock. Blocks until there are no acquired read/write locks.""" 32 | self._read_ready.acquire() 33 | while self._readers > 0: 34 | self._read_ready.wait() 35 | 36 | def release_write(self): 37 | """Release a write lock.""" 38 | self._read_ready.release() 39 | -------------------------------------------------------------------------------- /robobase/video.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import imageio 4 | import numpy as np 5 | import gymnasium as gym 6 | 7 | 8 | def _render_single_env_if_vector(env: gym.vector.VectorEnv): 9 | if getattr(env, "is_vector_env", False): 10 | if getattr(env, "parent_pipes", False): 11 | # Async env 12 | old_parent_pipes = env.parent_pipes 13 | env.parent_pipes = old_parent_pipes[:1] 14 | img = env.call("render")[0] 15 | env.parent_pipes = old_parent_pipes 16 | elif getattr(env, "envs", False): 17 | # Sync env 18 | old_envs = env.envs 19 | env.envs = old_envs[:1] 20 | img = env.call("render")[0] 21 | env.envs = old_envs 22 | else: 23 | raise ValueError("Unrecognized vector env.") 24 | else: 25 | img = env.render() 26 | return img 27 | 28 | 29 | class VideoRecorder: 30 | def __init__(self, save_dir: Path, render_size=256, fps=20): 31 | self.save_dir = save_dir 32 | if save_dir is not None: 33 | self.save_dir.mkdir(exist_ok=True) 34 | self.render_size = render_size 35 | self.fps = fps 36 | self.frames = [] 37 | 38 | def init(self, env, enabled=True): 39 | self.frames = [] 40 | self.enabled = self.save_dir is not None and enabled 41 | self.record(env) 42 | 43 | def record(self, env): 44 | if self.enabled: 45 | frame = _render_single_env_if_vector(env) 46 | if frame is not None: 47 | self.frames.append(frame) 48 | 49 | def save(self, file_name): 50 | if self.enabled and len(self.frames) > 0: 51 | path = self.save_dir / file_name 52 | imageio.mimsave(str(path), np.array(self.frames), fps=self.fps) 53 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | from pathlib import Path 4 | 5 | import setuptools 6 | 7 | 8 | def read(rel_path): 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | with codecs.open(os.path.join(here, rel_path), "r") as fp: 11 | return fp.read() 12 | 13 | 14 | def get_version(rel_path): 15 | for line in read(rel_path).splitlines(): 16 | if line.startswith("__version__"): 17 | delim = '"' if '"' in line else "'" 18 | return line.split(delim)[1] 19 | else: 20 | raise RuntimeError("Unable to find version string.") 21 | 22 | 23 | core_requirements = [ 24 | "torch>1.13", 25 | "moviepy", 26 | "natsort", 27 | "omegaconf", 28 | "hydra-core", 29 | "hydra-joblib-launcher", 30 | # Fix for solver_iter before 1.0.0 31 | "gymnasium @ git+https://git@github.com/stepjam/Gymnasium.git@0.29.2", 32 | "wandb<=0.15.12", 33 | "termcolor", 34 | "opencv-python-headless", 35 | "numpy<2", 36 | "imageio", 37 | "timm", 38 | "scipy", 39 | "einops", 40 | 
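    # Diffusion-models library, presumably pinned for the "diffusion" policy method.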
"diffusers==0.29.0", 41 | ] 42 | 43 | setuptools.setup( 44 | version=get_version("robobase/__init__.py"), 45 | name="robobase", 46 | author="robobase", 47 | packages=setuptools.find_packages(), 48 | python_requires=">=3.10", 49 | install_requires=core_requirements, 50 | package_data={ 51 | "": [str(p.resolve()) for p in Path("robobase/cfgs/").glob("**/*.yaml")] 52 | }, 53 | extras_require={ 54 | "dev": ["pre-commit", "pytest", "mvp @ git+https://github.com/ir413/mvp"], 55 | "dmc": [ 56 | "dm_control", 57 | ], 58 | "rlbench": [ 59 | "rlbench @ git+https://git@github.com/stepjam/RLBench.git@b80e51feb3694d9959cb8c0408cd385001b01382", 60 | ], 61 | "bigym": [ 62 | "bigym @ git+https://github.com/chernyadev/bigym.git", 63 | ], 64 | "d4rl": [ 65 | "d4rl @ git+https://github.com/Farama-Foundation/d4rl@master#egg=d4rl", 66 | "gym", 67 | "cython<3", 68 | ], 69 | }, 70 | ) 71 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import multiprocessing 3 | 4 | 5 | # Define a fixture to set the start method for multiprocessing 6 | @pytest.fixture(scope="session", autouse=True) 7 | def set_multiprocessing_start_method(): 8 | # Set your desired start method here 9 | multiprocessing.set_start_method("forkserver") 10 | -------------------------------------------------------------------------------- /tests/integration/test_training.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import tempfile 3 | import time 4 | from pathlib import Path 5 | import multiprocessing 6 | 7 | import numpy as np 8 | import pytest 9 | 10 | from hydra import compose, initialize 11 | from omegaconf import OmegaConf 12 | 13 | from robobase.workspace import Workspace 14 | 15 | EXP_NAME = "test_experiment" 16 | COMMON_HYDRA_OVERRIDES = [ 17 | "num_train_frames=100000", 18 | "replay_size_before_train=1000", 19 | "save_csv=true", 20 | "log_eval_video=false", 21 | "num_train_envs=1", 22 | "seed=1", 23 | f"experiment_name={EXP_NAME}", 24 | ] 25 | PIXEL_METHODS = ["drqv2", "alix"] 26 | STATE_METHODS = ["drqv2"] 27 | 28 | 29 | def run_cmd(hydra_overrides: list[str], target_reward: float, result_queue): 30 | try: 31 | with tempfile.TemporaryDirectory(dir=Path.cwd()) as tmpdirname: 32 | with initialize( 33 | version_base=None, 34 | config_path="../../robobase/cfgs", 35 | job_name="test_app", 36 | ): 37 | hydra_overrides.append(f"replay.save_dir={tmpdirname}/replay") 38 | tmp_dir = Path(tmpdirname) 39 | cfg = compose(config_name="robobase_config", overrides=hydra_overrides) 40 | print(OmegaConf.to_yaml(cfg)) 41 | workspace = Workspace(cfg, work_dir=tmp_dir) 42 | start_time = time.monotonic() 43 | workspace.train() 44 | print( 45 | "Train time: ", (time.monotonic() - start_time) / 60.0, " minutes" 46 | ) 47 | csv_log_dir = Path(tmp_dir, "eval.csv") 48 | with csv_log_dir.open(newline="", encoding="utf-8") as f: 49 | reader = csv.DictReader(f) 50 | returns = [row["episode_reward"] for row in reader] 51 | sorted_rewards = np.sort(np.array(returns[-20:]).astype(float)) 52 | # Check the top N rewards (reduces test flakiness) 53 | 
print(np.mean(sorted_rewards[-5:])) 54 | assert np.mean(sorted_rewards[-5:]) > target_reward 55 | except Exception as e: # pylint: disable=try-except-raise 56 | result_queue.put(e) 57 | finally: 58 | # Sleep for 5 secs to clean memory from previous process 59 | # Next test will fail without this when it's initialised too fast 60 | time.sleep(5) 61 | 62 | 63 | @pytest.mark.parametrize("method", STATE_METHODS) 64 | def test_dmc_cartpole_no_pixels(method: str): 65 | result_queue = multiprocessing.Queue() 66 | # Create a multiprocessing.Process and pass the result queue as an argument 67 | process = multiprocessing.Process( 68 | target=run_cmd, 69 | args=( 70 | COMMON_HYDRA_OVERRIDES 71 | + [ 72 | f"method={method}", 73 | "env=dmc/cartpole_balance", 74 | "pixels=false", 75 | "frame_stack=3", 76 | "method.use_augmentation=true", 77 | "replay.size=200000", 78 | ], 79 | 800, 80 | result_queue, 81 | ), 82 | ) 83 | 84 | # Start the process 85 | process.start() 86 | # Wait for the process to finish 87 | process.join() 88 | # Check if the process terminated successfully (exit code 0) or not 89 | if process.exitcode != 0: 90 | raise result_queue.get() 91 | 92 | 93 | @pytest.mark.parametrize("method", PIXEL_METHODS) 94 | def test_dmc_cartpole_pixels(method: str): 95 | result_queue = multiprocessing.Queue() 96 | # Create a multiprocessing.Process and pass the result queue as an argument 97 | process = multiprocessing.Process( 98 | target=run_cmd, 99 | args=( 100 | COMMON_HYDRA_OVERRIDES 101 | + [ 102 | f"method={method}", 103 | "env=dmc/cartpole_balance", 104 | "pixels=true", 105 | "frame_stack=3", 106 | "method.use_augmentation=true", 107 | "replay.size=50000", 108 | ], 109 | 800, 110 | result_queue, 111 | ), 112 | ) 113 | # Start the process 114 | process.start() 115 | # Wait for the process to finish 116 | process.join() 117 | # Check if the process terminated successfully (exit code 0) or not 118 | if process.exitcode != 0: 119 | raise result_queue.get() 120 | 121 | 122 | # noqa: E501 TODO: reach target is not stable enough at the moment to do integration test with rlbench 123 | # @pytest.mark.parametrize("method", METHODS) 124 | # def test_rlbench_reach_target_no_pixels(method: str): 125 | # run_cmd( 126 | # COMMON_HYDRA_OVERRIDES 127 | # + [ 128 | # f"method={method}", 129 | # "env=rlbench/reach_target", 130 | # "pixels=false", 131 | # "action_sequence=4", 132 | # ], 133 | # 0.8, 134 | # ) 135 | 136 | 137 | # @pytest.mark.parametrize("method", METHODS) 138 | # def test_rlbench_reach_target_pixels(method: str): 139 | # noqa: E501 run_cmd(COMMON_HYDRA_OVERRIDES + [f"method={method}", "env=rlbench/reach_target", "env.cameras=[wrist, front]", "pixels=true"], 0.8) 140 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/envs/test_d4rl.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import gym as gym_old 3 | import gymnasium as gym 4 | import numpy as np 5 | from hydra.core.global_hydra import GlobalHydra 6 | from hydra import compose, initialize 7 | 8 | from robobase.envs.d4rl import ( 9 | get_traj_dataset, 10 | D4RLEnvCompatibility, 11 | ConvertObsToDict, 12 | D4RLEnvFactory, 13 | ) 14 | 15 | 16 | 
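# Each trajectory below is a list whose first entry is the (obs, info) pair
# from reset, followed by (obs, reward, terminated, truncated, info) tuples
# whose info carries "demo_action" and "demo" == 1, as asserted in the test.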
@pytest.mark.parametrize(
17 |     "task_name, expected_len",
18 |     [
19 |         ("halfcheetah-medium-v2", 1000),
20 |         ("hopper-medium-v2", 2187),
21 |         ("walker2d-medium-v2", 1191),
22 |     ],
23 | )
24 | def test_get_trajectory_dataset(task_name, expected_len):
25 |     env = gym_old.make(task_name)
26 |     d4rl_trajs, _ = get_traj_dataset(env)
27 | 
28 |     for traj in d4rl_trajs:  # for each trajectory
29 |         assert len(traj[0]) == 2  # first transition only contains obs and info
30 |         for i in range(1, len(traj)):
31 |             assert len(traj[i]) == 5  # subsequent transitions contain 5 items
32 |             assert "demo_action" in traj[i][4]
33 |             assert traj[i][4]["demo"] == 1
34 | 
35 |     assert len(d4rl_trajs) == expected_len
36 | 
37 | 
38 | @pytest.mark.parametrize(
39 |     "task_name",
40 |     ["halfcheetah-medium-v2", "hopper-medium-v2", "walker2d-medium-v2"],
41 | )
42 | def test_env_compatibility_wrapper(task_name):
43 |     env = gym_old.make(task_name)
44 |     env = D4RLEnvCompatibility(env)
45 | 
46 |     assert isinstance(env.observation_space, gym.spaces.Box)
47 |     assert isinstance(env.action_space, gym.spaces.Box)
48 |     assert env.observation_space.dtype == np.float32
49 |     assert env.action_space.dtype == np.float32
50 | 
51 |     env.reset()
52 |     action = env.action_space.sample()
53 |     res = env.step(action)
54 |     assert len(res) == 5  # In the new gym API, step should return 5 items.
55 | 
56 | 
57 | @pytest.mark.parametrize(
58 |     "task_name", ["HalfCheetah-v4", "Hopper-v4", "Walker2d-v4"]
59 | )
60 | def test_convert_obs_to_dict_wrapper(task_name):
61 |     env = gym.make(task_name)
62 |     env = ConvertObsToDict(env)
63 | 
64 |     assert isinstance(env.observation_space, gym.spaces.Dict)
65 | 
66 |     obs, info = env.reset()
67 |     assert "low_dim_state" in obs
68 | 
69 |     action = env.action_space.sample()
70 |     obs, _, _, _, _ = env.step(action)
71 |     assert "low_dim_state" in obs
72 | 
73 | 
74 | @pytest.fixture()
75 | def compose_cfg():
76 |     GlobalHydra.instance().clear()
77 |     initialize(config_path="../../../robobase/cfgs")
78 |     method = ["method=" + "iql_drqv2"]
79 |     cfg = compose(
80 |         config_name="robobase_config",
81 |         overrides=method
82 |         + [
83 |             "pixels=false",
84 |             "env=d4rl/hopper",
85 |             "save_snapshot=true",
86 |             "snapshot_every_n=1",
87 |         ],
88 |     )
89 |     return cfg
90 | 
91 | 
92 | @pytest.mark.parametrize(
93 |     "num_demos, desired_num_demos", [(float("inf"), 2187), (0, 0), (100, 100)]
94 | )
95 | def test_collect_demo(num_demos, desired_num_demos, compose_cfg):
96 |     factory = D4RLEnvFactory()
97 |     factory.collect_or_fetch_demos(compose_cfg, num_demos)
98 | 
99 |     assert len(factory._raw_demos) == desired_num_demos
100 | 
--------------------------------------------------------------------------------
/tests/unit/envs/test_rlbench.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from rlbench.observation_config import ObservationConfig
4 | from robobase.envs.env import Demo
5 | from robobase.envs.rlbench import RLBenchEnv, keypoint_discovery
6 | from rlbench.action_modes.action_mode import JointPositionActionMode
7 | 
8 | 
9 | @pytest.fixture
10 | def demo():
11 |     """Get a demo from RLBench."""
12 |     cameras = ["wrist"]
13 |     obs_config = ObservationConfig()
14 |     obs_config.set_all_low_dim(True)
15 |     obs_config.set_all_high_dim(False)
16 |     obs_config.gripper_open = True
17 |     obs_config.joint_positions = True
18 |     for camera in cameras:
19 |         camera_config = getattr(obs_config, f"{camera}_camera")
20 |         camera_config.rgb = True
21 |         camera_config.depth = False
22 |         camera_config.point_cloud = 
False 23 | camera_config.mask = False 24 | 25 | env = RLBenchEnv( 26 | task_name="reach_target", 27 | observation_config=obs_config, 28 | action_mode=JointPositionActionMode(), 29 | headless=True, 30 | ) 31 | _, _ = env.reset() 32 | demos = env.get_demos(1) 33 | env.close() 34 | return demos[0] 35 | 36 | 37 | def test_keypoint_discovery(demo): 38 | keypoint_indices = keypoint_discovery(demo, method="heuristic") 39 | # 0th index is not included in keypoint_indices 40 | nbp_demo = Demo([demo[i] for i in [0] + keypoint_indices]) 41 | assert len(keypoint_indices) < len(demo) 42 | # Last index should be in keypoint_indices 43 | assert len(demo) - 1 in keypoint_indices 44 | # Check reward of last keypoint 45 | assert nbp_demo[-1][1] > 0 46 | 47 | # TODO: Add tests for other keypoint discovery methods (random, fixed_interval) 48 | -------------------------------------------------------------------------------- /tests/unit/intrinsic_reward_module/test_intrinsic_rewards.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | from gymnasium import spaces 4 | 5 | from robobase.intrinsic_reward_module.core import IntrinsicRewardModule 6 | from robobase.intrinsic_reward_module import RND, ICM 7 | 8 | 9 | OBS_LOW_DIM = "low_dim_state" 10 | OBS_PIXELS_1 = "rgb_one" 11 | OBS_PIXELS_2 = "rgb_two" 12 | OBS_LOW_DIM_SIZE = 100 13 | IMG_SHAPE = (3, 8, 8) 14 | ACTION_SHAPE = (2, 4) 15 | BATCH_SIZE = 10 16 | TIME_SIZE = 2 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "intrinsic_module_cls,", 21 | [RND, ICM], 22 | ) 23 | class TestIntrinsicRewards: 24 | def _sample_fake_batch(self, low_dim: bool = False, pixels: bool = False): 25 | batch = { 26 | "action": torch.rand(BATCH_SIZE, *ACTION_SHAPE, dtype=torch.float32), 27 | } 28 | if low_dim: 29 | batch.update( 30 | { 31 | OBS_LOW_DIM: torch.rand( 32 | BATCH_SIZE, TIME_SIZE, OBS_LOW_DIM_SIZE, dtype=torch.float32 33 | ), 34 | f"{OBS_LOW_DIM}_tp1": torch.rand( 35 | BATCH_SIZE, TIME_SIZE, OBS_LOW_DIM_SIZE, dtype=torch.float32 36 | ), 37 | } 38 | ) 39 | if pixels: 40 | batch.update( 41 | { 42 | OBS_PIXELS_1: torch.rand( 43 | BATCH_SIZE, TIME_SIZE, *IMG_SHAPE, dtype=torch.float32 44 | ), 45 | f"{OBS_PIXELS_1}_tp1": torch.rand( 46 | BATCH_SIZE, TIME_SIZE, *IMG_SHAPE, dtype=torch.float32 47 | ), 48 | OBS_PIXELS_2: torch.rand( 49 | BATCH_SIZE, TIME_SIZE, *IMG_SHAPE, dtype=torch.float32 50 | ), 51 | f"{OBS_PIXELS_2}_tp1": torch.rand( 52 | BATCH_SIZE, TIME_SIZE, *IMG_SHAPE, dtype=torch.float32 53 | ), 54 | } 55 | ) 56 | return batch 57 | 58 | def test_low_dim_obs(self, intrinsic_module_cls: type[IntrinsicRewardModule]): 59 | observation_space = spaces.Dict( 60 | { 61 | OBS_LOW_DIM: spaces.Box(-1, 1, (TIME_SIZE, OBS_LOW_DIM_SIZE)), 62 | } 63 | ) 64 | action_space = spaces.Box(-2, 2, ACTION_SHAPE) 65 | rew = intrinsic_module_cls( 66 | observation_space=observation_space, 67 | action_space=action_space, 68 | device=torch.device("cpu"), 69 | ) 70 | batch = self._sample_fake_batch(low_dim=True) 71 | rewards = rew.compute_irs(batch) 72 | rew.update(batch) 73 | assert rewards.shape == (BATCH_SIZE, 1) 74 | assert rewards.dtype == torch.float32 75 | 76 | def test_pixel_obs(self, intrinsic_module_cls: type[IntrinsicRewardModule]): 77 | observation_space = spaces.Dict( 78 | { 79 | OBS_PIXELS_1: spaces.Box(-1, 1, (TIME_SIZE, *IMG_SHAPE)), 80 | OBS_PIXELS_2: spaces.Box(-1, 1, (TIME_SIZE, *IMG_SHAPE)), 81 | } 82 | ) 83 | action_space = spaces.Box(-2, 2, ACTION_SHAPE) 84 | rew = intrinsic_module_cls( 85 | 
observation_space=observation_space, 86 | action_space=action_space, 87 | device=torch.device("cpu"), 88 | ) 89 | batch = self._sample_fake_batch(pixels=True) 90 | rewards = rew.compute_irs(batch) 91 | rew.update(batch) 92 | assert rewards.shape == (BATCH_SIZE, 1) 93 | assert rewards.dtype == torch.float32 94 | -------------------------------------------------------------------------------- /tests/unit/method/base.py: -------------------------------------------------------------------------------- 1 | """Base test""" 2 | import multiprocessing 3 | import tempfile 4 | 5 | from hydra import compose, initialize 6 | from hydra.core.global_hydra import GlobalHydra 7 | from robobase.envs import dmc, bigym 8 | 9 | from robobase.workspace import Workspace 10 | 11 | import os 12 | import torch 13 | 14 | dmc.UNIT_TEST = True 15 | bigym.UNIT_TEST = True 16 | 17 | 18 | def train_and_shutdown(cfg, tempdir): 19 | w = Workspace(cfg, work_dir=tempdir) 20 | w.train() 21 | w.shutdown() 22 | 23 | 24 | def _train_process_helper(cfg, tempdir): 25 | # Initialize Workspace inside the subprocess 26 | workspace = Workspace(cfg, work_dir=tempdir) 27 | 28 | # Store the initial state_dict 29 | prev_state_dict = {k: v.clone() for k, v in workspace.agent.state_dict().items()} 30 | 31 | # Perform training 32 | workspace.train() 33 | 34 | # Get the updated state_dict and save it to temp directory 35 | state_dict = {k: v.clone() for k, v in workspace.agent.state_dict().items()} 36 | with open(f"{tempdir}/state_dict.pt", "wb") as f: 37 | torch.save(state_dict, f) 38 | 39 | is_param_different = [] 40 | for k in state_dict.keys(): 41 | is_param_different.append(not torch.allclose(state_dict[k], prev_state_dict[k])) 42 | assert sum(is_param_different) > 0 43 | workspace.save_snapshot() 44 | workspace.shutdown() 45 | 46 | 47 | def _load_snapshot_process_helper(cfg, tempdir): 48 | # Initialize Workspace inside the subprocess 49 | new_workspace = Workspace(cfg, work_dir=tempdir) 50 | 51 | # Load state_dict from previous process 52 | with open(f"{tempdir}/state_dict.pt", "rb") as f: 53 | state_dict = torch.load(f, map_location="cpu") 54 | 55 | # Check the snapshot path 56 | snapshot_path = os.path.join(tempdir, "snapshots", "latest_snapshot.pt") 57 | assert os.path.exists(snapshot_path) 58 | 59 | # Check whether initial parameters are different from saved parameters 60 | new_state_dict = new_workspace.agent.state_dict() 61 | is_param_different = [] 62 | for k in new_state_dict.keys(): 63 | is_param_different.append(not torch.allclose(state_dict[k], new_state_dict[k])) 64 | assert sum(is_param_different) > 0 65 | 66 | # Load snapshot 67 | new_workspace.load_snapshot() 68 | new_state_dict = new_workspace.agent.state_dict() 69 | 70 | # Check whether the parameters are the same after loading snapshot 71 | assert len(state_dict) == len(new_state_dict) 72 | for k in new_state_dict.keys(): 73 | assert torch.allclose(state_dict[k], new_state_dict[k]) 74 | 75 | new_workspace.shutdown() 76 | 77 | 78 | class Base: 79 | def test_save_load_snapshot(self, method, cfg_params): 80 | GlobalHydra.instance().clear() 81 | initialize(config_path="../../../robobase/cfgs") 82 | method = ["method=" + method] 83 | cfg = compose( 84 | config_name="robobase_config", 85 | overrides=method 86 | + [ 87 | "pixels=true", 88 | "env=dmc/acrobot_swingup", 89 | "save_snapshot=true", 90 | "snapshot_every_n=1", 91 | ] 92 | + cfg_params, 93 | ) 94 | with tempfile.TemporaryDirectory() as tempdir: 95 | p = multiprocessing.Process( 96 | target=_train_process_helper, 
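            # Training runs in its own process; a second, fresh process then
            # performs the load-snapshot checks below.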
args=(cfg, tempdir) 97 | ) 98 | p.start() 99 | p.join() 100 | assert not p.exitcode 101 | 102 | p = multiprocessing.Process( 103 | target=_load_snapshot_process_helper, args=(cfg, tempdir) 104 | ) 105 | p.start() 106 | p.join() 107 | assert not p.exitcode 108 | -------------------------------------------------------------------------------- /tests/unit/method/test_il_methods.py: -------------------------------------------------------------------------------- 1 | """Tests IL algos.""" 2 | import multiprocessing 3 | import tempfile 4 | 5 | import pytest 6 | from hydra import compose, initialize 7 | from hydra.core.global_hydra import GlobalHydra 8 | 9 | from omegaconf import OmegaConf 10 | 11 | from tests.unit.method.base import Base, train_and_shutdown 12 | 13 | 14 | COMMON_OVERRIDES = [ 15 | "num_pretrain_steps=2", 16 | "num_train_frames=2", 17 | "num_gpus=0", 18 | "num_eval_episodes=2", 19 | "replay_size_before_train=10", 20 | "batch_size=2", 21 | "replay.size=500", 22 | "replay.nstep=1", 23 | "replay.num_workers=0", 24 | "replay.pin_memory=false", 25 | "log_pretrain_every=1", 26 | "env.episode_length=10", 27 | ] 28 | 29 | DEFAULT_OVERRIDES = COMMON_OVERRIDES + ["action_sequence=4"] 30 | DIFFUSION_OVERRIDES = COMMON_OVERRIDES + [ 31 | "method.num_diffusion_iters=2", 32 | "action_sequence=20", 33 | ] 34 | 35 | 36 | @pytest.mark.parametrize( 37 | "method,cfg_params", 38 | [ 39 | ("bc", DEFAULT_OVERRIDES), 40 | ("diffusion", DIFFUSION_OVERRIDES), 41 | ("act", DEFAULT_OVERRIDES), 42 | ], 43 | ) 44 | class TestILMethods(Base): 45 | def test_rlbench_without_pixels(self, method, cfg_params): 46 | if method == "act": 47 | pytest.skip("ACT does not support state-only environments.") 48 | GlobalHydra.instance().clear() 49 | initialize(config_path="../../../robobase/cfgs") 50 | method = ["method=" + method] 51 | cfg = compose( 52 | config_name="robobase_config", 53 | overrides=method 54 | + cfg_params 55 | + [ 56 | "pixels=false", 57 | "env=rlbench/reach_target", 58 | "env.action_mode=JOINT_POSITION", 59 | "demos=1", 60 | ], 61 | ) 62 | with tempfile.TemporaryDirectory() as tempdir: 63 | # RLBench needs to be run with multiprocess 64 | p = multiprocessing.Process(target=train_and_shutdown, args=(cfg, tempdir)) 65 | p.start() 66 | p.join() 67 | assert not p.exitcode 68 | 69 | @pytest.mark.parametrize( 70 | ["img_shape", "encoder_model", "view_fusion_model"], 71 | [ 72 | ( 73 | (84, 84), 74 | { 75 | "_target_": "robobase.models.EncoderCNNMultiViewDownsampleWithStrides", 76 | "_partial_": True, 77 | "num_downsample_convs": 1, 78 | "num_post_downsample_convs": 3, 79 | }, 80 | { 81 | "_target_": "robobase.models.FusionMultiCamFeature", 82 | "_partial_": True, 83 | }, 84 | ), 85 | ], 86 | ) 87 | def test_rlbench_with_pixels( 88 | self, method, cfg_params, img_shape, encoder_model, view_fusion_model 89 | ): 90 | GlobalHydra.instance().clear() 91 | initialize(config_path="../../../robobase/cfgs") 92 | method_cfg = ["method=" + method] 93 | cfg = compose( 94 | config_name="robobase_config", 95 | overrides=method_cfg 96 | + cfg_params 97 | + [ 98 | "pixels=true", 99 | "env=rlbench/reach_target", 100 | "env.action_mode=JOINT_POSITION", 101 | "demos=1", 102 | ], 103 | ) 104 | 105 | if method == "act": 106 | encoder_model = { 107 | "_target_": "robobase.method.act.ImageEncoderACT", 108 | "_partial_": True, 109 | } 110 | actor_model = { 111 | "_target_": "robobase.models.multi_view_transformer" 112 | ".MultiViewTransformerEncoderDecoderACT", 113 | "_partial_": True, 114 | "num_queries": cfg.action_sequence, 115 | 
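                # In ACT, each decoder query predicts one action of the chunk,
                # so num_queries is tied to the configured action sequence.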
            }
116 |             cfg.visual_observation_shape = img_shape
117 |             cfg.method.encoder_model = OmegaConf.create(encoder_model)
118 |             cfg.method.actor_model = OmegaConf.create(actor_model)
119 |         else:
120 |             cfg.visual_observation_shape = img_shape
121 |             cfg.method.encoder_model = OmegaConf.create(encoder_model)
122 |             cfg.method.view_fusion_model = OmegaConf.create(view_fusion_model)
123 | 
124 |         with tempfile.TemporaryDirectory() as tempdir:
125 |             # RLBench needs to be run with multiprocess
126 |             p = multiprocessing.Process(target=train_and_shutdown, args=(cfg, tempdir))
127 |             p.start()
128 |             p.join()
129 |             assert not p.exitcode
130 | 
131 |     def test_save_load_snapshot(self, method, cfg_params):
132 |         new_params = cfg_params + [
133 |             "env=rlbench/reach_target",
134 |             "env.action_mode=JOINT_POSITION",
135 |             "demos=1",
136 |         ]
137 |         super().test_save_load_snapshot(method, new_params)
--------------------------------------------------------------------------------
/tests/unit/test_workspace.py:
--------------------------------------------------------------------------------
1 | # TODO: Test if workspace does pre-training steps, etc
--------------------------------------------------------------------------------
/tests/unit/utils/test_add_demo_to_replay_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gymnasium import spaces
3 | import pytest
4 | 
5 | from robobase.utils import add_demo_to_replay_buffer
6 | from robobase.envs.env import DemoEnv
7 | from tests.unit.wrappers.utils import DummyEnv
8 | from robobase.replay_buffer.uniform_replay_buffer import UniformReplayBuffer
9 | from robobase.envs.wrappers import FrameStack, RecedingHorizonControl, ActionSequence
10 | 
11 | 
12 | def collect_demo_from_dummy_env(env, num_demo):
13 |     traj = []
14 | 
15 |     for traj_idx in range(num_demo):
16 |         obs, info = env.reset()
17 |         info["demo"] = 1
18 | 
19 |         cur_traj = [[obs, info]]
20 |         term = 0
21 |         trunc = 0
22 |         while not term and not trunc:
23 |             # Divide by 100 to keep the action within the [-1, 1] range.
24 |             action = np.ones_like(env.action_space.sample()) * (traj_idx - 2) / 100.0
25 |             obs, rew, term, trunc, info = env.step(action)
26 |             info["demo_action"] = action
27 |             info["demo"] = 1
28 |             cur_traj.append([obs, rew, term, trunc, info])
29 | 
30 |         traj.append(cur_traj)
31 | 
32 |     return traj
33 | 
34 | 
35 | def wrap_env(env, frame_stack, action_sequence, execution_step, demo_env=False):
36 |     if not demo_env:
37 |         env = FrameStack(env, frame_stack)
38 |     if action_sequence == execution_step:
39 |         env = ActionSequence(env, action_sequence)
40 |     else:
41 |         env = RecedingHorizonControl(
42 |             env, action_sequence, 5, execution_step, temporal_ensemble=False
43 |         )
44 | 
45 |     return env
46 | 
47 | 
48 | @pytest.mark.parametrize(
49 |     "frame_stack, action_sequence, execution_step",
50 |     [(1, 1, 1), (1, 2, 1), (2, 1, 1), (2, 2, 1), (3, 3, 1), (2, 5, 1), (2, 5, 5)],
51 | )
52 | def test_add_demo_to_replay_buffer(frame_stack, action_sequence, execution_step):
53 |     eps_len = 20
54 |     num_demo = 5
55 | 
56 |     # Check demo collection
57 |     env = DummyEnv(episode_len=eps_len)
58 |     demos = collect_demo_from_dummy_env(env, num_demo=num_demo)
59 |     assert len(demos) == num_demo
60 |     for demo in demos:
61 |         assert (
62 |             len(demo) == eps_len + 1
63 |         )  # demo length should be eps_len + 1 due to the first reset
64 | 
65 |     demo_env = DemoEnv(demos, env.action_space, env.observation_space)
66 | 
67 |     # For the non-demo env, action_sequence and frame_stack should be set properly.
68 |     env = wrap_env(env, 
frame_stack, action_sequence, execution_step, demo_env=False) 69 | print(env.action_space.shape) 70 | print(env.observation_space) 71 | assert env.action_space.shape[0] == action_sequence 72 | for obs_space in env.observation_space.values(): 73 | assert obs_space.shape[0] == frame_stack 74 | 75 | # For demo env, action_seq and frame_stack should always be 1 76 | demo_env = wrap_env( 77 | demo_env, frame_stack, action_sequence, execution_step, demo_env=True 78 | ) 79 | print(demo_env.action_space.shape) 80 | print(demo_env.observation_space) 81 | 82 | info_elements = spaces.Dict({}) 83 | info_elements["demo"] = spaces.Box(0, 1, shape=(), dtype=np.uint8) 84 | replay_buffer = UniformReplayBuffer( 85 | action_shape=env.action_space.shape, 86 | action_dtype=env.action_space.dtype, 87 | nstep=1, 88 | reward_shape=(), 89 | reward_dtype=np.float32, 90 | observation_elements=env.observation_space, 91 | extra_replay_elements=info_elements, 92 | ) 93 | # print(replay_buffer._frame_stacks, replay_buffer._action_seq_len) 94 | assert replay_buffer._frame_stacks == frame_stack 95 | assert replay_buffer._action_seq_len == action_sequence 96 | 97 | # Test adding demo to replay buffer 98 | for _ in range(len(demos)): 99 | add_demo_to_replay_buffer(demo_env, replay_buffer) 100 | assert replay_buffer.add_count == eps_len * num_demo 101 | assert replay_buffer._num_episodes == num_demo 102 | 103 | # Sample episode from replay buffer 104 | replay_buffer._try_fetch() 105 | episode, _ = replay_buffer._sample_episode() 106 | for v in episode.values(): 107 | assert len(v) == eps_len + 1 # Episode length should be correct 108 | 109 | # Sample transitions from replay buffer 110 | for _ in range(10): 111 | sample = replay_buffer.sample_single() 112 | print(sample["action"]) 113 | # - Frame stack, and action sequence should be correctly implemented 114 | for key in env.observation_space.keys(): 115 | assert sample[key].shape == env.observation_space[key].shape 116 | assert sample["action"].shape == env.action_space.shape 117 | 118 | # Check action_sequence is properly padded with zero 119 | for idx in range(eps_len): 120 | sample = replay_buffer.sample_single(idx) 121 | print(sample["action"]) 122 | num_pad_pos = max(idx - (eps_len - action_sequence), 0) 123 | if num_pad_pos > 0: 124 | assert sample["action"][-num_pad_pos:].sum() == 0 125 | 126 | 127 | if __name__ == "__main__": 128 | test_add_demo_to_replay_buffer(2, 1, 1) 129 | test_add_demo_to_replay_buffer(2, 5, 1) 130 | test_add_demo_to_replay_buffer(2, 5, 5) 131 | -------------------------------------------------------------------------------- /tests/unit/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robobase-org/robobase/411b7c7a40272dcaa7eed0ea2459af07002d53e5/tests/unit/wrappers/__init__.py -------------------------------------------------------------------------------- /tests/unit/wrappers/test_action_sequence.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from gymnasium.vector import SyncVectorEnv 3 | from tests.unit.wrappers.utils import DummyEnv, ACTION_SHAPE 4 | from robobase.envs.wrappers import ActionSequence, RecedingHorizonControl 5 | 6 | NUM_ENVS = 2 7 | SEQ_LEN = 5 8 | TIME_LIMIT = 100 9 | EXE_LEN = 1 10 | 11 | 12 | def _create_receding_horizon_env(): 13 | env = RecedingHorizonControl( 14 | DummyEnv(), 15 | sequence_length=SEQ_LEN, 16 | time_limit=TIME_LIMIT, 17 | execution_length=EXE_LEN, 18 | 
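        # Temporal ensembling averages the overlapping action predictions from
        # successive plans instead of executing each plan open-loop.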
temporal_ensemble=True, 19 | ) 20 | return env 21 | 22 | 23 | def test_action_sequence_has_correct_shape(): 24 | env = ActionSequence(DummyEnv(), SEQ_LEN) 25 | assert env.action_space.shape == (SEQ_LEN,) + ACTION_SHAPE 26 | 27 | 28 | def test_action_sequence_vec_has_correct_shape(): 29 | env = SyncVectorEnv( 30 | [lambda: ActionSequence(DummyEnv(), SEQ_LEN) for _ in range(NUM_ENVS)] 31 | ) 32 | assert env.action_space.shape == (NUM_ENVS, SEQ_LEN) + ACTION_SHAPE 33 | 34 | 35 | def test_action_sequence_can_step(): 36 | env = ActionSequence(DummyEnv(), SEQ_LEN) 37 | env.reset() 38 | for _ in range(5): 39 | obs, *_, info = env.step(env.action_space.sample()) 40 | assert "action_sequence_mask" in info 41 | assert info["action_sequence_mask"].shape == (SEQ_LEN,) 42 | 43 | 44 | def test_action_sequence_can_step_vec_wrapped_env(): 45 | env = SyncVectorEnv( 46 | [lambda: ActionSequence(DummyEnv(), SEQ_LEN) for _ in range(NUM_ENVS)] 47 | ) 48 | env.reset() 49 | for _ in range(5): 50 | obs, *_, info = env.step(env.action_space.sample()) 51 | assert "action_sequence_mask" in info["final_info"][0] 52 | assert info["final_info"][0]["action_sequence_mask"].shape == (SEQ_LEN,) 53 | 54 | 55 | def test_action_sequence_cant_be_used_with_vec_env(): 56 | with pytest.raises(NotImplementedError): 57 | ActionSequence( 58 | SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), SEQ_LEN 59 | ) 60 | 61 | 62 | def test_receding_horizon_has_correct_shape(): 63 | env = _create_receding_horizon_env() 64 | assert env.action_space.shape == (SEQ_LEN,) + ACTION_SHAPE 65 | 66 | 67 | def test_receding_horizon_vec_has_correct_shape(): 68 | env = SyncVectorEnv( 69 | [lambda: _create_receding_horizon_env() for _ in range(NUM_ENVS)] 70 | ) 71 | assert env.action_space.shape == (NUM_ENVS, SEQ_LEN) + ACTION_SHAPE 72 | 73 | 74 | def test_receding_horizon_can_step(): 75 | env = _create_receding_horizon_env() 76 | env.reset() 77 | for _ in range(5): 78 | obs, *_, info = env.step(env.action_space.sample()) 79 | assert "action_sequence_mask" in info 80 | assert info["action_sequence_mask"].shape == (SEQ_LEN,) 81 | 82 | 83 | def test_receding_horizon_can_step_vec_wrapped_env(): 84 | env = SyncVectorEnv( 85 | [lambda: _create_receding_horizon_env() for _ in range(NUM_ENVS)] 86 | ) 87 | env.reset() 88 | for _ in range(5): 89 | obs, *_, info = env.step(env.action_space.sample()) 90 | if "final_info" in info: 91 | assert "action_sequence_mask" in info["final_info"][0] 92 | assert info["final_info"][0]["action_sequence_mask"].shape == (SEQ_LEN,) 93 | else: 94 | assert "action_sequence_mask" in info 95 | assert info["action_sequence_mask"][0].shape == (SEQ_LEN,) 96 | 97 | 98 | def test_receding_horizon_cant_be_used_with_vec_env(): 99 | with pytest.raises(NotImplementedError): 100 | RecedingHorizonControl( 101 | SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), 102 | sequence_length=SEQ_LEN, 103 | time_limit=TIME_LIMIT, 104 | execution_length=EXE_LEN, 105 | temporal_ensemble=True, 106 | ) 107 | -------------------------------------------------------------------------------- /tests/unit/wrappers/test_concat_dim.py: -------------------------------------------------------------------------------- 1 | from gymnasium.vector import SyncVectorEnv 2 | from tests.unit.wrappers.utils import ( 3 | DummyEnv, 4 | OBS_SIZE, 5 | OBS_NAME_FLAT1, 6 | OBS_NAME_IMG1, 7 | OBS_NAME_IMG2, 8 | IMG_SHAPE, 9 | OBS_NAME_FLAT2, 10 | ) 11 | from robobase.envs.wrappers import ConcatDim 12 | 13 | NUM_ENVS = 2 14 | CAT_NAME = "combined" 15 | NEW_FLAT_SIZE 
= OBS_SIZE + OBS_SIZE
16 | NEW_IMG_SHAPE = (6,) + IMG_SHAPE[1:]
17 | 
18 | 
19 | def test_concat_images_single_env():
20 |     env = ConcatDim(DummyEnv(), 3, 0, CAT_NAME)
21 |     assert OBS_NAME_IMG1 not in env.observation_space
22 |     assert OBS_NAME_IMG2 not in env.observation_space
23 |     assert CAT_NAME in dict(env.observation_space)
24 |     assert env.observation_space[CAT_NAME].shape == NEW_IMG_SHAPE
25 |     obs, _ = env.reset()
26 |     assert obs[CAT_NAME].shape == NEW_IMG_SHAPE
27 |     obs, *_ = env.step(env.action_space.sample())
28 |     assert obs[CAT_NAME].shape == NEW_IMG_SHAPE
29 | 
30 | 
31 | def test_concat_low_dim_single_env():
32 |     env = ConcatDim(DummyEnv(), 1, 0, CAT_NAME)
33 |     assert OBS_NAME_FLAT1 not in env.observation_space
34 |     assert OBS_NAME_FLAT2 not in env.observation_space
35 |     assert CAT_NAME in dict(env.observation_space)
36 |     assert env.observation_space[CAT_NAME].shape == (NEW_FLAT_SIZE,)
37 |     obs, _ = env.reset()
38 |     assert obs[CAT_NAME].shape == (NEW_FLAT_SIZE,)
39 |     obs, *_ = env.step(env.action_space.sample())
40 |     assert obs[CAT_NAME].shape == (NEW_FLAT_SIZE,)
41 | 
42 | 
43 | def test_concat_low_dim_vec_wrapped_env():
44 |     env = SyncVectorEnv(
45 |         [lambda: ConcatDim(DummyEnv(), 1, 0, CAT_NAME) for _ in range(NUM_ENVS)]
46 |     )
47 |     assert OBS_NAME_FLAT1 not in env.observation_space
48 |     assert OBS_NAME_FLAT2 not in env.observation_space
49 |     assert CAT_NAME in dict(env.observation_space)
50 |     assert env.observation_space[CAT_NAME].shape == (NUM_ENVS, NEW_FLAT_SIZE)
51 |     obs, _ = env.reset()
52 |     for _ in range(5):
53 |         obs, *_, info = env.step(env.action_space.sample())
54 |         assert obs[CAT_NAME].shape == (NUM_ENVS, NEW_FLAT_SIZE)
55 |     assert info["final_observation"][0][CAT_NAME].shape == (NEW_FLAT_SIZE,)
56 | 
57 | 
58 | def test_concat_low_dim_wrapped_vec_env():
59 |     env = ConcatDim(
60 |         SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), 1, 0, CAT_NAME
61 |     )
62 |     assert OBS_NAME_FLAT1 not in env.observation_space
63 |     assert OBS_NAME_FLAT2 not in env.observation_space
64 |     assert CAT_NAME in dict(env.observation_space)
65 |     assert env.observation_space[CAT_NAME].shape == (NUM_ENVS, NEW_FLAT_SIZE)
66 |     obs, _ = env.reset()
67 |     for _ in range(5):
68 |         obs, *_, info = env.step(env.action_space.sample())
69 |         assert obs[CAT_NAME].shape == (NUM_ENVS, NEW_FLAT_SIZE)
70 |     assert info["final_observation"][0][CAT_NAME].shape == (NEW_FLAT_SIZE,)
71 | 
--------------------------------------------------------------------------------
/tests/unit/wrappers/test_frame_stack.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gymnasium.vector import SyncVectorEnv
3 | from tests.unit.wrappers.utils import DummyEnv, OBS_SIZE, OBS_NAME_FLAT1
4 | from robobase.envs.wrappers import FrameStack
5 | 
6 | NUM_STACK = 5
7 | NUM_ENVS = 2
8 | 
9 | 
10 | def test_frame_stack_single_env():
11 |     env = FrameStack(DummyEnv(), NUM_STACK)
12 |     assert env.observation_space[OBS_NAME_FLAT1].shape == (NUM_STACK, OBS_SIZE)
13 |     obs, _ = env.reset()
14 |     assert obs[OBS_NAME_FLAT1].shape == (NUM_STACK, OBS_SIZE)
15 |     obs, *_ = env.step(env.action_space.sample())
16 |     assert obs[OBS_NAME_FLAT1].shape == (NUM_STACK, OBS_SIZE)
17 | 
18 | 
19 | def test_frame_stack_vec_wrapped_env():
20 |     env = SyncVectorEnv(
21 |         [lambda: FrameStack(DummyEnv(), NUM_STACK) for _ in range(NUM_ENVS)]
22 |     )
23 |     assert env.observation_space[OBS_NAME_FLAT1].shape == (
24 |         NUM_ENVS,
25 |         NUM_STACK,
26 |         OBS_SIZE,
27 |     )
28 |     obs, info = env.reset()
29 |     assert obs[OBS_NAME_FLAT1].shape 
== (NUM_ENVS, NUM_STACK, OBS_SIZE)
30 |     for _ in range(5):
31 |         obs, *_, info = env.step(env.action_space.sample())
32 |         assert obs[OBS_NAME_FLAT1].shape == (NUM_ENVS, NUM_STACK, OBS_SIZE)
33 |     assert len(info["final_observation"]) == NUM_ENVS
34 |     assert info["final_observation"][0][OBS_NAME_FLAT1].shape == (NUM_STACK, OBS_SIZE)
35 | 
36 | 
37 | def test_frame_stack_wrapped_vec_env():
38 |     env = FrameStack(
39 |         SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), NUM_STACK
40 |     )
41 |     assert env.observation_space[OBS_NAME_FLAT1].shape == (
42 |         NUM_ENVS,
43 |         NUM_STACK,
44 |         OBS_SIZE,
45 |     )
46 |     obs, info = env.reset()
47 |     assert obs[OBS_NAME_FLAT1].shape == (NUM_ENVS, NUM_STACK, OBS_SIZE)
48 |     assert np.all(obs[OBS_NAME_FLAT1] == 0)
49 |     for _ in range(5):
50 |         obs, *_, info = env.step(env.action_space.sample())
51 |         assert obs[OBS_NAME_FLAT1].shape == (NUM_ENVS, NUM_STACK, OBS_SIZE)
52 |     assert len(info["final_observation"]) == NUM_ENVS
53 |     assert np.all(
54 |         info["final_observation"][0][OBS_NAME_FLAT1] == np.arange(1, 6)[:, np.newaxis]
55 |     )
56 |     assert info["final_observation"][0][OBS_NAME_FLAT1].shape == (NUM_STACK, OBS_SIZE)
57 | 
--------------------------------------------------------------------------------
/tests/unit/wrappers/test_onehot_time.py:
--------------------------------------------------------------------------------
1 | from gymnasium.vector import SyncVectorEnv
2 | from tests.unit.wrappers.utils import DummyEnv
3 | from robobase.envs.wrappers import OnehotTime
4 | 
5 | NUM_ENVS = 2
6 | EPISODE_LEN = 5
7 | 
8 | 
9 | def test_onehot_single_env():
10 |     env = OnehotTime(DummyEnv(), EPISODE_LEN)
11 |     assert env.observation_space["time"].shape == (EPISODE_LEN + OnehotTime.PADDING,)
12 |     obs, _ = env.reset()
13 |     assert obs["time"].shape == (EPISODE_LEN + OnehotTime.PADDING,)
14 |     obs, *_ = env.step(env.action_space.sample())
15 |     assert obs["time"].shape == (EPISODE_LEN + OnehotTime.PADDING,)
16 | 
17 | 
18 | def test_onehot_vec_wrapped_env():
19 |     env = SyncVectorEnv(
20 |         [lambda: OnehotTime(DummyEnv(), EPISODE_LEN) for _ in range(NUM_ENVS)]
21 |     )
22 |     assert env.observation_space["time"].shape == (
23 |         NUM_ENVS,
24 |         EPISODE_LEN + OnehotTime.PADDING,
25 |     )
26 |     obs, info = env.reset()
27 |     assert obs["time"].shape == (NUM_ENVS, EPISODE_LEN + OnehotTime.PADDING)
28 |     for _ in range(5):
29 |         obs, *_, info = env.step(env.action_space.sample())
30 |         assert obs["time"].shape == (NUM_ENVS, EPISODE_LEN + OnehotTime.PADDING)
31 |     assert len(info["final_observation"]) == NUM_ENVS
32 |     assert info["final_observation"][0]["time"].shape == (
33 |         EPISODE_LEN + OnehotTime.PADDING,
34 |     )
35 | 
36 | 
37 | def test_onehot_wrapped_vec_env():
38 |     env = OnehotTime(
39 |         SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), EPISODE_LEN
40 |     )
41 |     assert env.observation_space["time"].shape == (
42 |         NUM_ENVS,
43 |         EPISODE_LEN + OnehotTime.PADDING,
44 |     )
45 |     obs, info = env.reset()
46 |     assert obs["time"].shape == (NUM_ENVS, EPISODE_LEN + OnehotTime.PADDING)
47 |     for _ in range(5):
48 |         obs, *_, info = env.step(env.action_space.sample())
49 |         assert obs["time"].shape == (NUM_ENVS, EPISODE_LEN + OnehotTime.PADDING)
50 |     assert len(info["final_observation"]) == NUM_ENVS
51 |     assert info["final_observation"][0]["time"].shape == (
52 |         EPISODE_LEN + OnehotTime.PADDING,
53 |     )
54 | 
--------------------------------------------------------------------------------
/tests/unit/wrappers/test_rescale_from_tanh.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gymnasium.vector import SyncVectorEnv
3 | from tests.unit.wrappers.utils import DummyEnv, DummyEEEnv, ACTION_SHAPE
4 | from robobase.envs.wrappers import (
5 |     RescaleFromTanh,
6 |     RescaleFromTanhEEPose,
7 |     RescaleFromTanhWithStandardization,
8 |     RescaleFromTanhWithMinMax,
9 | )
10 | 
11 | NUM_ENVS = 2
12 | 
13 | 
14 | def _assert_rescale_from_tanh_to_2(env):
15 |     assert np.all(env.action_space.low == -1)
16 |     assert np.all(env.action_space.high == 1)
17 |     rescaled_action = env.action(np.ones_like(env.action_space.sample()))
18 |     assert np.all(rescaled_action == 2)
19 | 
20 | 
21 | def _assert_rescale_from_tanh_ee_unit_quaternion(env):
22 |     assert np.all(env.action_space.low == -1)
23 |     assert np.all(env.action_space.high == 1)
24 |     rescaled_action = env.action(env.action_space.sample())
25 |     assert np.allclose(np.linalg.norm(rescaled_action[3:7]), 1)
26 | 
27 | 
28 | def _assert_rescale_from_tanh_ee_unit_quaternion_vec_env(env):
29 |     assert np.all(env.action_space.low == -1)
30 |     assert np.all(env.action_space.high == 1)
31 |     rescaled_action = env.action(env.action_space.sample())
32 |     assert np.allclose(
33 |         np.linalg.norm(rescaled_action[:, 3:7], axis=1),
34 |         np.ones(rescaled_action.shape[0]),
35 |     )
36 | 
37 | 
38 | def _assert_rescale_from_tanh_with_standardization(env):
39 |     assert np.all(env.action_space.low == -1)
40 |     assert np.all(env.action_space.high == 1)
41 |     rescaled_action = env.action(np.ones_like(env.action_space.sample()))
42 |     assert np.allclose(rescaled_action, 3 * 0.1 + 0.5)
43 |     rescaled_action = env.action(-np.ones_like(env.action_space.sample()))
44 |     assert np.allclose(rescaled_action, -3 * 0.1 + 0.5)
45 | 
46 | 
47 | def _assert_rescale_from_tanh_with_minmax(env):
48 |     assert np.all(env.action_space.low == -1)
49 |     assert np.all(env.action_space.high == 1)
50 |     rescaled_action = env.action(np.ones_like(env.action_space.sample()))
51 |     assert np.allclose(rescaled_action, 1 * 1.2 - 0.6 * 1.2)
52 |     rescaled_action = env.action(-np.ones_like(env.action_space.sample()))
53 |     assert np.allclose(rescaled_action, 0 * 1.2 - 0.6 * 1.2)
54 | 
55 | 
56 | def test_rescale_single_env():
57 |     env = RescaleFromTanh(DummyEnv())
58 |     _assert_rescale_from_tanh_to_2(env)
59 | 
60 | 
61 | def test_rescale_ee_single_env():
62 |     env = RescaleFromTanhEEPose(DummyEEEnv())
63 |     _assert_rescale_from_tanh_ee_unit_quaternion(env)
64 | 
65 | 
66 | def test_rescale_with_standardization_single_env():
67 |     action_stats = {
68 |         "mean": np.ones(ACTION_SHAPE) * 0.5,
69 |         "std": np.ones(ACTION_SHAPE) * 0.1,
70 |     }
71 |     env = RescaleFromTanhWithStandardization(DummyEnv(), action_stats)
72 |     _assert_rescale_from_tanh_with_standardization(env)
73 | 
74 | 
75 | def test_rescale_with_minmax_single_env():
76 |     action_stats = {
77 |         "min": -np.ones(ACTION_SHAPE) * 0.6,
78 |         "max": np.ones(ACTION_SHAPE) * 0.4,
79 |     }
80 |     env = RescaleFromTanhWithMinMax(DummyEnv(), action_stats, min_max_margin=0.2)
81 |     _assert_rescale_from_tanh_with_minmax(env)
82 | 
83 | 
84 | def test_rescale_wrapped_vec_env():
85 |     env = RescaleFromTanh(SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]))
86 |     _assert_rescale_from_tanh_to_2(env)
87 | 
88 | 
89 | def test_rescale_ee_wrapped_vec_env():
90 |     env = RescaleFromTanhEEPose(
91 |         SyncVectorEnv([lambda: DummyEEEnv() for _ in range(NUM_ENVS)])
92 |     )
93 |     _assert_rescale_from_tanh_ee_unit_quaternion_vec_env(env)
94 | 
95 | 
96 | def 
test_rescale_with_standardization_wrapped_vec_env(): 97 | action_stats = { 98 | "mean": np.ones(ACTION_SHAPE) * 0.5, 99 | "std": np.ones(ACTION_SHAPE) * 0.1, 100 | } 101 | env = RescaleFromTanhWithStandardization( 102 | SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), action_stats 103 | ) 104 | _assert_rescale_from_tanh_with_standardization(env) 105 | 106 | 107 | def test_rescale_with_minmax_wrapped_vec_env(): 108 | action_stats = { 109 | "min": -np.ones(ACTION_SHAPE) * 0.6, 110 | "max": np.ones(ACTION_SHAPE) * 0.4, 111 | } 112 | env = RescaleFromTanhWithMinMax( 113 | SyncVectorEnv([lambda: DummyEnv() for _ in range(NUM_ENVS)]), 114 | action_stats, 115 | min_max_margin=0.2, 116 | ) 117 | _assert_rescale_from_tanh_with_minmax(env) 118 | -------------------------------------------------------------------------------- /tests/unit/wrappers/test_reward_modifiers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tests.unit.wrappers.utils import DummyRewardEnv 3 | from robobase.envs.wrappers import ClipReward, ScaleReward 4 | 5 | 6 | def test_clip_env(): 7 | env = ClipReward(DummyRewardEnv(), -1, 1) 8 | env.reset() 9 | _, reward, _, _, _ = env.step(env.action_space.sample()) 10 | assert -1 <= reward <= 1 11 | 12 | 13 | def test_scale_env(): 14 | default_reward = np.random.random() 15 | scale = np.random.random() 16 | env = ScaleReward(DummyRewardEnv(default_reward=default_reward), scale=scale) 17 | env.reset() 18 | _, reward, _, _, _ = env.step(env.action_space.sample()) 19 | assert np.isclose(reward, default_reward * scale) 20 | -------------------------------------------------------------------------------- /tests/unit/wrappers/utils.py: -------------------------------------------------------------------------------- 1 | import gymnasium as gym 2 | import numpy as np 3 | from gymnasium import spaces 4 | 5 | OBS_NAME_FLAT1 = "obs0" 6 | OBS_NAME_FLAT2 = "obs1" 7 | OBS_NAME_IMG1 = "obs2" 8 | OBS_NAME_IMG2 = "obs3" 9 | OBS_SIZE = 100 10 | IMG_SHAPE = (3, 8, 8) 11 | ACTION_SHAPE = (2,) 12 | EE_ACTION_SHAPE = (8,) 13 | 14 | 15 | class DummyEnv(gym.Env): 16 | def __init__(self, episode_len: int = 5): 17 | super().__init__() 18 | self.observation_space = spaces.Dict( 19 | { 20 | OBS_NAME_FLAT1: spaces.Box(-1, episode_len, (OBS_SIZE,)), 21 | OBS_NAME_FLAT2: spaces.Box(-1, episode_len, (OBS_SIZE,)), 22 | OBS_NAME_IMG1: spaces.Box(-1, episode_len, IMG_SHAPE), 23 | OBS_NAME_IMG2: spaces.Box(-1, episode_len, IMG_SHAPE), 24 | } 25 | ) 26 | self.action_space = spaces.Box(-2, 2, ACTION_SHAPE) 27 | self._steps = 0 28 | self._episode_len = episode_len 29 | 30 | def step(self, action): 31 | self._steps += 1 32 | flat_obs = self._steps + np.zeros(shape=(OBS_SIZE,)) 33 | img_obs = self._steps + np.zeros(shape=IMG_SHAPE, dtype=np.uint8) 34 | return ( 35 | { 36 | OBS_NAME_FLAT1: flat_obs, 37 | OBS_NAME_FLAT2: flat_obs, 38 | OBS_NAME_IMG1: img_obs, 39 | OBS_NAME_IMG2: img_obs, 40 | }, 41 | 0 if self._steps < self._episode_len else 100, 42 | self._steps >= self._episode_len, 43 | False, 44 | {}, 45 | ) 46 | 47 | def reset(self, *args, **kwargs): 48 | self._steps = 0 49 | flat_obs = np.zeros(shape=(OBS_SIZE,)) 50 | img_obs = np.zeros(shape=IMG_SHAPE, dtype=np.uint8) 51 | return { 52 | OBS_NAME_FLAT1: flat_obs, 53 | OBS_NAME_FLAT2: flat_obs, 54 | OBS_NAME_IMG1: img_obs, 55 | OBS_NAME_IMG2: img_obs, 56 | }, {} 57 | 58 | 59 | class DummyEEEnv(gym.Env): 60 | def __init__(self, episode_len: int = 5): 61 | super().__init__() 62 | 
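        # Mimic an 8-D end-effector action: xyz position (3), quaternion (4),
        # gripper (1); the rescale tests expect dims 3:7 to come back as a
        # unit quaternion.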
self.observation_space = spaces.Dict(
63 |             {
64 |                 OBS_NAME_FLAT1: spaces.Box(-1, 1, (OBS_SIZE,)),
65 |                 OBS_NAME_FLAT2: spaces.Box(-1, 1, (OBS_SIZE,)),
66 |                 OBS_NAME_IMG1: spaces.Box(-1, 1, IMG_SHAPE),
67 |                 OBS_NAME_IMG2: spaces.Box(-1, 1, IMG_SHAPE),
68 |             }
69 |         )
70 |         act_min = np.array([-0.1, -0.5, 0.8] + 4 * [0.0] + [0.0], dtype=np.float32)
71 |         act_max = np.array([0.7, 0.5, 1.7] + 4 * [1.0] + [1.0], dtype=np.float32)
72 |         self.action_space = spaces.Box(act_min, act_max, EE_ACTION_SHAPE)
73 |         self._steps = 0
74 |         self._episode_len = episode_len
75 | 
76 | 
77 | class DummyRewardEnv(DummyEnv):
78 |     def __init__(self, episode_len: int = 5, default_reward: float | None = None):
79 |         super().__init__(episode_len)
80 |         self._default_reward = default_reward
81 | 
82 |     def step(self, action):
83 |         obs, _, terminated, truncated, info = super().step(action)
84 |         reward = np.random.randint(-5, 5)
85 |         if self._default_reward is not None:
86 |             reward = self._default_reward
87 |         return obs, reward, terminated, truncated, info
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | import hydra
4 | 
5 | 
6 | @hydra.main(
7 |     config_path="robobase/cfgs", config_name="robobase_config", version_base=None
8 | )
9 | def main(cfg):
10 |     from robobase.workspace import Workspace
11 | 
12 |     root_dir = Path.cwd()
13 | 
14 |     workspace = Workspace(cfg)
15 | 
16 |     snapshot = root_dir / "snapshot.pt"
17 |     if snapshot.exists():
18 |         print(f"resuming: {snapshot}")
19 |         workspace.load_snapshot()
20 |     workspace.train()
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     main()
25 | 
--------------------------------------------------------------------------------