├── .gitignore ├── README.md ├── bert_config ├── bert-base-uncased │ ├── config.json │ └── vocab.txt └── xlm-roberta-base │ ├── config.json │ ├── sentencepiece.bpe.model │ └── tokenizer.json ├── diffusion_policy ├── LICENSE ├── README.md ├── conda_environment.yaml ├── conda_environment_macos.yaml ├── conda_environment_real.yaml ├── demo_pusht.py ├── demo_real_robot.py ├── diffusion_policy.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ └── top_level.txt ├── diffusion_policy │ ├── codecs │ │ └── imagecodecs_numcodecs.py │ ├── common │ │ ├── checkpoint_util.py │ │ ├── cv2_util.py │ │ ├── env_util.py │ │ ├── json_logger.py │ │ ├── nested_dict_util.py │ │ ├── normalize_util.py │ │ ├── pose_trajectory_interpolator.py │ │ ├── precise_sleep.py │ │ ├── pymunk_override.py │ │ ├── pymunk_util.py │ │ ├── pytorch_util.py │ │ ├── replay_buffer.py │ │ ├── robomimic_config_util.py │ │ ├── robomimic_util.py │ │ ├── sampler.py │ │ └── timestamp_accumulator.py │ ├── config │ │ ├── task │ │ │ ├── blockpush_lowdim_seed.yaml │ │ │ ├── blockpush_lowdim_seed_abs.yaml │ │ │ ├── can_image.yaml │ │ │ ├── can_image_abs.yaml │ │ │ ├── can_lowdim.yaml │ │ │ ├── can_lowdim_abs.yaml │ │ │ ├── kitchen_lowdim.yaml │ │ │ ├── kitchen_lowdim_abs.yaml │ │ │ ├── lift_image.yaml │ │ │ ├── lift_image_abs.yaml │ │ │ ├── lift_lowdim.yaml │ │ │ ├── lift_lowdim_abs.yaml │ │ │ ├── pusht_image.yaml │ │ │ ├── pusht_lowdim.yaml │ │ │ ├── real_pusht_image.yaml │ │ │ ├── square_image.yaml │ │ │ ├── square_image_abs.yaml │ │ │ ├── square_lowdim.yaml │ │ │ ├── square_lowdim_abs.yaml │ │ │ ├── tool_hang_image.yaml │ │ │ ├── tool_hang_image_abs.yaml │ │ │ ├── tool_hang_lowdim.yaml │ │ │ ├── tool_hang_lowdim_abs.yaml │ │ │ ├── transport_image.yaml │ │ │ ├── transport_image_abs.yaml │ │ │ ├── transport_lowdim.yaml │ │ │ └── transport_lowdim_abs.yaml │ │ ├── train_bet_lowdim_workspace.yaml │ │ ├── train_diffusion_transformer_hybrid_workspace.yaml │ │ ├── train_diffusion_transformer_lowdim_kitchen_workspace.yaml │ │ ├── train_diffusion_transformer_lowdim_pusht_workspace.yaml │ │ ├── train_diffusion_transformer_lowdim_workspace.yaml │ │ ├── train_diffusion_transformer_real_hybrid_workspace.yaml │ │ ├── train_diffusion_unet_ddim_hybrid_workspace.yaml │ │ ├── train_diffusion_unet_ddim_lowdim_workspace.yaml │ │ ├── train_diffusion_unet_hybrid_workspace.yaml │ │ ├── train_diffusion_unet_image_pretrained_workspace.yaml │ │ ├── train_diffusion_unet_image_workspace.yaml │ │ ├── train_diffusion_unet_lowdim_workspace.yaml │ │ ├── train_diffusion_unet_real_hybrid_workspace.yaml │ │ ├── train_diffusion_unet_real_image_workspace.yaml │ │ ├── train_diffusion_unet_real_pretrained_workspace.yaml │ │ ├── train_diffusion_unet_video_workspace.yaml │ │ ├── train_ibc_dfo_hybrid_workspace.yaml │ │ ├── train_ibc_dfo_lowdim_workspace.yaml │ │ ├── train_ibc_dfo_real_hybrid_workspace.yaml │ │ ├── train_robomimic_image_workspace.yaml │ │ ├── train_robomimic_lowdim_workspace.yaml │ │ └── train_robomimic_real_image_workspace.yaml │ ├── dataset │ │ ├── base_dataset.py │ │ ├── blockpush_lowdim_dataset.py │ │ ├── kitchen_lowdim_dataset.py │ │ ├── kitchen_mjl_lowdim_dataset.py │ │ ├── pusht_dataset.py │ │ ├── pusht_image_dataset.py │ │ ├── real_pusht_image_dataset.py │ │ ├── robomimic_replay_image_dataset.py │ │ └── robomimic_replay_lowdim_dataset.py │ ├── env │ │ ├── block_pushing │ │ │ ├── assets │ │ │ │ ├── block.urdf │ │ │ │ ├── block2.urdf │ │ │ │ ├── blocks │ │ │ │ │ ├── blue_cube.urdf │ │ │ │ │ ├── cube.obj │ │ │ │ │ ├── green_star.urdf │ │ │ │ │ 
├── moon.obj │ │ │ │ │ ├── pentagon.obj │ │ │ │ │ ├── red_moon.urdf │ │ │ │ │ ├── star.obj │ │ │ │ │ └── yellow_pentagon.urdf │ │ │ │ ├── insert.urdf │ │ │ │ ├── plane.obj │ │ │ │ ├── suction │ │ │ │ │ ├── base.obj │ │ │ │ │ ├── cylinder.urdf │ │ │ │ │ ├── cylinder_real.urdf │ │ │ │ │ ├── head.obj │ │ │ │ │ ├── mid.obj │ │ │ │ │ ├── suction-base.urdf │ │ │ │ │ ├── suction-head-long.urdf │ │ │ │ │ ├── suction-head.urdf │ │ │ │ │ └── tip.obj │ │ │ │ ├── workspace.urdf │ │ │ │ ├── workspace_real.urdf │ │ │ │ ├── zone.obj │ │ │ │ ├── zone.urdf │ │ │ │ └── zone2.urdf │ │ │ ├── block_pushing.py │ │ │ ├── block_pushing_discontinuous.py │ │ │ ├── block_pushing_multimodal.py │ │ │ ├── oracles │ │ │ │ ├── discontinuous_push_oracle.py │ │ │ │ ├── multimodal_push_oracle.py │ │ │ │ ├── oriented_push_oracle.py │ │ │ │ ├── pushing_info.py │ │ │ │ └── reach_oracle.py │ │ │ └── utils │ │ │ │ ├── pose3d.py │ │ │ │ ├── utils_pybullet.py │ │ │ │ └── xarm_sim_robot.py │ │ ├── kitchen │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── kitchen_lowdim_wrapper.py │ │ │ ├── kitchen_util.py │ │ │ ├── relay_policy_learning │ │ │ │ ├── adept_envs │ │ │ │ │ ├── .pylintrc │ │ │ │ │ ├── .style.yapf │ │ │ │ │ └── adept_envs │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── base_robot.py │ │ │ │ │ │ ├── franka │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── assets │ │ │ │ │ │ │ └── franka_kitchen_jntpos_act_ab.xml │ │ │ │ │ │ ├── kitchen_multitask_v0.py │ │ │ │ │ │ └── robot │ │ │ │ │ │ │ ├── franka_config.xml │ │ │ │ │ │ │ └── franka_robot.py │ │ │ │ │ │ ├── mujoco_env.py │ │ │ │ │ │ ├── robot_env.py │ │ │ │ │ │ ├── simulation │ │ │ │ │ │ ├── module.py │ │ │ │ │ │ ├── renderer.py │ │ │ │ │ │ └── sim_robot.py │ │ │ │ │ │ └── utils │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── configurable.py │ │ │ │ │ │ ├── constants.py │ │ │ │ │ │ ├── parse_demos.py │ │ │ │ │ │ └── quatmath.py │ │ │ │ ├── adept_models │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── CONTRIBUTING.public.md │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.public.md │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── kitchen │ │ │ │ │ │ ├── assets │ │ │ │ │ │ │ ├── backwall_asset.xml │ │ │ │ │ │ │ ├── backwall_chain.xml │ │ │ │ │ │ │ ├── counters_asset.xml │ │ │ │ │ │ │ ├── counters_chain.xml │ │ │ │ │ │ │ ├── hingecabinet_asset.xml │ │ │ │ │ │ │ ├── hingecabinet_chain.xml │ │ │ │ │ │ │ ├── kettle_asset.xml │ │ │ │ │ │ │ ├── kettle_chain.xml │ │ │ │ │ │ │ ├── microwave_asset.xml │ │ │ │ │ │ │ ├── microwave_chain.xml │ │ │ │ │ │ │ ├── oven_asset.xml │ │ │ │ │ │ │ ├── oven_chain.xml │ │ │ │ │ │ │ ├── slidecabinet_asset.xml │ │ │ │ │ │ │ └── slidecabinet_chain.xml │ │ │ │ │ │ ├── counters.xml │ │ │ │ │ │ ├── hingecabinet.xml │ │ │ │ │ │ ├── kettle.xml │ │ │ │ │ │ ├── kitchen.xml │ │ │ │ │ │ ├── meshes │ │ │ │ │ │ │ ├── burnerplate.stl │ │ │ │ │ │ │ ├── burnerplate_mesh.stl │ │ │ │ │ │ │ ├── cabinetbase.stl │ │ │ │ │ │ │ ├── cabinetdrawer.stl │ │ │ │ │ │ │ ├── cabinethandle.stl │ │ │ │ │ │ │ ├── countertop.stl │ │ │ │ │ │ │ ├── faucet.stl │ │ │ │ │ │ │ ├── handle2.stl │ │ │ │ │ │ │ ├── hingecabinet.stl │ │ │ │ │ │ │ ├── hingedoor.stl │ │ │ │ │ │ │ ├── hingehandle.stl │ │ │ │ │ │ │ ├── hood.stl │ │ │ │ │ │ │ ├── kettle.stl │ │ │ │ │ │ │ ├── kettlehandle.stl │ │ │ │ │ │ │ ├── knob.stl │ │ │ │ │ │ │ ├── lightswitch.stl │ │ │ │ │ │ │ ├── lightswitchbase.stl │ │ │ │ │ │ │ ├── micro.stl │ │ │ │ │ │ │ ├── microbutton.stl │ │ │ │ │ │ │ ├── microdoor.stl │ │ │ │ │ │ │ ├── microefeet.stl │ │ │ │ │ │ │ ├── microfeet.stl │ │ │ │ │ │ │ ├── microhandle.stl │ │ │ │ │ │ │ ├── microwindow.stl │ │ │ │ │ │ │ ├── oven.stl │ 
│ │ │ │ │ │ ├── ovenhandle.stl │ │ │ │ │ │ │ ├── oventop.stl │ │ │ │ │ │ │ ├── ovenwindow.stl │ │ │ │ │ │ │ ├── slidecabinet.stl │ │ │ │ │ │ │ ├── slidedoor.stl │ │ │ │ │ │ │ ├── stoverim.stl │ │ │ │ │ │ │ ├── tile.stl │ │ │ │ │ │ │ └── wall.stl │ │ │ │ │ │ ├── microwave.xml │ │ │ │ │ │ ├── oven.xml │ │ │ │ │ │ └── slidecabinet.xml │ │ │ │ │ └── scenes │ │ │ │ │ │ └── basic_scene.xml │ │ │ │ └── third_party │ │ │ │ │ └── franka │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── assets │ │ │ │ │ ├── actuator0.xml │ │ │ │ │ ├── actuator1.xml │ │ │ │ │ ├── assets.xml │ │ │ │ │ ├── basic_scene.xml │ │ │ │ │ ├── chain0.xml │ │ │ │ │ ├── chain0_overlay.xml │ │ │ │ │ ├── chain1.xml │ │ │ │ │ └── teleop_actuator.xml │ │ │ │ │ ├── bi-franka_panda.xml │ │ │ │ │ ├── franka_panda.xml │ │ │ │ │ ├── franka_panda_teleop.xml │ │ │ │ │ └── meshes │ │ │ │ │ ├── collision │ │ │ │ │ ├── finger.stl │ │ │ │ │ ├── hand.stl │ │ │ │ │ ├── link0.stl │ │ │ │ │ ├── link1.stl │ │ │ │ │ ├── link2.stl │ │ │ │ │ ├── link3.stl │ │ │ │ │ ├── link4.stl │ │ │ │ │ ├── link5.stl │ │ │ │ │ ├── link6.stl │ │ │ │ │ └── link7.stl │ │ │ │ │ └── visual │ │ │ │ │ ├── finger.stl │ │ │ │ │ ├── hand.stl │ │ │ │ │ ├── link0.stl │ │ │ │ │ ├── link1.stl │ │ │ │ │ ├── link2.stl │ │ │ │ │ ├── link3.stl │ │ │ │ │ ├── link4.stl │ │ │ │ │ ├── link5.stl │ │ │ │ │ ├── link6.stl │ │ │ │ │ └── link7.stl │ │ │ └── v0.py │ │ ├── pusht │ │ │ ├── __init__.py │ │ │ ├── pusht_env.py │ │ │ ├── pusht_image_env.py │ │ │ ├── pusht_keypoints_env.py │ │ │ ├── pymunk_keypoint_manager.py │ │ │ └── pymunk_override.py │ │ └── robomimic │ │ │ ├── robomimic_image_wrapper.py │ │ │ └── robomimic_lowdim_wrapper.py │ ├── env_runner │ │ ├── base_image_runner.py │ │ ├── base_lowdim_runner.py │ │ ├── blockpush_lowdim_runner.py │ │ ├── kitchen_lowdim_runner.py │ │ ├── pusht_image_runner.py │ │ ├── pusht_keypoints_runner.py │ │ ├── real_pusht_image_runner.py │ │ ├── robomimic_image_runner.py │ │ └── robomimic_lowdim_runner.py │ ├── gym_util │ │ ├── async_vector_env.py │ │ ├── multistep_wrapper.py │ │ ├── sync_vector_env.py │ │ ├── video_recording_wrapper.py │ │ └── video_wrapper.py │ ├── model │ │ ├── bet │ │ │ ├── action_ae │ │ │ │ ├── __init__.py │ │ │ │ └── discretizers │ │ │ │ │ └── k_means.py │ │ │ ├── latent_generators │ │ │ │ ├── latent_generator.py │ │ │ │ ├── mingpt.py │ │ │ │ └── transformer.py │ │ │ ├── libraries │ │ │ │ ├── loss_fn.py │ │ │ │ └── mingpt │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── model.py │ │ │ │ │ ├── trainer.py │ │ │ │ │ └── utils.py │ │ │ └── utils.py │ │ ├── common │ │ │ ├── dict_of_tensor_mixin.py │ │ │ ├── lr_scheduler.py │ │ │ ├── module_attr_mixin.py │ │ │ ├── normalizer.py │ │ │ ├── rotation_transformer.py │ │ │ ├── shape_util.py │ │ │ └── tensor_util.py │ │ ├── diffusion │ │ │ ├── conditional_unet1d-bak.py │ │ │ ├── conditional_unet1d.py │ │ │ ├── conditional_unet1d_for123.py │ │ │ ├── conv1d_components.py │ │ │ ├── ema_model.py │ │ │ ├── mask_generator.py │ │ │ ├── positional_embedding.py │ │ │ └── transformer_for_diffusion.py │ │ └── vision │ │ │ ├── crop_randomizer.py │ │ │ ├── model_getter.py │ │ │ └── multi_image_obs_encoder.py │ ├── policy │ │ ├── base_image_policy.py │ │ ├── base_lowdim_policy.py │ │ ├── bet_lowdim_policy.py │ │ ├── diffusion_transformer_hybrid_image_policy.py │ │ ├── diffusion_transformer_lowdim_policy.py │ │ ├── diffusion_unet_hybrid_image_policy.py │ │ ├── diffusion_unet_image_policy.py │ │ ├── diffusion_unet_lowdim_policy.py │ │ ├── diffusion_unet_video_policy.py │ │ ├── 
ibc_dfo_hybrid_image_policy.py │ │ ├── ibc_dfo_lowdim_policy.py │ │ ├── robomimic_image_policy.py │ │ └── robomimic_lowdim_policy.py │ ├── real_world │ │ ├── keystroke_counter.py │ │ ├── multi_camera_visualizer.py │ │ ├── multi_realsense.py │ │ ├── real_data_conversion.py │ │ ├── real_env.py │ │ ├── real_inference_util.py │ │ ├── realsense_config │ │ │ ├── 415_high_accuracy_mode.json │ │ │ └── 435_high_accuracy_mode.json │ │ ├── rtde_interpolation_controller.py │ │ ├── single_realsense.py │ │ ├── spacemouse.py │ │ ├── spacemouse_shared_memory.py │ │ └── video_recorder.py │ ├── scripts │ │ ├── bet_blockpush_conversion.py │ │ ├── blockpush_abs_conversion.py │ │ ├── episode_lengths.py │ │ ├── generate_bet_blockpush.py │ │ ├── real_dataset_conversion.py │ │ ├── real_pusht_metrics.py │ │ ├── real_pusht_successrate.py │ │ ├── robomimic_dataset_action_comparison.py │ │ └── robomimic_dataset_conversion.py │ ├── shared_memory │ │ ├── shared_memory_queue.py │ │ ├── shared_memory_ring_buffer.py │ │ ├── shared_memory_util.py │ │ └── shared_ndarray.py │ └── workspace │ │ ├── base_workspace.py │ │ ├── train_bet_lowdim_workspace.py │ │ ├── train_diffusion_transformer_hybrid_workspace.py │ │ ├── train_diffusion_transformer_lowdim_workspace.py │ │ ├── train_diffusion_unet_hybrid_workspace.py │ │ ├── train_diffusion_unet_image_workspace.py │ │ ├── train_diffusion_unet_lowdim_workspace.py │ │ ├── train_diffusion_unet_video_workspace.py │ │ ├── train_ibc_dfo_hybrid_workspace.py │ │ ├── train_ibc_dfo_lowdim_workspace.py │ │ ├── train_robomimic_image_workspace.py │ │ └── train_robomimic_lowdim_workspace.py ├── eval.py ├── eval_real_robot.py ├── multirun_metrics.py ├── pyrightconfig.json ├── ray_exec.py ├── ray_train_multirun.py ├── setup.py ├── tests │ ├── test_block_pushing.py │ ├── test_cv2_util.py │ ├── test_multi_realsense.py │ ├── test_pose_trajectory_interpolator.py │ ├── test_precise_sleep.py │ ├── test_replay_buffer.py │ ├── test_ring_buffer.py │ ├── test_robomimic_image_runner.py │ ├── test_robomimic_lowdim_runner.py │ ├── test_shared_queue.py │ ├── test_single_realsense.py │ └── test_timestamp_accumulator.py └── train.py ├── habitat_extensions ├── __init__.py ├── config │ ├── __init__.py │ ├── default.py │ ├── r2r_vlnce.yaml │ ├── rxr_vlnce_en.yaml │ ├── rxr_vlnce_hi.yaml │ └── rxr_vlnce_te.yaml ├── habitat_simulator.py ├── maps.py ├── measures.py ├── nav.py ├── obs_transformers.py ├── sensors.py ├── shortest_path_follower.py ├── task.py └── utils.py ├── run.py ├── run_r2r ├── iter_train.yaml ├── main.bash └── r2r_vlnce.yaml └── vlnce_baselines ├── __init__.py ├── common ├── aux_losses.py ├── base_il_trainer.py ├── env_utils.py ├── environments_dp.py ├── ops.py ├── recollection_dataset.py ├── transformer.py └── utils.py ├── config ├── __init__.py ├── default.py ├── nonlearning.yaml └── r2r_configs │ ├── cma.yaml │ ├── cma_aug.yaml │ ├── cma_aug_tune.yaml │ ├── cma_da.yaml │ ├── cma_da_aug_tune.yaml │ ├── cma_pm.yaml │ ├── cma_pm_aug.yaml │ ├── cma_pm_aug_tune.yaml │ ├── cma_pm_da.yaml │ ├── cma_pm_da_aug_tune.yaml │ ├── cma_sf.yaml │ ├── cma_ss.yaml │ ├── seq2seq.yaml │ ├── seq2seq_aug.yaml │ ├── seq2seq_aug_tune.yaml │ ├── seq2seq_da.yaml │ ├── seq2seq_pm.yaml │ ├── seq2seq_pm_aug.yaml │ ├── seq2seq_pm_da_aug_tune.yaml │ └── test_set_inference.yaml ├── dagger_trainer.py ├── models ├── Policy_ViewSelection_ETP_DP.py ├── __init__.py ├── dp │ ├── __init__.py │ ├── config │ │ ├── data_config.yaml │ │ ├── defaults.yaml │ │ └── nomad.yaml │ ├── dp_init.py │ ├── model │ │ ├── __init__.py │ │ ├── ops.py │ │ 
├── pretrain_cmt.py │ │ ├── transformer.py │ │ └── vilmodel.py │ ├── nomad.py │ ├── run_pt │ │ ├── r2r_model_config_dep.json │ │ └── r2r_pretrain_habitat.json │ ├── utils.py │ ├── utils │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── logger.py │ │ ├── misc.py │ │ ├── save.py │ │ ├── utils.py │ │ └── visualize_utils.py │ └── visual_policy.py ├── encoders │ ├── instruction_encoder.py │ └── resnet_encoders.py ├── etp │ ├── vilmodel_cmt.py │ └── vlnbert_init.py ├── graph_utils_dp.py ├── policy.py ├── utils.py └── vlnbert │ ├── vlnbert_PREVALENT.py │ └── vlnbert_init.py ├── ss_trainer_ETP_DP.py ├── utils.py └── waypoint_pred ├── TRM_net.py ├── transformer ├── pytorch_transformer │ ├── file_utils.py │ ├── modeling_bert.py │ └── modeling_utils.py └── waypoint_bert.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ftpignore 2 | .ftpconfig 3 | .vscode 4 | 5 | # Byte-compiled / optimized / DLL files 6 | .ipynb_checkpoints/ 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.swp 11 | *.png 12 | *.zip 13 | 14 | /data 15 | /logs 16 | /pretrained 17 | /interact 18 | /pretrain_src/datasets 19 | /pretrain_src/img_features -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DAgger Diffusion Navigation: DAgger Boosted Diffusion Policy for Vision-Language Navigation 2 | 3 | ## TODOs 4 | 5 | * [x] Release the evaluation code of the DifNav model. 6 | * [x] Release the checkpoints of the DifNav model for each scene. 7 | * [ ] Release the online data augmentation code. 8 | * [ ] Release the training data and code. 9 | 10 | ## Requirements 11 | 12 | 1. Install the `Habitat` simulator: follow the instructions from [ETPNav](https://github.com/MarSaKi/ETPNav) and [VLN-CE](https://github.com/jacobkrantz/VLN-CE). 13 | 2. Download the `Matterport3D Scene Dataset (MP3D)` from [Matterport](https://github.com/niessner/Matterport). 14 | 3. Install the dependencies of [ETPNav](https://github.com/MarSaKi/ETPNav) and [Nomad](https://github.com/robodhruv/visualnav-transformer). 15 | 4. Download annotations and trained models from [Google Drive](https://drive.google.com/drive/u/1/folders/1BcEmhBIjMo7aDo1ORbB8sjfOpjpmUrso). 16 | 5. The data should be stored under the `data` folder with the following structure: 17 | ``` 18 | data 19 | ├── scene_datasets 20 | │ └── mp3d 21 | ├── datasets 22 | │ └── R2R_VLNCE_v1-2_preprocessed_BERTidx 23 | ├── checkpoints 24 | │ └── open_area.pth 25 | └── ddppo-models 26 | ``` 27 | 28 | ## Evaluation 29 | 30 | Evaluate each scene in our experiments (Open Area, Narrow Space, Stairs), e.g. for Open Area (commands for the remaining scenes are sketched at the end of this README): 31 | ``` 32 | bash run_r2r/main.bash open_area eval 2333 33 | ``` 34 | 35 | ## Acknowledgement 36 | - [ETPNav](https://github.com/MarSaKi/ETPNav): ETPNav implementation. 37 | - [Nomad](https://github.com/robodhruv/visualnav-transformer): NoMaD implementation.
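Assuming the per-scene checkpoints from the Google Drive folder follow the `open_area.pth` naming (e.g. `narrow_space.pth` and `stairs.pth` under `data/checkpoints`; the exact filenames are an assumption here), the remaining scenes can be evaluated the same way:
```
bash run_r2r/main.bash narrow_space eval 2333
bash run_r2r/main.bash stairs eval 2333
```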
38 | -------------------------------------------------------------------------------- /bert_config/bert-base-uncased/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "layer_norm_eps": 1e-12, 13 | "max_position_embeddings": 512, 14 | "model_type": "bert", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "pad_token_id": 0, 18 | "position_embedding_type": "absolute", 19 | "transformers_version": "4.6.0.dev0", 20 | "type_vocab_size": 2, 21 | "use_cache": true, 22 | "vocab_size": 30522 23 | } 24 | -------------------------------------------------------------------------------- /bert_config/xlm-roberta-base/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "XLMRobertaForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "bos_token_id": 0, 7 | "eos_token_id": 2, 8 | "hidden_act": "gelu", 9 | "hidden_dropout_prob": 0.1, 10 | "hidden_size": 768, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 3072, 13 | "layer_norm_eps": 1e-05, 14 | "max_position_embeddings": 514, 15 | "model_type": "xlm-roberta", 16 | "num_attention_heads": 12, 17 | "num_hidden_layers": 12, 18 | "output_past": true, 19 | "pad_token_id": 1, 20 | "position_embedding_type": "absolute", 21 | "transformers_version": "4.17.0.dev0", 22 | "type_vocab_size": 1, 23 | "use_cache": true, 24 | "vocab_size": 250002 25 | } 26 | -------------------------------------------------------------------------------- /bert_config/xlm-roberta-base/sentencepiece.bpe.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/bert_config/xlm-roberta-base/sentencepiece.bpe.model -------------------------------------------------------------------------------- /diffusion_policy/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Columbia Artificial Intelligence and Robotics Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /diffusion_policy/conda_environment.yaml: -------------------------------------------------------------------------------- 1 | name: robodiff 2 | channels: 3 | - pytorch 4 | - pytorch3d 5 | - nvidia 6 | - conda-forge 7 | dependencies: 8 | - python=3.9 9 | - pip=22.2.2 10 | - cudatoolkit=11.6 11 | - pytorch=1.12.1 12 | - torchvision=0.13.1 13 | - pytorch3d=0.7.0 14 | - numpy=1.23.3 15 | - numba==0.56.4 16 | - scipy==1.9.1 17 | - py-opencv=4.6.0 18 | - cffi=1.15.1 19 | - ipykernel=6.16 20 | - matplotlib=3.6.1 21 | - zarr=2.12.0 22 | - numcodecs=0.10.2 23 | - h5py=3.7.0 24 | - hydra-core=1.2.0 25 | - einops=0.4.1 26 | - tqdm=4.64.1 27 | - dill=0.3.5.1 28 | - scikit-video=1.1.11 29 | - scikit-image=0.19.3 30 | - gym=0.21.0 31 | - pymunk=6.2.1 32 | - wandb=0.13.3 33 | - threadpoolctl=3.1.0 34 | - shapely=1.8.4 35 | - cython=0.29.32 36 | - imageio=2.22.0 37 | - imageio-ffmpeg=0.4.7 38 | - termcolor=2.0.1 39 | - tensorboard=2.10.1 40 | - tensorboardx=2.5.1 41 | - psutil=5.9.2 42 | - click=8.0.4 43 | - boto3=1.24.96 44 | - accelerate=0.13.2 45 | - datasets=2.6.1 46 | - diffusers=0.11.1 47 | - av=10.0.0 48 | - cmake=3.24.3 49 | # trick to avoid cpu affinity issue described in https://github.com/pytorch/pytorch/issues/99625 50 | - llvm-openmp=14 51 | # trick to force reinstall imagecodecs via pip 52 | - imagecodecs==2022.8.8 53 | - pip: 54 | - ray[default,tune]==2.2.0 55 | # requires mujoco py dependencies libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf 56 | - free-mujoco-py==2.1.6 57 | - pygame==2.1.2 58 | - pybullet-svl==3.1.6.4 59 | - robosuite @ https://github.com/cheng-chi/robosuite/archive/277ab9588ad7a4f4b55cf75508b44aa67ec171f0.tar.gz 60 | - robomimic==0.2.0 61 | - pytorchvideo==0.1.5 62 | # pip package required for jpeg-xl 63 | - imagecodecs==2022.9.26 64 | - r3m @ https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz 65 | - dm-control==1.0.9 66 | -------------------------------------------------------------------------------- /diffusion_policy/conda_environment_macos.yaml: -------------------------------------------------------------------------------- 1 | name: robodiff 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | dependencies: 6 | - python=3.9 7 | - pip=22.2.2 8 | - pytorch=1.12.1 9 | - torchvision=0.13.1 10 | - numpy=1.23.3 11 | - numba==0.56.4 12 | - scipy==1.9.1 13 | - py-opencv=4.6.0 14 | - cffi=1.15.1 15 | - ipykernel=6.16 16 | - matplotlib=3.6.1 17 | - zarr=2.12.0 18 | - numcodecs=0.10.2 19 | - h5py=3.7.0 20 | - hydra-core=1.2.0 21 | - einops=0.4.1 22 | - tqdm=4.64.1 23 | - dill=0.3.5.1 24 | - scikit-video=1.1.11 25 | - scikit-image=0.19.3 26 | - gym=0.21.0 27 | - pymunk=6.2.1 28 | - wandb=0.13.3 29 | - threadpoolctl=3.1.0 30 | - shapely=1.8.4 31 | - cython=0.29.32 32 | - imageio=2.22.0 33 | - imageio-ffmpeg=0.4.7 34 | - termcolor=2.0.1 35 | - tensorboard=2.10.1 36 | - tensorboardx=2.5.1 37 | - psutil=5.9.2 38 | - click=8.0.4 39 | - boto3=1.24.96 40 | - accelerate=0.13.2 41 | - datasets=2.6.1 42 | - diffusers=0.11.1 43 | - av=10.0.0 44 | - cmake=3.24.3 45 | # trick to force reinstall imagecodecs via pip 46 | - imagecodecs==2022.8.8 47 | - pip: 48 | - ray[default,tune]==2.2.0 49 | - pygame==2.1.2 50 | - robomimic==0.2.0 51 | - pytorchvideo==0.1.5 52 | - atomics==1.0.2 53 | # No support for jpeg-xl for MacOS 54 | - imagecodecs==2022.9.26 55 | - r3m @ https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz 56 | 
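# A minimal sketch of building these environments, assuming conda is installed.
# On Linux, the mujoco-py system dependencies noted in conda_environment.yaml
# (libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf) must be installed first:
#
#   sudo apt-get install libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf
#   conda env create -f diffusion_policy/conda_environment.yaml
#   conda activate robodiff
#
# On macOS, use conda_environment_macos.yaml instead (no jpeg-xl support there).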
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: diffusion-policy 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | License: UNKNOWN 7 | Platform: UNKNOWN 8 | License-File: LICENSE 9 | 10 | UNKNOWN 11 | 12 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | README.md 3 | setup.py 4 | diffusion_policy.egg-info/PKG-INFO 5 | diffusion_policy.egg-info/SOURCES.txt 6 | diffusion_policy.egg-info/dependency_links.txt 7 | diffusion_policy.egg-info/top_level.txt -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | self.monitor_key = monitor_key 17 | self.mode = mode 18 | self.k = k 19 | self.format_str = format_str 20 | self.path_value_map = dict() 21 | 22 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 23 | if self.k == 0: 24 | return None 25 | 26 | value = data[self.monitor_key] 27 | ckpt_path = os.path.join( 28 | self.save_dir, self.format_str.format(**data)) 29 | 30 | if len(self.path_value_map) < self.k: 31 | # under-capacity 32 | self.path_value_map[ckpt_path] = value 33 | return ckpt_path 34 | 35 | # at capacity 36 | sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 37 | min_path, min_value = sorted_map[0] 38 | max_path, max_value = sorted_map[-1] 39 | 40 | delete_path = None 41 | if self.mode == 'max': 42 | if value > min_value: 43 | delete_path = min_path 44 | else: 45 | if value < max_value: 46 | delete_path = max_path 47 | 48 | if delete_path is None: 49 | return None 50 | else: 51 | del self.path_value_map[delete_path] 52 | self.path_value_map[ckpt_path] = value 53 | 54 | if not os.path.exists(self.save_dir): 55 | os.mkdir(self.save_dir) 56 | 57 | if os.path.exists(delete_path): 58 | os.remove(delete_path) 59 | return ckpt_path 60 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if i == 
0: 12 | env.set_state(state)  # set the state a second time on the first frame (the initial set_state may not fully take effect before rendering) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaves of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 | if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest.
7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = 
modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/blockpush_lowdim_seed.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: False 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed_abs 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: True 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed_abs.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/can_image.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 
| action: 20 | shape: [7] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/can_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/can_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/can_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 
34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/kitchen_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | dataset_dir: &dataset_dir data/kitchen 8 | 9 | env_runner: 10 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 11 | dataset_dir: *dataset_dir 12 | n_train: 6 13 | n_train_vis: 2 14 | train_start_seed: 0 15 | n_test: 50 16 | n_test_vis: 4 17 | test_start_seed: 100000 18 | max_steps: 280 19 | n_obs_steps: ${n_obs_steps} 20 | n_action_steps: ${n_action_steps} 21 | render_hw: [240, 360] 22 | fps: 12.5 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.kitchen_lowdim_dataset.KitchenLowdimDataset 28 | dataset_dir: *dataset_dir 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/kitchen_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | abs_action: True 8 | robot_noise_ratio: 0.1 9 | 10 | env_runner: 11 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 12 | dataset_dir: data/kitchen 13 | n_train: 6 14 | n_train_vis: 2 15 | train_start_seed: 0 16 | n_test: 50 17 | n_test_vis: 4 18 | test_start_seed: 100000 19 | max_steps: 280 20 | n_obs_steps: ${n_obs_steps} 21 | n_action_steps: ${n_action_steps} 22 | render_hw: [240, 360] 23 | fps: 12.5 24 | past_action: ${past_action_visible} 25 | abs_action: ${task.abs_action} 26 | robot_noise_ratio: ${task.robot_noise_ratio} 27 | n_envs: null 28 | 29 | dataset: 30 | _target_: diffusion_policy.dataset.kitchen_mjl_lowdim_dataset.KitchenMjlLowdimDataset 31 | dataset_dir: data/kitchen/kitchen_demos_multitask 32 | horizon: ${horizon} 33 | pad_before: ${eval:'${n_obs_steps}-1'} 34 | pad_after: ${eval:'${n_action_steps}-1'} 35 | abs_action: ${task.abs_action} 36 | robot_noise_ratio: ${task.robot_noise_ratio} 37 | seed: 42 38 | val_ratio: 0.02 39 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/lift_image.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name lift 23 | dataset_type: 
&dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 1 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 3 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/lift_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | n_train: 6 32 | n_train_vis: 2 33 | train_start_idx: 0 34 | n_test: 50 35 | n_test_vis: 4 36 | test_start_seed: 100000 37 | # use python's eval function as resolver, single-quoted string as argument 38 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'agentview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/lift_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/lift_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 3 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | 
abs_action: *abs_action 33 | tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | rotation_rep: rotation_6d 46 | seed: 42 47 | val_ratio: 0.02 48 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_image 2 | 3 | image_shape: &image_shape [3, 96, 96] 4 | shape_meta: &shape_meta 5 | # acceptable types: rgb, low_dim 6 | obs: 7 | image: 8 | shape: *image_shape 9 | type: rgb 10 | agent_pos: 11 | shape: [2] 12 | type: low_dim 13 | action: 14 | shape: [2] 15 | 16 | env_runner: 17 | _target_: diffusion_policy.env_runner.pusht_image_runner.PushTImageRunner 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_seed: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | legacy_test: True 24 | test_start_seed: 100000 25 | max_steps: 300 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | fps: 10 29 | past_action: ${past_action_visible} 30 | n_envs: null 31 | 32 | dataset: 33 | _target_: diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset 34 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 35 | horizon: ${horizon} 36 | pad_before: ${eval:'${n_obs_steps}-1'} 37 | pad_after: ${eval:'${n_action_steps}-1'} 38 | seed: 42 39 | val_ratio: 0.02 40 | max_train_episodes: 90 41 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/pusht_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_lowdim 2 | 3 | obs_dim: 20 # 9*2 keypoints + 2 state 4 | action_dim: 2 5 | keypoint_dim: 2 6 | 7 | env_runner: 8 | _target_: diffusion_policy.env_runner.pusht_keypoints_runner.PushTKeypointsRunner 9 | keypoint_visible_rate: ${keypoint_visible_rate} 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | legacy_test: True 16 | test_start_seed: 100000 17 | max_steps: 300 18 | n_obs_steps: ${n_obs_steps} 19 | n_action_steps: ${n_action_steps} 20 | n_latency_steps: ${n_latency_steps} 21 | fps: 10 22 | agent_keypoints: False 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.pusht_dataset.PushTLowdimDataset 28 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | max_train_episodes: 90 35 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/real_pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: real_image 2 | 3 | image_shape: [3, 240, 320] 4 | dataset_path: data/pusht_real/real_pusht_20230105 5 | 6 | shape_meta: &shape_meta 7 | # acceptable types: rgb, low_dim 8 | obs: 9 | # camera_0: 10 | # shape: ${task.image_shape} 11 | # type: rgb 12 | camera_1: 13 | shape: ${task.image_shape} 14 | type: rgb 15 | # camera_2: 16 | # 
shape: ${task.image_shape} 17 | # type: rgb 18 | camera_3: 19 | shape: ${task.image_shape} 20 | type: rgb 21 | # camera_4: 22 | # shape: ${task.image_shape} 23 | # type: rgb 24 | robot_eef_pose: 25 | shape: [2] 26 | type: low_dim 27 | action: 28 | shape: [2] 29 | 30 | env_runner: 31 | _target_: diffusion_policy.env_runner.real_pusht_image_runner.RealPushTImageRunner 32 | 33 | dataset: 34 | _target_: diffusion_policy.dataset.real_pusht_image_dataset.RealPushTImageDataset 35 | shape_meta: *shape_meta 36 | dataset_path: ${task.dataset_path} 37 | horizon: ${horizon} 38 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 39 | pad_after: ${eval:'${n_action_steps}-1'} 40 | n_obs_steps: ${dataset_obs_steps} 41 | n_latency_steps: ${n_latency_steps} 42 | use_cache: True 43 | seed: 42 44 | val_ratio: 0.00 45 | max_train_episodes: null 46 | delta_action: False 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/square_image.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
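Every dataset block above shares the same padding rule: pad episode starts so the first sample still carries n_obs_steps observations (shifted by n_latency_steps), and pad episode ends so the last observation still anchors a full action chunk. A worked example under assumed hyperparameter values (the real ones come from the workspace configs, which are outside this section):

    n_obs_steps, n_action_steps, n_latency_steps = 2, 8, 0  # assumed values
    horizon, episode_len = 16, 100                          # assumed values

    pad_before = n_obs_steps - 1 + n_latency_steps  # = 1
    pad_after = n_action_steps - 1                  # = 7

    # standard sliding-window count over the padded episode:
    n_windows = episode_len + pad_before + pad_after - horizon + 1
    print(pad_before, pad_after, n_windows)  # 1 7 93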
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/square_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
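In the _abs variants the action grows from 7-D to 10-D and rotation_rep names the continuous 6-D rotation representation; the usual decomposition, which the config does not spell out and is assumed here, is 3-D position + 6-D rotation + 1-D gripper. A standalone sketch of the standard rotation_6d conversion, independent of this repo's own rotation utilities:

    import numpy as np

    def mat_to_rot6d(rot: np.ndarray) -> np.ndarray:
        # keep the first two columns of the 3x3 rotation matrix
        return rot[:, :2].T.reshape(6)

    def rot6d_to_mat(d6: np.ndarray) -> np.ndarray:
        # Gram-Schmidt the two 3-vectors back into an orthonormal frame
        a1, a2 = d6[:3], d6[3:]
        b1 = a1 / np.linalg.norm(a1)
        b2 = a2 - np.dot(b1, a2) * b1
        b2 = b2 / np.linalg.norm(b2)
        b3 = np.cross(b1, b2)
        return np.stack([b1, b2, b3], axis=1)

    rot = np.eye(3)
    assert np.allclose(rot6d_to_mat(mat_to_rot6d(rot)), rot)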
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/square_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | max_train_episodes: null 47 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/square_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | abs_action: &abs_action True 11 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 12 | 13 | 14 | env_runner: 15 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 16 | dataset_path: *dataset_path 17 | obs_keys: *obs_keys 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_idx: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | test_start_seed: 100000 24 | # use python's eval function as resolver, single-quoted string as argument 25 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | n_latency_steps: ${n_latency_steps} 29 | render_hw: [128,128] 30 | fps: 10 31 | crf: 22 32 | past_action: 
${past_action_visible} 33 | abs_action: *abs_action 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | max_train_episodes: null 48 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/tool_hang_image.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
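For the lowdim tasks, obs_dim is simply the concatenation length of the configured obs_keys; for square, 23 = 14 (object, inferred by subtraction) + 3 (eef_pos) + 4 (eef_quat) + 2 (gripper_qpos). A sketch of the flattening, with the object dimension assumed from that arithmetic:

    import numpy as np

    obs_keys = ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos']

    def flatten_obs(obs: dict) -> np.ndarray:
        # concatenate the per-key vectors in the configured key order
        return np.concatenate([np.asarray(obs[k]).ravel() for k in obs_keys])

    obs = {
        'object': np.zeros(14),            # 23 - (3 + 4 + 2), inferred
        'robot0_eef_pos': np.zeros(3),
        'robot0_eef_quat': np.zeros(4),
        'robot0_gripper_qpos': np.zeros(2),
    }
    assert flatten_obs(obs).shape == (23,)  # matches square_lowdim's obs_dim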
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/tool_hang_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image_abs 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
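Every env_runner and dataset mapping is built from its _target_ import path. A minimal sketch of Hydra-style instantiation with concrete values; the class path and zarr path are taken from pusht_image.yaml above, while the literal numbers stand in for the ${horizon}-style interpolations, which must already be resolved (and the zarr file must exist on disk):

    from hydra.utils import instantiate

    cfg = {
        "_target_": "diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset",
        "zarr_path": "data/pusht/pusht_cchi_v7_replay.zarr",
        "horizon": 16,     # assumed value, normally filled by ${horizon}
        "pad_before": 1,
        "pad_after": 7,
    }
    dataset = instantiate(cfg)  # imports the class, then calls it with the kwargs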
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/tool_hang_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/tool_hang_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 
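The runners draw training rollouts from dataset episode indices and test rollouts from fresh seeds counted up from test_start_seed; starting test seeds at 100000 keeps them well away from the dataset's own seed: 42 (one plausible reading, given the comment above that seed 42 crashes MuJoCo). A sketch of the implied schedule; the exact indexing lives in RobomimicLowdimRunner, which is not shown in this section:

    n_train, train_start_idx = 6, 0
    n_test, test_start_seed = 50, 100000

    train_episode_idxs = list(range(train_start_idx, train_start_idx + n_train))
    test_seeds = [test_start_seed + i for i in range(n_test)]

    print(train_episode_idxs)  # [0, 1, 2, 3, 4, 5]
    print(test_seeds[:3])      # [100000, 100001, 100002]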
34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/transport_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 14 # 7*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 15 | abs_action: &abs_action False 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 5 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 
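transport is bimanual, and the inline comments above make the composition explicit: obs_dim 59 = 41 (objects) + (3 + 4 + 2) per-arm state for two arms, and action_dim 14 = 7 per arm. A sketch of splitting such an action back into per-arm commands (the helper and the robot0-first ordering are illustrative, not this repo's API):

    import numpy as np

    def split_bimanual_action(action: np.ndarray, n_robots: int = 2):
        # action_dim 14 == 7 * 2: one 7-D command per arm
        return np.split(np.asarray(action), n_robots)

    a0, a1 = split_bimanual_action(np.arange(14.0))
    assert a0.shape == a1.shape == (7,)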
38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/config/task/transport_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 20 # 10*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 15 | abs_action: &abs_action True 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 4 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 
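All of these dataset blocks share seed: 42 and val_ratio: 0.02, i.e. a deterministic 2% episode-level holdout. A sketch of what that implies (the repo's own sampler may differ in detail):

    import numpy as np

    def split_train_val(n_episodes: int, val_ratio: float = 0.02, seed: int = 42):
        # deterministic episode-level split driven by the config's seed
        rng = np.random.default_rng(seed)
        idx = rng.permutation(n_episodes)
        n_val = int(n_episodes * val_ratio)
        return idx[n_val:], idx[:n_val]  # train, val

    train_idx, val_idx = split_train_val(200)
    assert len(val_idx) == 4  # 2% of 200 episodes held out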
38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 |
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/dataset/base_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseImageDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 |
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/block.urdf: -------------------------------------------------------------------------------- [32 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/block2.urdf: -------------------------------------------------------------------------------- [32 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/blocks/blue_cube.urdf: -------------------------------------------------------------------------------- [31 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/blocks/green_star.urdf: -------------------------------------------------------------------------------- [31 lines of XML markup not preserved in this export]
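base_dataset.py above defines the contract every dataset must satisfy: __getitem__ returns a dict of tensors shaped (T, ...), with image datasets nesting the per-key observations under "obs", and get_normalizer must be overridden before training. A toy subclass sketch with illustrative names and shapes:

    from typing import Dict
    import torch
    from diffusion_policy.dataset.base_dataset import BaseImageDataset

    class ToyImageDataset(BaseImageDataset):
        # eight constant samples of T=2 steps with one 96x96 rgb obs;
        # get_normalizer() is still required for training and omitted here
        def __len__(self) -> int:
            return 8

        def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
            return {
                "obs": {
                    "image": torch.zeros(2, 3, 96, 96),
                    "agent_pos": torch.zeros(2, 2),
                },
                "action": torch.zeros(2, 2),
            }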
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/blocks/red_moon.urdf: -------------------------------------------------------------------------------- [31 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/blocks/yellow_pentagon.urdf: -------------------------------------------------------------------------------- [31 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/insert.urdf: -------------------------------------------------------------------------------- [67 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/plane.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.66 (sub 1) OBJ File: '' 2 | # www.blender.org 3 | mtllib plane.mtl 4 | o Plane 5 | v 15.000000 -15.000000 0.000000 6 | v 15.000000 15.000000 0.000000 7 | v -15.000000 15.000000 0.000000 8 | v -15.000000 -15.000000 0.000000 9 | 10 | vt 15.000000 0.000000 11 | vt 15.000000 15.000000 12 | vt 0.000000 15.000000 13 | vt 0.000000 0.000000 14 | 15 | usemtl Material 16 | s off 17 | f 1/1 2/2 3/3 18 | f 1/1 3/3 4/4 19 |
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/workspace.urdf: -------------------------------------------------------------------------------- [31 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/workspace_real.urdf: -------------------------------------------------------------------------------- [30 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/zone.obj: -------------------------------------------------------------------------------- 1 | # Object Export From Tinkercad Server 2015 2 | 3 | mtllib obj.mtl 4 | 5 | o obj_0 6 | v 10 -10 20 7 | v 10 -10 0 8 | v 10 10 0 9 | v 10 10 20 10 | v 9.002 9.003 20 11 | v 9.002 -9.002 20 12 | v -10 10 0 13 | v -10 10 20 14 | v -9.003 9.003 20 15 | v -9.003 9.003 0 16 | v 9.002 9.003 0 17 | v 9.002 -9.002 0 18 | v -9.003 -9.002 0 19 | v -9.003 -9.002 20 20 | v -10 -10 0 21 | v -10 -10 20 22 | # 16 vertices 23 | 24 | g group_0_15277357 25 | 26 | usemtl color_15277357 27 | s 0 28 | 29 | f 1 2 3 30 | f 1 3 4 31 | f 4 5 6 32 | f 4 6 1 33 | f 9 10 11 34 | f 9 11 5 35 | f 6 12 13 36 | f 
6 13 14 37 | f 10 9 14 38 | f 10 14 13 39 | f 7 10 13 40 | f 7 13 15 41 | f 4 8 5 42 | f 9 5 8 43 | f 8 7 15 44 | f 8 15 16 45 | f 10 7 11 46 | f 3 11 7 47 | f 11 3 12 48 | f 2 12 3 49 | f 14 16 6 50 | f 1 6 16 51 | f 16 15 2 52 | f 16 2 1 53 | f 9 8 14 54 | f 16 14 8 55 | f 7 8 3 56 | f 4 3 8 57 | f 2 15 12 58 | f 13 12 15 59 | f 12 6 5 60 | f 12 5 11 61 | # 32 faces 62 | 63 | #end of obj_0 64 | 65 |
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/zone.urdf: -------------------------------------------------------------------------------- [24 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/assets/zone2.urdf: -------------------------------------------------------------------------------- [24 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/block_pushing/oracles/pushing_info.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The Reach ML Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """Dataclass holding info needed for pushing oracles.""" 17 | import dataclasses 18 | from typing import Any 19 | 20 | 21 | @dataclasses.dataclass 22 | class PushingInfo: 23 | """Holds onto info necessary for pushing state machine.""" 24 | 25 | xy_block: Any = None 26 | xy_ee: Any = None 27 | xy_pre_block: Any = None 28 | xy_delta_to_nexttoblock: Any = None 29 | xy_delta_to_touchingblock: Any = None 30 | xy_dir_block_to_ee: Any = None 31 | theta_threshold_to_orient: Any = None 32 | theta_threshold_flat_enough: Any = None 33 | theta_error: Any = None 34 | obstacle_poses: Any = None 35 | distance_to_target: Any = None 36 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/__init__.py: -------------------------------------------------------------------------------- 1 | """Environments using kitchen and Franka robot.""" 2 | from gym.envs.registration import register 3 | 4 | register( 5 | id="kitchen-microwave-kettle-light-slider-v0", 6 | entry_point="diffusion_policy.env.kitchen.v0:KitchenMicrowaveKettleLightSliderV0", 7 | max_episode_steps=280, 8 | reward_threshold=1.0, 9 | ) 10 | 11 | register( 12 | id="kitchen-microwave-kettle-burner-light-v0", 13 | entry_point="diffusion_policy.env.kitchen.v0:KitchenMicrowaveKettleBottomBurnerLightV0", 14 | max_episode_steps=280, 15 | reward_threshold=1.0, 16 | ) 17 | 18 | register( 19 | id="kitchen-kettle-microwave-light-slider-v0", 20 | entry_point="diffusion_policy.env.kitchen.v0:KitchenKettleMicrowaveLightSliderV0", 21 | max_episode_steps=280, 22 | reward_threshold=1.0, 23 | ) 24 | 25 | register( 26 | id="kitchen-all-v0", 27 | entry_point="diffusion_policy.env.kitchen.v0:KitchenAllV0", 28 | max_episode_steps=280, 29 | reward_threshold=1.0, 30 | ) 31 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/kitchen_lowdim_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Optional 2 | import numpy as np 3 | import gym 4 | from gym.spaces import Box 5 | from diffusion_policy.env.kitchen.base import KitchenBase 6 | 7 | class KitchenLowdimWrapper(gym.Env): 8 | def __init__(self, 9 | env: KitchenBase, 10 | init_qpos: Optional[np.ndarray]=None, 11 | init_qvel: Optional[np.ndarray]=None, 12 | render_hw = (240,360) 13 | ): 14 | self.env = env 15 | self.init_qpos = init_qpos 16 | self.init_qvel = init_qvel 17 | self.render_hw = render_hw 18 | 19 | @property 20 | def action_space(self): 21 | return self.env.action_space 22 | 23 | @property 24 | def observation_space(self): 25 | return self.env.observation_space 26 | 27 | def seed(self, seed=None): 28 | return self.env.seed(seed) 29 | 30 | def reset(self): 31 | if self.init_qpos is not None: 32 | # reset anyway to be safe, not very expensive 33 | _ = self.env.reset() 34 | # start from known state 35 | self.env.set_state(self.init_qpos, self.init_qvel) 36 | obs = self.env._get_obs() 37 | return obs 38 | # obs, _, _, _ = self.env.step(np.zeros_like( 39 | # self.action_space.sample())) 40 | # return obs 41 | else: 42 | return self.env.reset() 43 | 44 | def render(self, mode='rgb_array'): 45 | h, w = self.render_hw 46 | return self.env.render(mode=mode, width=w, height=h) 47 | 48 | def step(self, a): 49 | return self.env.step(a) 50 | -------------------------------------------------------------------------------- 
/diffusion_policy/diffusion_policy/env/kitchen/kitchen_util.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import numpy as np 3 | 4 | def parse_mjl_logs(read_filename, skipamount): 5 | with open(read_filename, mode='rb') as file: 6 | fileContent = file.read() 7 | headers = struct.unpack('iiiiiii', fileContent[:28]) 8 | nq = headers[0] 9 | nv = headers[1] 10 | nu = headers[2] 11 | nmocap = headers[3] 12 | nsensordata = headers[4] 13 | nuserdata = headers[5] 14 | name_len = headers[6] 15 | name = struct.unpack(str(name_len) + 's', fileContent[28:28+name_len])[0] 16 | rem_size = len(fileContent[28 + name_len:]) 17 | num_floats = int(rem_size/4) 18 | dat = np.asarray(struct.unpack(str(num_floats) + 'f', fileContent[28+name_len:])) 19 | recsz = 1 + nq + nv + nu + 7*nmocap + nsensordata + nuserdata 20 | if rem_size % recsz != 0: 21 | print("ERROR") 22 | else: 23 | dat = np.reshape(dat, (int(len(dat)/recsz), recsz)) 24 | dat = dat.T 25 | 26 | time = dat[0,:][::skipamount] - 0*dat[0, 0] 27 | qpos = dat[1:nq + 1, :].T[::skipamount, :] 28 | qvel = dat[nq+1:nq+nv+1,:].T[::skipamount, :] 29 | ctrl = dat[nq+nv+1:nq+nv+nu+1,:].T[::skipamount,:] 30 | mocap_pos = dat[nq+nv+nu+1:nq+nv+nu+3*nmocap+1,:].T[::skipamount, :] 31 | mocap_quat = dat[nq+nv+nu+3*nmocap+1:nq+nv+nu+7*nmocap+1,:].T[::skipamount, :] 32 | sensordata = dat[nq+nv+nu+7*nmocap+1:nq+nv+nu+7*nmocap+nsensordata+1,:].T[::skipamount,:] 33 | userdata = dat[nq+nv+nu+7*nmocap+nsensordata+1:,:].T[::skipamount,:] 34 | 35 | data = dict(nq=nq, 36 | nv=nv, 37 | nu=nu, 38 | nmocap=nmocap, 39 | nsensordata=nsensordata, 40 | name=name, 41 | time=time, 42 | qpos=qpos, 43 | qvel=qvel, 44 | ctrl=ctrl, 45 | mocap_pos=mocap_pos, 46 | mocap_quat=mocap_quat, 47 | sensordata=sensordata, 48 | userdata=userdata, 49 | logName = read_filename 50 | ) 51 | return data 52 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2020 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import adept_envs.franka 18 | 19 | from adept_envs.utils.configurable import global_config 20 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2020 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
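parse_mjl_logs above decodes the binary MuJoCo demo logs: a 28-byte header of seven int32 fields (nq, nv, nu, nmocap, nsensordata, nuserdata, and the name length), then float32 records that it reshapes and subsamples by skipamount. A usage sketch with a hypothetical log path and stride:

    from diffusion_policy.env.kitchen.kitchen_util import parse_mjl_logs

    # hypothetical path; skipamount=40 keeps every 40th record
    data = parse_mjl_logs("data/kitchen/raw_demos/demo.mjl", skipamount=40)
    print(data["nq"], data["qpos"].shape, data["ctrl"].shape)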
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from gym.envs.registration import register 18 | 19 | # Relax the robot 20 | register( 21 | id='kitchen_relax-v1', 22 | entry_point='adept_envs.franka.kitchen_multitask_v0:KitchenTaskRelaxV1', 23 | max_episode_steps=280, 24 | ) -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2020 Google LLC 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import os 18 | 19 | ENVS_ROOT_PATH = os.path.abspath(os.path.join( 20 | os.path.dirname(os.path.abspath(__file__)), 21 | "../../")) 22 | 23 | MODELS_PATH = os.path.abspath(os.path.join(ENVS_ROOT_PATH, "../adept_models/")) 24 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | .DS_Store 3 | *.swp 4 | *.profraw 5 | 6 | # Editors 7 | .vscode 8 | .idea 9 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/CONTRIBUTING.public.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 
24 | 25 | ## Community Guidelines 26 | 27 | This project follows 28 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 29 |
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/README.public.md: -------------------------------------------------------------------------------- 1 | # D'Suite Scenes 2 | 3 | This repository is based on a collection of [MuJoCo](http://www.mujoco.org/) simulation 4 | scenes and common assets for D'Suite environments. It is based on code in the ROBEL suite: 5 | https://github.com/google-research/robel 6 | 7 | ## Disclaimer 8 | 9 | This is not an official Google product. 10 | 11 |
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/__init__.py
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_asset.xml: -------------------------------------------------------------------------------- [22 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_chain.xml: -------------------------------------------------------------------------------- [17 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_asset.xml: -------------------------------------------------------------------------------- [33 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_asset.xml: -------------------------------------------------------------------------------- [26 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_asset.xml: -------------------------------------------------------------------------------- [24 lines of XML markup not preserved in this export]
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_chain.xml: -------------------------------------------------------------------------------- [21 lines of XML markup not preserved in this export]
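The kitchen asset files in this directory are MuJoCo XML fragments whose markup did not survive this export; at runtime they are presumably resolved relative to MODELS_PATH from adept_envs' constants.py shown earlier. A sketch of composing one of the asset paths that way:

    import os
    from adept_envs.utils.constants import MODELS_PATH

    kitchen_xml = os.path.join(MODELS_PATH, "kitchen", "kitchen.xml")
    print(os.path.exists(kitchen_xml))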
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_asset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_asset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/counters.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/hingecabinet.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kettle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kitchen.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate_mesh.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate_mesh.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetbase.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetbase.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetdrawer.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetdrawer.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinethandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinethandle.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/countertop.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/countertop.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/faucet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/faucet.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/handle2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/handle2.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingecabinet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingecabinet.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingedoor.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingedoor.stl 
-------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingehandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingehandle.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hood.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hood.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettle.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettlehandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettlehandle.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/knob.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/knob.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitch.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitch.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitchbase.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitchbase.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/micro.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/micro.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microbutton.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microbutton.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microdoor.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microdoor.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microefeet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microefeet.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microfeet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microfeet.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microhandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microhandle.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microwindow.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microwindow.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oven.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oven.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenhandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenhandle.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oventop.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oventop.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenwindow.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenwindow.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidecabinet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidecabinet.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidedoor.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidedoor.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/stoverim.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/stoverim.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/tile.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/tile.stl 
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/wall.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/wall.stl
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/microwave.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 15 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/oven.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 15 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/slidecabinet.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 15 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/basic_scene.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 26 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/README.md:
--------------------------------------------------------------------------------
1 | # franka
2 | Franka panda mujoco models
3 |
4 |
5 | # Environment
6 |
7 | franka_panda.xml | coming soon
8 | :-------------------------:|:-------------------------:
9 | ![Alt text](franka_panda.png?raw=false "sawyer") | coming soon
10 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator0.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 17 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator1.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 13 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/basic_scene.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 14 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/teleop_actuator.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 25 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.xml:
--------------------------------------------------------------------------------
[XML content stripped by the snapshot tool; 39 source lines]
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/finger.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/finger.stl
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/hand.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/hand.stl
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link0.stl
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link1.stl
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link2.stl
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link3.stl
--------------------------------------------------------------------------------
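The franka_panda.xml markup itself was not captured above, but it is a standard MuJoCo MJCF model that stitches together the actuator includes and the collision/visual meshes listed in this section. As an illustrative sketch only (this repo drives the model through its bundled adept_envs simulation wrappers; the direct mujoco call and the local path below are assumptions, not this repo's code path):

import mujoco  # official MuJoCo Python bindings; assumed installed

# Assumes a local checkout with the XML and its referenced meshes on disk.
model = mujoco.MjModel.from_xml_path(
    "diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/"
    "third_party/franka/franka_panda.xml")
data = mujoco.MjData(model)
for _ in range(500):
    mujoco.mj_step(model, data)  # step the passive dynamics forward
print(data.qpos)                 # joint configuration after settling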
/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link4.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link5.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link6.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link7.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/finger.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/hand.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link0.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link1.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link1.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link2.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link3.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link4.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link5.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link6.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link7.stl -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/env/kitchen/v0.py: -------------------------------------------------------------------------------- 1 | from diffusion_policy.env.kitchen.base import KitchenBase 2 | 3 | 4 | class KitchenMicrowaveKettleBottomBurnerLightV0(KitchenBase): 5 | TASK_ELEMENTS = ["microwave", "kettle", "bottom burner", "light switch"] 6 | COMPLETE_IN_ANY_ORDER = False 7 | 8 | 9 | class 
KitchenMicrowaveKettleLightSliderV0(KitchenBase):
10 |     TASK_ELEMENTS = ["microwave", "kettle", "light switch", "slide cabinet"]
11 |     COMPLETE_IN_ANY_ORDER = False
12 |
13 |
14 | class KitchenKettleMicrowaveLightSliderV0(KitchenBase):
15 |     TASK_ELEMENTS = ["kettle", "microwave", "light switch", "slide cabinet"]
16 |     COMPLETE_IN_ANY_ORDER = False
17 |
18 |
19 | class KitchenAllV0(KitchenBase):
20 |     TASK_ELEMENTS = KitchenBase.ALL_TASKS
21 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env/pusht/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 | import diffusion_policy.env.pusht
3 |
4 | register(
5 |     id='pusht-keypoints-v0',
6 |     entry_point='diffusion_policy.env.pusht.pusht_keypoints_env:PushTKeypointsEnv',  # fixed dotted path; the original 'envs.pusht...' prefix is not importable from this package layout
7 |     max_episode_steps=200,
8 |     reward_threshold=1.0
9 | )
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env_runner/base_image_runner.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from diffusion_policy.policy.base_image_policy import BaseImagePolicy
3 |
4 | class BaseImageRunner:
5 |     def __init__(self, output_dir):
6 |         self.output_dir = output_dir
7 |
8 |     def run(self, policy: BaseImagePolicy) -> Dict:
9 |         raise NotImplementedError()
10 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env_runner/base_lowdim_runner.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy
3 |
4 | class BaseLowdimRunner:
5 |     def __init__(self, output_dir):
6 |         self.output_dir = output_dir
7 |
8 |     def run(self, policy: BaseLowdimPolicy) -> Dict:
9 |         raise NotImplementedError()
10 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/env_runner/real_pusht_image_runner.py:
--------------------------------------------------------------------------------
1 | from diffusion_policy.policy.base_image_policy import BaseImagePolicy
2 | from diffusion_policy.env_runner.base_image_runner import BaseImageRunner
3 |
4 | class RealPushTImageRunner(BaseImageRunner):
5 |     def __init__(self,
6 |             output_dir):
7 |         super().__init__(output_dir)
8 |
9 |     def run(self, policy: BaseImagePolicy):
10 |         return dict()
11 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/gym_util/video_recording_wrapper.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | from diffusion_policy.real_world.video_recorder import VideoRecorder
4 |
5 | class VideoRecordingWrapper(gym.Wrapper):
6 |     def __init__(self,
7 |             env,
8 |             video_recoder: VideoRecorder,
9 |             mode='rgb_array',
10 |             file_path=None,
11 |             steps_per_render=1,
12 |             **kwargs
13 |         ):
14 |         """
15 |         When file_path is None, don't record.
16 |         """
17 |         super().__init__(env)
18 |
19 |         self.mode = mode
20 |         self.render_kwargs = kwargs
21 |         self.steps_per_render = steps_per_render
22 |         self.file_path = file_path
23 |         self.video_recoder = video_recoder
24 |
25 |         self.step_count = 0
26 |
27 |     def reset(self, **kwargs):
28 |         obs = super().reset(**kwargs)
29 |         self.frames = list()
30 |         self.step_count = 1
31 |         self.video_recoder.stop()
32 |         return obs
33 |
34 |     def step(self, action):
35 |         result = super().step(action)
36 |         self.step_count += 1
37 |         if self.file_path is not None \
38 |                 and ((self.step_count % self.steps_per_render) == 0):
39 |             if not self.video_recoder.is_ready():
40 |                 self.video_recoder.start(self.file_path)
41 |
42 |             frame = self.env.render(
43 |                 mode=self.mode, **self.render_kwargs)
44 |             assert frame.dtype == np.uint8
45 |             self.video_recoder.write_frame(frame)
46 |         return result
47 |
48 |     def render(self, mode='rgb_array', **kwargs):
49 |         if self.video_recoder.is_ready():
50 |             self.video_recoder.stop()
51 |         return self.file_path
52 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/gym_util/video_wrapper.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | class VideoWrapper(gym.Wrapper):
5 |     def __init__(self,
6 |             env,
7 |             mode='rgb_array',
8 |             enabled=True,
9 |             steps_per_render=1,
10 |             **kwargs
11 |         ):
12 |         super().__init__(env)
13 |
14 |         self.mode = mode
15 |         self.enabled = enabled
16 |         self.render_kwargs = kwargs
17 |         self.steps_per_render = steps_per_render
18 |
19 |         self.frames = list()
20 |         self.step_count = 0
21 |
22 |     def reset(self, **kwargs):
23 |         obs = super().reset(**kwargs)
24 |         self.frames = list()
25 |         self.step_count = 1
26 |         if self.enabled:
27 |             frame = self.env.render(
28 |                 mode=self.mode, **self.render_kwargs)
29 |             assert frame.dtype == np.uint8
30 |             self.frames.append(frame)
31 |         return obs
32 |
33 |     def step(self, action):
34 |         result = super().step(action)
35 |         self.step_count += 1
36 |         if self.enabled and ((self.step_count % self.steps_per_render) == 0):
37 |             frame = self.env.render(
38 |                 mode=self.mode, **self.render_kwargs)
39 |             assert frame.dtype == np.uint8
40 |             self.frames.append(frame)
41 |         return result
42 |
43 |     def render(self, mode='rgb_array', **kwargs):
44 |         return self.frames
45 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/model/bet/action_ae/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.utils.data import DataLoader
4 | import abc
5 |
6 | from typing import Optional, Union
7 |
8 | import diffusion_policy.model.bet.utils as utils
9 |
10 |
11 | class AbstractActionAE(utils.SaveModule, abc.ABC):
12 |     @abc.abstractmethod
13 |     def fit_model(
14 |         self,
15 |         input_dataloader: DataLoader,
16 |         eval_dataloader: DataLoader,
17 |         obs_encoding_net: Optional[nn.Module] = None,
18 |     ) -> None:
19 |         pass
20 |
21 |     @abc.abstractmethod
22 |     def encode_into_latent(
23 |         self,
24 |         input_action: torch.Tensor,
25 |         input_rep: Optional[torch.Tensor],
26 |     ) -> torch.Tensor:
27 |         """
28 |         Given the input action, discretize it.
29 |
30 |         Inputs:
31 |         input_action (shape: ... x action_dim): The input action to discretize. This can be in a batch,
32 |         and it is generally assumed that the last dimension is the action dimension.
33 |
34 |         Outputs:
35 |         discretized_action (shape: ...
x num_tokens): The discretized action. 36 | """ 37 | raise NotImplementedError 38 | 39 | @abc.abstractmethod 40 | def decode_actions( 41 | self, 42 | latent_action_batch: Optional[torch.Tensor], 43 | input_rep_batch: Optional[torch.Tensor] = None, 44 | ) -> torch.Tensor: 45 | """ 46 | Given a discretized action, convert it to a continuous action. 47 | 48 | Inputs: 49 | latent_action_batch (shape: ... x num_tokens): The discretized action 50 | generated by the discretizer. 51 | 52 | Outputs: 53 | continuous_action (shape: ... x action_dim): The continuous action. 54 | """ 55 | raise NotImplementedError 56 | 57 | @property 58 | @abc.abstractmethod 59 | def num_latents(self) -> Union[int, float]: 60 | """ 61 | Number of possible latents for this generator, useful for state priors that use softmax. 62 | """ 63 | return float("inf") 64 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/bet/libraries/mingpt/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) Copyright (c) 2020 Andrej Karpathy 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | 9 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/bet/libraries/mingpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/diffusion_policy/diffusion_policy/model/bet/libraries/mingpt/__init__.py -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/bet/libraries/mingpt/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | 8 | def set_seed(seed): 9 | random.seed(seed) 10 | np.random.seed(seed) 11 | torch.manual_seed(seed) 12 | torch.cuda.manual_seed_all(seed) 13 | 14 | 15 | def top_k_logits(logits, k): 16 | v, ix = torch.topk(logits, k) 17 | out = logits.clone() 18 | out[out < v[:, [-1]]] = -float("Inf") 19 | return out 20 | 21 | 22 | @torch.no_grad() 23 | def sample(model, x, steps, temperature=1.0, sample=False, top_k=None): 24 | """ 25 | take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in 26 | the sequence, feeding the predictions back into the model each time. 
Clearly the sampling 27 | has quadratic complexity unlike an RNN that is only linear, and has a finite context window 28 | of block_size, unlike an RNN that has an infinite context window. 29 | """ 30 | block_size = model.get_block_size() 31 | model.eval() 32 | for k in range(steps): 33 | x_cond = ( 34 | x if x.size(1) <= block_size else x[:, -block_size:] 35 | ) # crop context if needed 36 | logits, _ = model(x_cond) 37 | # pluck the logits at the final step and scale by temperature 38 | logits = logits[:, -1, :] / temperature 39 | # optionally crop probabilities to only the top k options 40 | if top_k is not None: 41 | logits = top_k_logits(logits, top_k) 42 | # apply softmax to convert to probabilities 43 | probs = F.softmax(logits, dim=-1) 44 | # sample from the distribution or take the most likely 45 | if sample: 46 | ix = torch.multinomial(probs, num_samples=1) 47 | else: 48 | _, ix = torch.topk(probs, k=1, dim=-1) 49 | # append to the sequence and continue 50 | x = torch.cat((x, ix), dim=1) 51 | 52 | return x 53 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | # if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 
11 | pass 12 | return device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/diffusion/conv1d_components.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | 
return resnet_model 29 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/policy/base_lowdim_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimPolicy(ModuleAttrMixin): 8 | # ========= inference ============ 9 | # also as self.device and self.dtype for inference device transfer 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | obs: B,To,Do 14 | return: 15 | action: B,Ta,Da 16 | To = 3 17 | Ta = 4 18 | T = 6 19 | |o|o|o| 20 | | | |a|a|a|a| 21 | |o|o| 22 | | |a|a|a|a|a| 23 | | | | | |a|a| 24 | """ 25 | raise NotImplementedError() 26 | 27 | # reset state for stateful policies 28 | def reset(self): 29 | pass 30 | 31 | # ========== training =========== 32 | # no standard training interface except setting normalizer 33 | def set_normalizer(self, normalizer: LinearNormalizer): 34 | raise NotImplementedError() 35 | 36 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as 
counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from diffusion_policy.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/scripts/bet_blockpush_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | 10 | import os 11 | import click 12 | import pathlib 13 | import numpy as np 14 | from diffusion_policy.common.replay_buffer import ReplayBuffer 15 | 16 | @click.command() 17 | @click.option('-i', '--input', required=True, help='input dir contains npy files') 18 | @click.option('-o', '--output', required=True, help='output zarr path') 19 | @click.option('--abs_action', is_flag=True, default=False) 20 | def main(input, output, abs_action): 21 | data_directory = pathlib.Path(input) 22 | observations = np.load( 23 | data_directory / "multimodal_push_observations.npy" 24 | ) 25 | actions = np.load(data_directory / "multimodal_push_actions.npy") 26 | masks = np.load(data_directory / "multimodal_push_masks.npy") 27 | 28 | buffer = ReplayBuffer.create_empty_numpy() 29 | for i in range(len(masks)): 30 | eps_len = int(masks[i].sum()) 31 | obs = observations[i,:eps_len].astype(np.float32) 32 | action = actions[i,:eps_len].astype(np.float32) 33 | if abs_action: 34 | prev_eef_target = obs[:,8:10] 35 | 
next_eef_target = prev_eef_target + action 36 | action = next_eef_target 37 | data = { 38 | 'obs': obs, 39 | 'action': action 40 | } 41 | buffer.add_episode(data) 42 | 43 | buffer.save_to_path(zarr_path=output, chunk_length=-1) 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/scripts/blockpush_abs_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | from diffusion_policy.common.replay_buffer import ReplayBuffer 13 | 14 | 15 | @click.command() 16 | @click.option('-i', '--input', required=True) 17 | @click.option('-o', '--output', required=True) 18 | @click.option('-t', '--target_eef_idx', default=8, type=int) 19 | def main(input, output, target_eef_idx): 20 | buffer = ReplayBuffer.copy_from_path(input) 21 | obs = buffer['obs'] 22 | action = buffer['action'] 23 | prev_eef_target = obs[:,target_eef_idx:target_eef_idx+action.shape[1]] 24 | next_eef_target = prev_eef_target + action 25 | action[:] = next_eef_target 26 | buffer.save_to_path(zarr_path=output, chunk_length=-1) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/scripts/episode_lengths.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import click 10 | import numpy as np 11 | import json 12 | from diffusion_policy.common.replay_buffer import ReplayBuffer 13 | 14 | @click.command() 15 | @click.option('--input', '-i', required=True) 16 | @click.option('--dt', default=0.1, type=float) 17 | def main(input, dt): 18 | buffer = ReplayBuffer.create_from_path(input) 19 | lengths = buffer.episode_lengths 20 | durations = lengths * dt 21 | result = { 22 | 'duration/mean': np.mean(durations) 23 | } 24 | 25 | text = json.dumps(result, indent=2) 26 | print(text) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /diffusion_policy/diffusion_policy/scripts/robomimic_dataset_action_comparison.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | import h5py 13 | import numpy as np 14 | from tqdm import tqdm 15 | from scipy.spatial.transform import Rotation 16 | 17 | def read_all_actions(hdf5_file, metric_skip_steps=1): 18 | n_demos = len(hdf5_file['data']) 19 | all_actions = list() 20 | for i in tqdm(range(n_demos)): 21 | actions = hdf5_file[f'data/demo_{i}/actions'][:] 22 | all_actions.append(actions[metric_skip_steps:]) 23 | all_actions = np.concatenate(all_actions, axis=0) 24 | return all_actions 25 | 26 | 27 | @click.command() 28 | @click.option('-i', '--input', required=True, help='input hdf5 path') 29 | @click.option('-o', '--output', required=True, help='output hdf5 
path (must already exist; both files are opened read-only for comparison)')
30 | def main(input, output):
31 |     # process inputs
32 |     input = pathlib.Path(input).expanduser()
33 |     assert input.is_file()
34 |     output = pathlib.Path(output).expanduser()
35 |     assert output.is_file()
36 |
37 |     input_file = h5py.File(str(input), 'r')
38 |     output_file = h5py.File(str(output), 'r')
39 |
40 |     input_all_actions = read_all_actions(input_file)
41 |     output_all_actions = read_all_actions(output_file)
42 |     pos_dist = np.linalg.norm(input_all_actions[:,:3] - output_all_actions[:,:3], axis=-1)
43 |     rot_dist = (Rotation.from_rotvec(input_all_actions[:,3:6]
44 |         ) * Rotation.from_rotvec(output_all_actions[:,3:6]).inv()
45 |         ).magnitude()
46 |
47 |     print(f'max pos dist: {pos_dist.max()}')
48 |     print(f'max rot dist: {rot_dist.max()}')
49 |
50 | if __name__ == "__main__":
51 |     main()
52 |
--------------------------------------------------------------------------------
/diffusion_policy/diffusion_policy/shared_memory/shared_memory_util.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | from dataclasses import dataclass
3 | import numpy as np
4 | from multiprocessing.managers import SharedMemoryManager
5 | from atomics import atomicview, MemoryOrder, UINT
6 |
7 | @dataclass
8 | class ArraySpec:
9 |     name: str
10 |     shape: Tuple[int]
11 |     dtype: np.dtype
12 |
13 |
14 | class SharedAtomicCounter:
15 |     def __init__(self,
16 |             shm_manager: SharedMemoryManager,
17 |             size: int = 8  # 64bit int
18 |         ):
19 |         shm = shm_manager.SharedMemory(size=size)
20 |         self.shm = shm
21 |         self.size = size
22 |         self.store(0)  # initialize
23 |
24 |     @property
25 |     def buf(self):
26 |         return self.shm.buf[:self.size]
27 |
28 |     def load(self) -> int:
29 |         with atomicview(buffer=self.buf, atype=UINT) as a:
30 |             value = a.load(order=MemoryOrder.ACQUIRE)
31 |         return value
32 |
33 |     def store(self, value: int):
34 |         with atomicview(buffer=self.buf, atype=UINT) as a:
35 |             a.store(value, order=MemoryOrder.RELEASE)
36 |
37 |     def add(self, value: int):
38 |         with atomicview(buffer=self.buf, atype=UINT) as a:
39 |             a.add(value, order=MemoryOrder.ACQ_REL)
40 |
--------------------------------------------------------------------------------
/diffusion_policy/pyrightconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |     "exclude": [
3 |         "data/**",
4 |         "data_local/**",
5 |         "outputs/**"
6 |     ]
7 | }
--------------------------------------------------------------------------------
/diffusion_policy/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(
4 |     name = 'diffusion_policy',
5 |     packages = find_packages(),
6 | )
7 |
--------------------------------------------------------------------------------
/diffusion_policy/tests/test_block_pushing.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 |
4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__))
5 | sys.path.append(ROOT_DIR)
6 | os.chdir(ROOT_DIR)
7 |
8 | from diffusion_policy.env.block_pushing.block_pushing_multimodal import BlockPushMultimodal
9 | from gym.wrappers import FlattenObservation
10 | from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper
11 | from diffusion_policy.gym_util.video_wrapper import VideoWrapper
12 |
13 | def test():
14 |     env = MultiStepWrapper(
15 |         VideoWrapper(
16 |             FlattenObservation(
17 |                 BlockPushMultimodal()
18 |             ),
19 |
enabled=True, 20 | steps_per_render=2 21 | ), 22 | n_obs_steps=2, 23 | n_action_steps=8, 24 | max_episode_steps=16 25 | ) 26 | env = BlockPushMultimodal() 27 | obs = env.reset() 28 | import pdb; pdb.set_trace() 29 | 30 | env = FlattenObservation(BlockPushMultimodal()) 31 | obs = env.reset() 32 | action = env.action_space.sample() 33 | next_obs, reward, done, info = env.step(action) 34 | print(obs[8:10] + action - next_obs[8:10]) 35 | import pdb; pdb.set_trace() 36 | 37 | for i in range(3): 38 | obs, reward, done, info = env.step(env.action_space.sample()) 39 | img = env.render() 40 | import pdb; pdb.set_trace() 41 | print("Done!", done) 42 | 43 | if __name__ == '__main__': 44 | test() 45 | -------------------------------------------------------------------------------- /diffusion_policy/tests/test_cv2_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) 5 | sys.path.append(ROOT_DIR) 6 | os.chdir(ROOT_DIR) 7 | 8 | import numpy as np 9 | from diffusion_policy.common.cv2_util import get_image_transform 10 | 11 | 12 | def test(): 13 | tf = get_image_transform((1280,720), (640,480), bgr_to_rgb=False) 14 | in_img = np.zeros((720,1280,3), dtype=np.uint8) 15 | out_img = tf(in_img) 16 | # print(out_img.shape) 17 | assert out_img.shape == (480,640,3) 18 | 19 | # import pdb; pdb.set_trace() 20 | 21 | if __name__ == '__main__': 22 | test() 23 | -------------------------------------------------------------------------------- /diffusion_policy/tests/test_precise_sleep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) 5 | sys.path.append(ROOT_DIR) 6 | os.chdir(ROOT_DIR) 7 | 8 | import time 9 | import numpy as np 10 | from diffusion_policy.common.precise_sleep import precise_sleep, precise_wait 11 | 12 | 13 | def test_sleep(): 14 | dt = 0.1 15 | tol = 1e-3 16 | time_samples = list() 17 | for i in range(100): 18 | precise_sleep(dt) 19 | # time.sleep(dt) 20 | time_samples.append(time.monotonic()) 21 | time_deltas = np.diff(time_samples) 22 | 23 | from matplotlib import pyplot as plt 24 | plt.plot(time_deltas) 25 | plt.ylim((dt-tol,dt+tol)) 26 | 27 | 28 | def test_wait(): 29 | dt = 0.1 30 | tol = 1e-3 31 | errors = list() 32 | t_start = time.monotonic() 33 | for i in range(1,100): 34 | t_end_desired = t_start + i * dt 35 | time.sleep(t_end_desired - time.monotonic()) 36 | t_end = time.monotonic() 37 | errors.append(t_end - t_end_desired) 38 | 39 | new_errors = list() 40 | t_start = time.monotonic() 41 | for i in range(1,100): 42 | t_end_desired = t_start + i * dt 43 | precise_wait(t_end_desired) 44 | t_end = time.monotonic() 45 | new_errors.append(t_end - t_end_desired) 46 | 47 | from matplotlib import pyplot as plt 48 | plt.plot(errors, label='time.sleep') 49 | plt.plot(new_errors, label='sleep/spin hybrid') 50 | plt.ylim((-tol,+tol)) 51 | plt.title('0.1 sec sleep error') 52 | plt.legend() 53 | 54 | 55 | if __name__ == '__main__': 56 | test_sleep() 57 | -------------------------------------------------------------------------------- /diffusion_policy/tests/test_replay_buffer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) 5 | sys.path.append(ROOT_DIR) 6 | os.chdir(ROOT_DIR) 7 | 8 | import zarr 9 | from 
diffusion_policy.common.replay_buffer import ReplayBuffer 10 | 11 | def test(): 12 | import numpy as np 13 | buff = ReplayBuffer.create_empty_numpy() 14 | buff.add_episode({ 15 | 'obs': np.zeros((100,10), dtype=np.float16) 16 | }) 17 | buff.add_episode({ 18 | 'obs': np.ones((50,10)), 19 | 'action': np.ones((50,2)) 20 | }) 21 | # buff.rechunk(256) 22 | obs = buff.get_episode(0) 23 | 24 | import numpy as np 25 | buff = ReplayBuffer.create_empty_zarr() 26 | buff.add_episode({ 27 | 'obs': np.zeros((100,10), dtype=np.float16) 28 | }) 29 | buff.add_episode({ 30 | 'obs': np.ones((50,10)), 31 | 'action': np.ones((50,2)) 32 | }) 33 | obs = buff.get_episode(0) 34 | buff.set_chunks({ 35 | 'obs': (100,10), 36 | 'action': (100,2) 37 | }) 38 | 39 | 40 | def test_real(): 41 | import os 42 | dist_group = zarr.open( 43 | os.path.expanduser('~/dev/diffusion_policy/data/pusht/pusht_cchi_v2.zarr'), 'r') 44 | 45 | buff = ReplayBuffer.create_empty_numpy() 46 | key, group = next(iter(dist_group.items())) 47 | for key, group in dist_group.items(): 48 | buff.add_episode(group) 49 | 50 | # out_path = os.path.expanduser('~/dev/diffusion_policy/data/pusht_cchi2_v2_replay.zarr') 51 | out_path = os.path.expanduser('~/dev/diffusion_policy/data/test.zarr') 52 | out_store = zarr.DirectoryStore(out_path) 53 | buff.save_to_store(out_store) 54 | 55 | buff = ReplayBuffer.copy_from_path(out_path, store=zarr.MemoryStore()) 56 | buff.pop_episode() 57 | 58 | 59 | def test_pop(): 60 | buff = ReplayBuffer.create_from_path( 61 | '/home/chengchi/dev/diffusion_policy/data/pusht_cchi_v3_replay.zarr', 62 | mode='rw') 63 | -------------------------------------------------------------------------------- /diffusion_policy/tests/test_robomimic_image_runner.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) 5 | sys.path.append(ROOT_DIR) 6 | os.chdir(ROOT_DIR) 7 | 8 | from diffusion_policy.env_runner.robomimic_image_runner import RobomimicImageRunner 9 | 10 | def test(): 11 | import os 12 | from omegaconf import OmegaConf 13 | cfg_path = os.path.expanduser('~/dev/diffusion_policy/diffusion_policy/config/task/lift_image.yaml') 14 | cfg = OmegaConf.load(cfg_path) 15 | cfg['n_obs_steps'] = 1 16 | cfg['n_action_steps'] = 1 17 | cfg['past_action_visible'] = False 18 | runner_cfg = cfg['env_runner'] 19 | runner_cfg['n_train'] = 1 20 | runner_cfg['n_test'] = 1 21 | del runner_cfg['_target_'] 22 | runner = RobomimicImageRunner( 23 | **runner_cfg, 24 | output_dir='/tmp/test') 25 | 26 | # import pdb; pdb.set_trace() 27 | 28 | self = runner 29 | env = self.env 30 | env.seed(seeds=self.env_seeds) 31 | obs = env.reset() 32 | for i in range(10): 33 | _ = env.step(env.action_space.sample()) 34 | 35 | imgs = env.render() 36 | 37 | if __name__ == '__main__': 38 | test() 39 | -------------------------------------------------------------------------------- /diffusion_policy/tests/test_robomimic_lowdim_runner.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) 5 | sys.path.append(ROOT_DIR) 6 | os.chdir(ROOT_DIR) 7 | 8 | from diffusion_policy.env_runner.robomimic_lowdim_runner import RobomimicLowdimRunner 9 | 10 | def test(): 11 | import os 12 | from omegaconf import OmegaConf 13 | cfg_path = os.path.expanduser('~/dev/diffusion_policy/diffusion_policy/config/task/lift_lowdim.yaml') 14 | cfg = OmegaConf.load(cfg_path) 
15 | cfg['n_obs_steps'] = 1 16 | cfg['n_action_steps'] = 1 17 | cfg['past_action_visible'] = False 18 | runner_cfg = cfg['env_runner'] 19 | runner_cfg['n_train'] = 1 20 | runner_cfg['n_test'] = 0 21 | del runner_cfg['_target_'] 22 | runner = RobomimicLowdimRunner( 23 | **runner_cfg, 24 | output_dir='/tmp/test') 25 | 26 | # import pdb; pdb.set_trace() 27 | 28 | self = runner 29 | env = self.env 30 | env.seed(seeds=self.env_seeds) 31 | obs = env.reset() 32 | 33 | if __name__ == '__main__': 34 | test() 35 | -------------------------------------------------------------------------------- /diffusion_policy/tests/test_shared_queue.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) 5 | sys.path.append(ROOT_DIR) 6 | os.chdir(ROOT_DIR) 7 | 8 | import numpy as np 9 | from multiprocessing.managers import SharedMemoryManager 10 | from diffusion_policy.shared_memory.shared_memory_queue import SharedMemoryQueue, Full, Empty 11 | 12 | 13 | def test(): 14 | shm_manager = SharedMemoryManager() 15 | shm_manager.start() 16 | example = { 17 | 'cmd': 0, 18 | 'pose': np.zeros((6,)) 19 | } 20 | queue = SharedMemoryQueue.create_from_examples( 21 | shm_manager=shm_manager, 22 | examples=example, 23 | buffer_size=3 24 | ) 25 | raised = False 26 | try: 27 | queue.get() 28 | except Empty: 29 | raised = True 30 | assert raised 31 | 32 | data = { 33 | 'cmd': 1, 34 | 'pose': np.ones((6,)) 35 | } 36 | queue.put(data) 37 | result = queue.get() 38 | assert result['cmd'] == data['cmd'] 39 | assert np.allclose(result['pose'], data['pose']) 40 | 41 | queue.put(data) 42 | queue.put(data) 43 | queue.put(data) 44 | assert queue.qsize() == 3 45 | raised = False 46 | try: 47 | queue.put(data) 48 | except Full: 49 | raised = True 50 | assert raised 51 | 52 | result = queue.get_all() 53 | assert np.allclose(result['cmd'], [1,1,1]) 54 | 55 | queue.put({'cmd': 0}) 56 | queue.put({'cmd': 1}) 57 | queue.put({'cmd': 2}) 58 | queue.get() 59 | queue.put({'cmd': 3}) 60 | 61 | result = queue.get_k(3) 62 | assert np.allclose(result['cmd'], [1,2,3]) 63 | 64 | queue.clear() 65 | 66 | if __name__ == "__main__": 67 | test() 68 | -------------------------------------------------------------------------------- /diffusion_policy/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | Training: 4 | python train.py --config-name=train_diffusion_lowdim_workspace 5 | """ 6 | 7 | import sys 8 | # use line-buffering for both stdout and stderr 9 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 10 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 11 | 12 | import hydra 13 | from omegaconf import OmegaConf 14 | import pathlib 15 | from diffusion_policy.workspace.base_workspace import BaseWorkspace 16 | 17 | # allows arbitrary python code execution in configs using the ${eval:''} resolver 18 | OmegaConf.register_new_resolver("eval", eval, replace=True) 19 | 20 | @hydra.main( 21 | version_base=None, 22 | config_path=str(pathlib.Path(__file__).parent.joinpath( 23 | 'diffusion_policy','config')) 24 | ) 25 | def main(cfg: OmegaConf): 26 | # resolve immediately so all the ${now:} resolvers 27 | # will use the same time. 
28 | OmegaConf.resolve(cfg) 29 | 30 | cls = hydra.utils.get_class(cfg._target_) 31 | workspace: BaseWorkspace = cls(cfg) 32 | workspace.run() 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /habitat_extensions/__init__.py: -------------------------------------------------------------------------------- 1 | from habitat_extensions import measures, obs_transformers, sensors, nav 2 | from habitat_extensions.config.default import get_extended_config 3 | from habitat_extensions.task import VLNCEDatasetV1 4 | from habitat_extensions.habitat_simulator import Simulator 5 | -------------------------------------------------------------------------------- /habitat_extensions/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/habitat_extensions/config/__init__.py -------------------------------------------------------------------------------- /habitat_extensions/config/r2r_vlnce.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 20 3 | 4 | SIMULATOR: 5 | ACTION_SPACE_CONFIG: v0 6 | AGENT_0: 7 | SENSORS: [RGB_SENSOR, DEPTH_SENSOR] 8 | FORWARD_STEP_SIZE: 0.25 9 | TURN_ANGLE: 15 10 | HABITAT_SIM_V0: 11 | GPU_DEVICE_ID: 0 12 | ALLOW_SLIDING: True 13 | RGB_SENSOR: 14 | WIDTH: 224 15 | HEIGHT: 224 16 | HFOV: 90 17 | TYPE: HabitatSimRGBSensor 18 | DEPTH_SENSOR: 19 | WIDTH: 256 # pretrained DDPPO resnet needs 256x256 20 | HEIGHT: 256 21 | HFOV: 90 22 | TYPE: 23 | Sim-v1 24 | 25 | TASK: 26 | TYPE: VLN-v0 27 | SUCCESS_DISTANCE: 3.0 28 | SENSORS: [ 29 | INSTRUCTION_SENSOR, 30 | # SHORTEST_PATH_SENSOR, 31 | # VLN_ORACLE_PROGRESS_SENSOR 32 | ] 33 | INSTRUCTION_SENSOR_UUID: instruction 34 | POSSIBLE_ACTIONS: [STOP, MOVE_FORWARD, TURN_LEFT, TURN_RIGHT, HIGHTOLOW] 35 | MEASUREMENTS: [ 36 | # DISTANCE_TO_GOAL, 37 | # SUCCESS, 38 | # SPL, 39 | # NDTW, 40 | # PATH_LENGTH, 41 | # ORACLE_SUCCESS, 42 | # STEPS_TAKEN 43 | ] 44 | SUCCESS: 45 | SUCCESS_DISTANCE: 3.0 46 | SPL: 47 | SUCCESS_DISTANCE: 3.0 48 | NDTW: 49 | SUCCESS_DISTANCE: 3.0 50 | GT_PATH: data/datasets/R2R_VLNCE_v1-2_preprocessed/{split}/{split}_gt.json.gz 51 | SDTW: 52 | SUCCESS_DISTANCE: 3.0 53 | GT_PATH: data/datasets/R2R_VLNCE_v1-2_preprocessed/{split}/{split}_gt.json.gz 54 | ORACLE_SUCCESS: 55 | SUCCESS_DISTANCE: 3.0 56 | DATASET: 57 | TYPE: VLN-CE-v1 58 | SPLIT: train 59 | DATA_PATH: data/datasets/R2R_VLNCE_v1-2_preprocessed_BERTidx/{split}/{split}_bertidx.json.gz 60 | SCENES_DIR: data/scene_datasets/ 61 | -------------------------------------------------------------------------------- /run_r2r/iter_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: run_r2r/r2r_vlnce.yaml 2 | SIMULATOR_GPU_IDS: [0] 3 | TORCH_GPU_ID: 0 4 | TORCH_GPU_IDS: [0] 5 | TRAINER_NAME: SS-ETP-DP 6 | GPU_NUMBERS: 1 7 | NUM_ENVIRONMENTS: 1 8 | TENSORBOARD_DIR: data/logs/tensorboard_dirs/ 9 | CHECKPOINT_FOLDER: data/logs/checkpoints/ 10 | EVAL_CKPT_PATH_DIR: data/logs/checkpoints/ 11 | RESULTS_DIR: data/logs/eval_results/ 12 | VIDEO_DIR: data/logs/video/ 13 | VIDEO_OPTION: [] # disk 14 | 15 | 16 | EVAL: 17 | USE_CKPT_CONFIG: False 18 | SPLIT: '' 19 | EPISODE_COUNT: -1 20 | CKPT_PATH_DIR: '' 21 | fast_eval: False 22 | 23 | IL: 24 | iters: 15000 25 | log_every: 200 26 | lr: 1e-5 27 | batch_size: 1 # equal to 
NUM_ENVIRONMENTS 28 | ml_weight: 1.0 29 | # expert_policy: spl 30 | expert_policy: ndtw 31 | 32 | sample_ratio: 1.0 33 | decay_interval: 3000 34 | 35 | max_traj_len: 75 # Diffusion policy REF 36 | max_text_len: 80 37 | loc_noise: 0.5 38 | tryout: True 39 | 40 | MODEL: 41 | task_type: r2r 42 | 43 | USE_NDTW: True 44 | 45 | policy_name: PolicyViewSelectionETPDP 46 | NUM_ANGLES: 12 47 | fix_lang_embedding: False 48 | fix_pano_embedding: False 49 | use_depth_embedding: True 50 | use_sprels: True 51 | 52 | spatial_output: False 53 | RGB_ENCODER: 54 | output_size: 512 55 | DEPTH_ENCODER: 56 | output_size: 256 57 | VISUAL_DIM: 58 | vis_hidden: 768 59 | directional: 128 60 | INSTRUCTION_ENCODER: 61 | bidirectional: True 62 | 63 | DP: 64 | model_config: vlnce_baselines/models/dp/run_pt/r2r_model_config_dep.json 65 | default_config: vlnce_baselines/models/dp/config/defaults.yaml 66 | nomad_config: vlnce_baselines/models/dp/config/nomad.yaml 67 | scene: '' 68 | 69 | -------------------------------------------------------------------------------- /run_r2r/main.bash: -------------------------------------------------------------------------------- 1 | export GLOG_minloglevel=2 2 | export MAGNUM_LOG=quiet 3 | 4 | scene=$1 5 | 6 | flag1="--exp_name release_r2r 7 | --run-type train 8 | --exp-config run_r2r/iter_train.yaml 9 | SIMULATOR_GPU_IDS [0] 10 | TORCH_GPU_IDS [0] 11 | GPU_NUMBERS 1 12 | NUM_ENVIRONMENTS 1 13 | IL.iters 660 14 | IL.lr 1e-5 15 | IL.log_every 500 16 | IL.ml_weight 1.0 17 | IL.sample_ratio 0.5 18 | IL.decay_interval 4000 19 | IL.load_from_ckpt False 20 | IL.is_requeue True 21 | IL.waypoint_aug True 22 | TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.ALLOW_SLIDING True 23 | MODEL.pretrained_path pretrained/ETP/mlm.sap_r2r/ckpts/model_step_82500.pt 24 | " 25 | 26 | flag2=" --exp_name release_r2r 27 | --run-type eval 28 | --exp-config run_r2r/iter_train.yaml 29 | SIMULATOR_GPU_IDS [0] 30 | TORCH_GPU_IDS [0] 31 | GPU_NUMBERS 1 32 | NUM_ENVIRONMENTS 1 33 | TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.ALLOW_SLIDING True 34 | EVAL.SPLIT val_seen 35 | EVAL.CKPT_PATH_DIR data/checkpoints/open_area.pth 36 | MODEL.DP.scene $scene 37 | " 38 | 39 | flag3="--exp_name release_r2r 40 | --run-type inference 41 | --exp-config run_r2r/iter_train.yaml 42 | SIMULATOR_GPU_IDS [0,1] 43 | TORCH_GPU_IDS [0,1] 44 | GPU_NUMBERS 1 45 | NUM_ENVIRONMENTS 8 46 | TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.ALLOW_SLIDING True 47 | INFERENCE.CKPT_PATH data/logs/checkpoints/release_r2r/ckpt.iter12000.pth 48 | INFERENCE.PREDICTIONS_FILE preds.json 49 | IL.back_algo control 50 | " 51 | 52 | mode=$2 53 | port=$3 54 | 55 | echo $mode 56 | case $mode in 57 | train) 58 | echo "###### train mode ######" 59 | torchrun --master_port=$port run.py $flag1 60 | ;; 61 | eval) 62 | echo "###### eval mode ######" 63 | python run.py $flag2 64 | ;; 65 | infer) 66 | echo "###### infer mode ######" 67 | python run.py $flag3 68 | ;; 69 | esac -------------------------------------------------------------------------------- /run_r2r/r2r_vlnce.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 5000 3 | 4 | SIMULATOR: 5 | ACTION_SPACE_CONFIG: v0 6 | AGENT_0: 7 | SENSORS: [RGB_SENSOR, DEPTH_SENSOR] 8 | FORWARD_STEP_SIZE: 0.25 9 | TURN_ANGLE: 15 10 | HABITAT_SIM_V0: 11 | GPU_DEVICE_ID: 0 12 | ALLOW_SLIDING: True 13 | # ALLOW_SLIDING: False 14 | RGB_SENSOR: 15 | WIDTH: 224 16 | HEIGHT: 224 17 | HFOV: 90 18 | TYPE: HabitatSimRGBSensor 19 | DEPTH_SENSOR: 20 | WIDTH: 256 # pretrained DDPPO resnet 
needs 256x256 21 | HEIGHT: 256 22 | HFOV: 90 23 | TYPE: 24 | Sim-v1 25 | 26 | TASK: 27 | TYPE: VLN-v0 28 | POSSIBLE_ACTIONS: [STOP, MOVE_FORWARD, TURN_LEFT, TURN_RIGHT, HIGHTOLOW] 29 | SUCCESS_DISTANCE: 3.0 30 | SENSORS: [ 31 | INSTRUCTION_SENSOR, 32 | # SHORTEST_PATH_SENSOR, 33 | # VLN_ORACLE_PROGRESS_SENSOR 34 | ] 35 | INSTRUCTION_SENSOR_UUID: instruction 36 | MEASUREMENTS: [ 37 | # DISTANCE_TO_GOAL, 38 | # SUCCESS, 39 | # SPL, 40 | # NDTW, 41 | # PATH_LENGTH, 42 | # ORACLE_SUCCESS, 43 | # STEPS_TAKEN 44 | ] 45 | SUCCESS: 46 | SUCCESS_DISTANCE: 3.0 47 | SPL: 48 | SUCCESS_DISTANCE: 3.0 49 | NDTW: 50 | SUCCESS_DISTANCE: 3.0 51 | GT_PATH: data/datasets/R2R_VLNCE_v1-2_preprocessed/{split}/{split}_gt.json.gz 52 | SDTW: 53 | SUCCESS_DISTANCE: 3.0 54 | GT_PATH: data/datasets/R2R_VLNCE_v1-2_preprocessed/{split}/{split}_gt.json.gz 55 | ORACLE_SUCCESS: 56 | SUCCESS_DISTANCE: 3.0 57 | 58 | DATASET: 59 | TYPE: VLN-CE-v1 60 | # SPLIT: train 61 | Scene: open_area 62 | DATA_PATH: data/datasets/R2R_VLNCE_v1-2_preprocessed_BERTidx/scene/open_area_bertidx.json.gz 63 | SCENES_DIR: data/scene_datasets/ 64 | 65 | TOP_DOWN_MAP_VLNCE: 66 | DRAW_REFERENCE_PATH: True 67 | -------------------------------------------------------------------------------- /vlnce_baselines/__init__.py: -------------------------------------------------------------------------------- 1 | from vlnce_baselines import ss_trainer_ETP_DP 2 | from vlnce_baselines.common import environments_dp 3 | from vlnce_baselines.models import ( 4 | Policy_ViewSelection_ETP_DP, 5 | ) 6 | -------------------------------------------------------------------------------- /vlnce_baselines/common/aux_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class _AuxLosses: 5 | def __init__(self): 6 | self._losses = {} 7 | self._loss_alphas = {} 8 | self._is_active = False 9 | 10 | def clear(self): 11 | self._losses.clear() 12 | self._loss_alphas.clear() 13 | 14 | def register_loss(self, name, loss, alpha=1.0): 15 | assert self.is_active() 16 | assert name not in self._losses 17 | 18 | self._losses[name] = loss 19 | self._loss_alphas[name] = alpha 20 | 21 | def get_loss(self, name): 22 | return self._losses[name] 23 | 24 | def reduce(self, mask):  # alpha-weighted sum of the masked mean of every registered loss 25 | assert self.is_active() 26 | total = torch.tensor(0.0).cuda() 27 | 28 | for k in self._losses.keys(): 29 | k_loss = torch.masked_select(self._losses[k], mask).mean() 30 | total = total + self._loss_alphas[k] * k_loss 31 | 32 | return total 33 | 34 | def is_active(self): 35 | return self._is_active 36 | 37 | def activate(self): 38 | self._is_active = True 39 | 40 | def deactivate(self): 41 | self._is_active = False 42 | 43 | 44 | AuxLosses = _AuxLosses() 45 | -------------------------------------------------------------------------------- /vlnce_baselines/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/vlnce_baselines/config/__init__.py -------------------------------------------------------------------------------- /vlnce_baselines/config/nonlearning.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | EVAL: 3 | SPLIT: val_unseen 4 | # any number greater than the actual episode count evaluates every episode 5 | EPISODE_COUNT: 10 6 | EVAL_NONLEARNING: True 7 | NONLEARNING: 8 | # RandomAgent or HandcraftedAgent 9
| AGENT: RandomAgent 10 | 11 | INFERENCE: 12 | SPLIT: val_unseen 13 | PREDICTIONS_FILE: predictions.json 14 | INFERENCE_NONLEARNING: True 15 | NONLEARNING: 16 | # RandomAgent or HandcraftedAgent 17 | AGENT: "RandomAgent" 18 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | TRAINER_NAME: dagger # recollect_trainer 3 | SIMULATOR_GPU_IDS: [0] 4 | TORCH_GPU_ID: 0 5 | GPU_NUMBERS: 1 6 | NUM_ENVIRONMENTS: 1 7 | TENSORBOARD_DIR: data/tensorboard_dirs/cma 8 | CHECKPOINT_FOLDER: data/checkpoints/cma 9 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma 10 | 11 | EVAL: 12 | USE_CKPT_CONFIG: False 13 | SPLIT: val_unseen 14 | EPISODE_COUNT: -1 15 | 16 | IL: 17 | epochs: 45 18 | batch_size: 5 19 | 20 | RECOLLECT_TRAINER: 21 | gt_file: 22 | data/datasets/R2R_VLNCE_v1-2_preprocessed/{split}/{split}_gt.json.gz 23 | 24 | DAGGER: 25 | iterations: 1 26 | update_size: 10819 27 | p: 1.0 28 | preload_lmdb_features: False 29 | lmdb_features_dir: data/trajectories_dirs/cma/trajectories.lmdb 30 | 31 | MODEL: 32 | policy_name: CMAPolicy 33 | 34 | INSTRUCTION_ENCODER: 35 | bidirectional: True 36 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_aug.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task_aug.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_aug 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_aug 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_aug 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 45 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 157232 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma_aug/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: CMAPolicy 27 | 28 | INSTRUCTION_ENCODER: 29 | bidirectional: True 30 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_aug_tune.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_aug_tune 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_aug_tune 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_aug_tune 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 45 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 10819 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma/trajectories.lmdb 24 | load_from_ckpt: True 25 | ckpt_to_load: data/checkpoints/cma_aug/best_checkpoint.pth # REPLACE 26 | 27 | MODEL: 28 | policy_name: CMAPolicy 29 | 30 | INSTRUCTION_ENCODER: 31 | bidirectional: True 32 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_da.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 
| SIMULATOR_GPU_IDS: [0] 3 | TORCH_GPU_ID: 0 4 | TORCH_GPU_IDS: [0] 5 | GPU_NUMBERS: 1 6 | NUM_ENVIRONMENTS: 1 7 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_da 8 | CHECKPOINT_FOLDER: data/checkpoints/cma_da 9 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_da 10 | 11 | EVAL: 12 | USE_CKPT_CONFIG: False 13 | SPLIT: val_unseen 14 | EPISODE_COUNT: -1 15 | 16 | IL: 17 | epochs: 4 18 | batch_size: 5 19 | 20 | DAGGER: 21 | iterations: 10 22 | update_size: 5000 23 | p: 0.75 24 | preload_lmdb_features: False 25 | lmdb_features_dir: data/trajectories_dirs/cma_da/trajectories.lmdb 26 | 27 | MODEL: 28 | policy_name: CMAPolicy 29 | 30 | INSTRUCTION_ENCODER: 31 | bidirectional: True 32 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_da_aug_tune.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_da_aug_tune 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_da_aug_tune 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_da_aug_tune 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 4 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 10 20 | update_size: 5000 21 | p: 0.5 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma_da_aug_tune/trajectories.lmdb 24 | load_from_ckpt: True 25 | ckpt_to_load: data/checkpoints/cma_aug_tune/best_checkpoint.pth # REPLACE 26 | 27 | MODEL: 28 | policy_name: CMAPolicy 29 | 30 | INSTRUCTION_ENCODER: 31 | bidirectional: True 32 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_pm.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_pm 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_pm 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_pm 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 45 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 10819 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: CMAPolicy 27 | 28 | INSTRUCTION_ENCODER: 29 | bidirectional: True 30 | 31 | PROGRESS_MONITOR: 32 | use: True 33 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_pm_aug.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task_aug.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_pm_aug 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_pm_aug 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_pm_aug 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 45 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 157232 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma_aug/trajectories.lmdb 24 | 25 | MODEL: 26 | 
policy_name: CMAPolicy 27 | 28 | INSTRUCTION_ENCODER: 29 | bidirectional: True 30 | 31 | PROGRESS_MONITOR: 32 | use: True 33 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_pm_aug_tune.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_pm_aug_tune 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_pm_aug_tune 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_pm_aug_tune 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 45 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 10819 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma/trajectories.lmdb 24 | load_from_ckpt: True 25 | ckpt_to_load: data/checkpoints/cma_pm_aug/best_checkpoint.pth # REPLACE 26 | 27 | MODEL: 28 | policy_name: CMAPolicy 29 | 30 | INSTRUCTION_ENCODER: 31 | bidirectional: True 32 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_pm_da.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_pm_da 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_pm_da 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_pm_da 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 4 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 10 20 | update_size: 5000 21 | p: 0.75 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma_pm_da/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: CMAPolicy 27 | 28 | INSTRUCTION_ENCODER: 29 | bidirectional: True 30 | 31 | PROGRESS_MONITOR: 32 | use: True 33 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_pm_da_aug_tune.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_pm_da_aug_tune 6 | CHECKPOINT_FOLDER: data/checkpoints/cma_pm_da_aug_tune 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_pm_da_aug_tune 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 4 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 10 20 | update_size: 5000 21 | p: 0.5 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/cma_pm_da_aug_tune/trajectories.lmdb 24 | load_from_ckpt: True 25 | ckpt_to_load: data/checkpoints/cma_pm_aug/best_checkpoint.pth # REPLACE 26 | 27 | MODEL: 28 | policy_name: CMAPolicy 29 | 30 | INSTRUCTION_ENCODER: 31 | bidirectional: True 32 | 33 | PROGRESS_MONITOR: 34 | use: True 35 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_sf.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | 
SIMULATOR_GPU_IDS: [0] 3 | TORCH_GPU_ID: 0 4 | TORCH_GPU_IDS: [0] 5 | GPU_NUMBERS: 1 6 | NUM_ENVIRONMENTS: 1 7 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_sf 8 | CHECKPOINT_FOLDER: data/checkpoints/cma_sf 9 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_sf 10 | 11 | EVAL: 12 | USE_CKPT_CONFIG: False 13 | SPLIT: val_unseen 14 | EPISODE_COUNT: -1 15 | 16 | IL: 17 | epochs: 50 18 | batch_size: 8 19 | schedule_ratio: 0.75 20 | decay_time: 10 21 | 22 | max_traj_len: 130 23 | 24 | MODEL: 25 | policy_name: CMAPolicyO 26 | 27 | INSTRUCTION_ENCODER: 28 | bidirectional: True 29 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/cma_ss.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_IDS: [0] 3 | TORCH_GPU_ID: 0 4 | TORCH_GPU_IDS: [0] 5 | TRAINER_NAME: ss 6 | GPU_NUMBERS: 1 7 | NUM_ENVIRONMENTS: 1 8 | TENSORBOARD_DIR: data/tensorboard_dirs/cma_ss 9 | CHECKPOINT_FOLDER: data/checkpoints/cma_ss 10 | EVAL_CKPT_PATH_DIR: data/checkpoints/cma_ss 11 | 12 | EVAL: 13 | USE_CKPT_CONFIG: False 14 | SPLIT: val_unseen 15 | EPISODE_COUNT: -1 16 | 17 | #RL: 18 | # POLICY: 19 | # OBS_TRANSFORMS: 20 | # ENABLED_TRANSFORMS: [Resize] 21 | 22 | IL: 23 | epochs: 50 24 | batch_size: 8 25 | schedule_ratio: 0.75 26 | decay_time: 10 27 | 28 | max_traj_len: 130 29 | 30 | MODEL: 31 | policy_name: CMAPolicyO 32 | 33 | INSTRUCTION_ENCODER: 34 | bidirectional: True 35 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/seq2seq.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | TRAINER_NAME: dagger # recollect_trainer 3 | SIMULATOR_GPU_ID: 0 4 | TORCH_GPU_ID: 0 5 | NUM_ENVIRONMENTS: 1 6 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq 7 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq 8 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq 9 | 10 | EVAL: 11 | USE_CKPT_CONFIG: False 12 | SPLIT: val_unseen 13 | EPISODE_COUNT: -1 14 | 15 | IL: 16 | epochs: 15 17 | batch_size: 5 18 | 19 | RECOLLECT_TRAINER: 20 | gt_file: 21 | data/datasets/R2R_VLNCE_v1-2_preprocessed/{split}/{split}_gt.json.gz 22 | 23 | DAGGER: 24 | iterations: 1 25 | update_size: 10819 26 | p: 1.0 27 | preload_lmdb_features: False 28 | lmdb_features_dir: data/trajectories_dirs/seq2seq/trajectories.lmdb 29 | 30 | MODEL: 31 | policy_name: Seq2SeqPolicy 32 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/seq2seq_aug.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task_aug.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq_aug 6 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq_aug 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq_aug 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 15 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 157232 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/seq2seq_aug/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: Seq2SeqPolicy 27 | -------------------------------------------------------------------------------- 
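A note on the DAGGER blocks repeated through the configs above: iterations is the number of dataset-aggregation rounds, update_size the number of trajectories collected per round, and p the oracle-mixing coefficient, so p: 1.0 with a single iteration reduces to plain behavior cloning while p: 0.75 anneals toward the learned policy across rounds. A minimal sketch of the mixing rule, assuming the geometric schedule commonly used in VLN-CE-style trainers (the helper names here are illustrative, not from this repo):

import random

def oracle_prob(p: float, dagger_round: int) -> float:
    # assumed geometric decay: round 0 gives p**0 = 1.0 (always follow the expert);
    # later rounds act on-policy progressively more often
    return p ** dagger_round

def collect_action(expert_action, policy_action, p: float, dagger_round: int):
    # follow the oracle with probability p**round while collecting trajectories
    if random.random() < oracle_prob(p, dagger_round):
        return expert_action
    return policy_action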
/vlnce_baselines/config/r2r_configs/seq2seq_aug_tune.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq_aug_tune 6 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq_aug_tune 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq_aug_tune 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 15 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 10819 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/seq2seq/trajectories.lmdb 24 | load_from_ckpt: True 25 | ckpt_to_load: data/checkpoints/seq2seq_aug/best_checkpoint.pth # REPLACE 26 | 27 | MODEL: 28 | policy_name: Seq2SeqPolicy 29 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/seq2seq_da.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq_da 6 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq_da 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq_da 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 4 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 10 20 | update_size: 5000 21 | p: 0.75 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/seq2seq_da/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: Seq2SeqPolicy 27 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/seq2seq_pm.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq_pm 6 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq_pm 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq_pm 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 15 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 10819 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/seq2seq/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: Seq2SeqPolicy 27 | 28 | PROGRESS_MONITOR: 29 | use: True 30 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/seq2seq_pm_aug.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task_aug.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq_pm_aug 6 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq_pm_aug 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq_pm_aug 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 15 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 1 20 | update_size: 157232 21 | p: 1.0 22 | preload_lmdb_features: False 23 | lmdb_features_dir: 
data/trajectories_dirs/seq2seq_aug/trajectories.lmdb 24 | 25 | MODEL: 26 | policy_name: Seq2SeqPolicy 27 | 28 | PROGRESS_MONITOR: 29 | use: True 30 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/seq2seq_pm_da_aug_tune.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_ENVIRONMENTS: 1 5 | TENSORBOARD_DIR: data/tensorboard_dirs/seq2seq_pm_da_aug_tune 6 | CHECKPOINT_FOLDER: data/checkpoints/seq2seq_pm_da_aug_tune 7 | EVAL_CKPT_PATH_DIR: data/checkpoints/seq2seq_pm_da_aug_tune 8 | 9 | EVAL: 10 | USE_CKPT_CONFIG: False 11 | SPLIT: val_unseen 12 | EPISODE_COUNT: -1 13 | 14 | IL: 15 | epochs: 4 16 | batch_size: 5 17 | 18 | DAGGER: 19 | iterations: 10 20 | update_size: 5000 21 | p: 0.75 22 | preload_lmdb_features: False 23 | lmdb_features_dir: data/trajectories_dirs/seq2seq_pm_da_aug_tune/trajectories.lmdb 24 | load_from_ckpt: True 25 | ckpt_to_load: data/checkpoints/seq2seq_pm_aug/best_checkpoint.pth # REPLACE 26 | 27 | MODEL: 28 | policy_name: Seq2SeqPolicy 29 | 30 | PROGRESS_MONITOR: 31 | use: True 32 | -------------------------------------------------------------------------------- /vlnce_baselines/config/r2r_configs/test_set_inference.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_PROCESSES: 1 5 | 6 | INFERENCE: 7 | SPLIT: test 8 | USE_CKPT_CONFIG: False 9 | SAMPLE: False 10 | CKPT_PATH: data/checkpoints/CMA_PM_DA_Aug.pth 11 | PREDICTIONS_FILE: predictions.json 12 | 13 | MODEL: 14 | policy_name: CMAPolicy 15 | 16 | INSTRUCTION_ENCODER: 17 | bidirectional: True 18 | 19 | CMA: 20 | use: True 21 | 22 | PROGRESS_MONITOR: 23 | use: True 24 | -------------------------------------------------------------------------------- /vlnce_baselines/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/vlnce_baselines/models/__init__.py -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/vlnce_baselines/models/dp/__init__.py -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/config/data_config.yaml: -------------------------------------------------------------------------------- 1 | action_stats: 2 | min: [-1.07, -1.05] # [min_dx, min_dy] 3 | max: [1.07, 1.03] # [max_dx, max_dy] 4 | -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/config/defaults.yaml: -------------------------------------------------------------------------------- 1 | # defaults for training 2 | project_name: vln_nomad 3 | run_name: vln_nomad 4 | 5 | # training setup 6 | use_wandb: True # set to false if you don't want to log to wandb 7 | train: True 8 | batch_size: 400 9 | eval_batch_size: 400 10 | epochs: 30 11 | gpu_ids: [0] 12 | num_workers: 4 13 | lr: 5e-4 14 | optimizer: adam 15 | seed: 0 16 | clipping: False 17 | train_subset: 1. 
18 | 19 | # model params 20 | model_type: gnm 21 | obs_encoding_size: 1024 22 | goal_encoding_size: 1024 23 | 24 | # normalization for the action space 25 | normalize: True 26 | 27 | # context 28 | context_type: temporal 29 | context_size: 5 30 | 31 | # tradeoff between action and distance prediction loss 32 | alpha: 0.5 33 | 34 | # tradeoff between task loss and kld 35 | beta: 0.1 36 | 37 | obs_type: image 38 | goal_type: image 39 | scheduler: null 40 | 41 | # distance bounds for distance and action predictions 42 | distance: 43 | min_dist_cat: 0 44 | max_dist_cat: 20 45 | action: 46 | min_dist_cat: 2 47 | max_dist_cat: 10 48 | close_far_threshold: 10 # distance threshold used to separate the close and the far subgoals that are sampled per datapoint 49 | 50 | # action output params 51 | len_traj_pred: 5 52 | learn_angle: True 53 | 54 | # dataset specific parameters 55 | image_size: [85, 64] # width, height 56 | 57 | # logging stuff 58 | ## a frequency of 0 turns a log off 59 | print_log_freq: 100 # in iterations 60 | image_log_freq: 1000 # in iterations 61 | num_images_log: 8 # number of images to log in a logging iteration 62 | pairwise_test_freq: 10 # in epochs 63 | eval_fraction: 0.25 # fraction of the dataset to use for evaluation 64 | wandb_log_freq: 10 # in iterations 65 | eval_freq: 1 # in epochs 66 | 67 | -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/vlnce_baselines/models/dp/model/__init__.py -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/nomad.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import time 4 | import pdb 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class NoMaD(nn.Module): 11 | 12 | # Change the vision_encoder to a cross-attention encoder with language tokens 13 | def __init__(self, vision_encoder, 14 | noise_pred_net, 15 | dist_pred_net): 16 | super(NoMaD, self).__init__() 17 | 18 | 19 | self.vision_encoder = vision_encoder 20 | self.noise_pred_net = noise_pred_net 21 | self.dist_pred_net = dist_pred_net 22 | 23 | def forward(self, func_name, **kwargs):  # dispatch by sub-module name so the composite model exposes a single forward() 24 | if func_name == "vision_encoder": 25 | output = self.vision_encoder(kwargs["batch"]) 26 | elif func_name == "noise_pred_net": 27 | output = self.noise_pred_net(sample=kwargs["sample"], timestep=kwargs["timestep"], global_cond=kwargs["global_cond"]) 28 | elif func_name == "dist_pred_net": 29 | output = self.dist_pred_net(kwargs["obsgoal_cond"]) 30 | else: 31 | raise NotImplementedError 32 | return output 33 | 34 | 35 | class DenseNetwork(nn.Module): 36 | def __init__(self, embedding_dim): 37 | super(DenseNetwork, self).__init__() 38 | 39 | self.embedding_dim = embedding_dim 40 | self.network = nn.Sequential( 41 | nn.Linear(self.embedding_dim, self.embedding_dim//4), 42 | nn.ReLU(), 43 | nn.Linear(self.embedding_dim//4, self.embedding_dim//16), 44 | nn.ReLU(), 45 | nn.Linear(self.embedding_dim//16, 1) 46 | ) 47 | 48 | def forward(self, x): 49 | x = x.reshape((-1, self.embedding_dim)) 50 | output = self.network(x) 51 | return output 52 | 53 | 54 | 55 | --------------------------------------------------------------------------------
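NoMaD's forward() above is a dispatcher: one entry point keyed by func_name, so the three sub-networks can sit behind a single wrapped module. A hedged usage sketch of one conditioning/denoising step, with toy stand-ins for the real sub-networks (the stand-in modules and all shapes are illustrative, not from this repo):

import torch
import torch.nn as nn

# toy stand-ins so the sketch runs; the real sub-networks come from the training code
vision_encoder = nn.Linear(10, 256)   # fuses obs/goal/language into a conditioning vector
noise_pred_net = lambda sample, timestep, global_cond: torch.zeros_like(sample)
dist_pred_net = DenseNetwork(embedding_dim=256)   # the temporal-distance head defined above

model = NoMaD(vision_encoder, noise_pred_net, dist_pred_net)
obs_cond = model("vision_encoder", batch=torch.randn(4, 10))        # (4, 256) conditioning
noise_pred = model("noise_pred_net", sample=torch.randn(4, 5, 2),   # (batch, len_traj_pred, dx/dy)
                   timestep=torch.tensor([10]), global_cond=obs_cond)
dist = model("dist_pred_net", obsgoal_cond=obs_cond)                # (4, 1) predicted temporal distances

/vlnce_baselines/models/dp/run_pt/r2r_model_config_dep.json: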
-------------------------------------------------------------------------------- 1 | { 2 | "pred_head_dropout_prob": 0.1, 3 | "attention_probs_dropout_prob": 0.1, 4 | "finetuning_task": null, 5 | "hidden_act": "gelu", 6 | "hidden_dropout_prob": 0.1, 7 | "hidden_size": 768, 8 | "image_feat_size": 512, 9 | "depth_feat_size": 128, 10 | "image_prob_size": 1000, 11 | "angle_feat_size": 4, 12 | "obj_feat_size": 0, 13 | "obj_prob_size": 0, 14 | "img_feature_type": "imagenet", 15 | "initializer_range": 0.02, 16 | "intermediate_size": 3072, 17 | "num_l_layers": 9, 18 | "num_x_layers": 4, 19 | "num_pano_layers": 2, 20 | "layer_norm_eps": 1e-12, 21 | "max_position_embeddings": 512, 22 | "max_action_steps": 150, 23 | "num_attention_heads": 12, 24 | "num_hidden_layers": 12, 25 | "num_labels": 2, 26 | "output_attentions": false, 27 | "output_hidden_states": false, 28 | "pruned_heads": {}, 29 | "torchscript": false, 30 | "type_vocab_size": 2, 31 | "update_lang_bert": true, 32 | "vocab_size": 30522, 33 | "use_lang2visn_attn": true, 34 | "graph_sprels": true, 35 | "glocal_fuse": true, 36 | "lang_bert_name": "bert-base-uncased", 37 | "pretrain_tasks": ["mlm", "sap"], 38 | 39 | 40 | "num_query_tokens": 20, 41 | "qformer_num_layers": 2, 42 | "encoding_size": 512 43 | 44 | } 45 | -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/run_pt/r2r_pretrain_habitat.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_config": "", 3 | "checkpoint": null, 4 | "output_dir": "", 5 | "mrc_mask_prob": 0.15, 6 | "max_txt_len": 100, 7 | "train_batch_size": 256, 8 | "val_batch_size": 256, 9 | "val_sample_num": null, 10 | "gradient_accumulation_steps": 1, 11 | "learning_rate": 1e-4, 12 | "optim": "adamw", 13 | "betas": [ 14 | 0.9, 15 | 0.98 16 | ], 17 | "dropout": 0.1, 18 | "weight_decay": 0.01, 19 | "grad_norm": 5.0, 20 | "seed": 0, 21 | "fp16": false, 22 | "n_workers": 4, 23 | "pin_mem": true, 24 | "init_pretrained": "lxmert", 25 | 26 | "train_datasets": { 27 | "R2R": { 28 | "name": "R2R", 29 | "train_traj_files": ["config_vln/R2R_annotation.jsonl"], 30 | "connectivity_file": "config_vln/R2R_connectivity.jsonl", 31 | "img_ft_file": "vln_features/img_features/CLIP-ViT-B-32-views-habitat.hdf5", 32 | "dep_ft_file": "vln_features/depth_features/resnet-views-habitat.hdf5", 33 | "tasks": [ 34 | "sap" 35 | ], 36 | "mix_ratio": [ 37 | 1 38 | ] 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tokishx/DifNav/762f387c499e2a12bd44a948b049ca62f02b2af5/vlnce_baselines/models/dp/utils/__init__.py -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/utils/save.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) Microsoft Corporation. 3 | Licensed under the MIT license. 
4 | 5 | saving utilities 6 | """ 7 | import json 8 | import os 9 | import torch 10 | 11 | 12 | def save_training_meta(args): 13 | os.makedirs(os.path.join(args.output_dir, 'logs'), exist_ok=True) 14 | os.makedirs(os.path.join(args.output_dir, 'ckpts'), exist_ok=True) 15 | 16 | with open(os.path.join(args.output_dir, 'logs', 'training_args.json'), 'w') as writer: 17 | json.dump(vars(args), writer, indent=4) 18 | model_config = json.load(open(args.model_config)) 19 | with open(os.path.join(args.output_dir, 'logs', 'model_config.json'), 'w') as writer: 20 | json.dump(model_config, writer, indent=4) 21 | 22 | 23 | class ModelSaver(object): 24 | def __init__(self, output_dir, prefix='model_step', suffix='pt'): 25 | self.output_dir = output_dir 26 | self.prefix = prefix 27 | self.suffix = suffix 28 | 29 | def save(self, model, step, optimizer=None): 30 | output_model_file = os.path.join(self.output_dir, 31 | f"{self.prefix}_{step}.{self.suffix}") 32 | state_dict = {} 33 | for k, v in model.state_dict().items(): 34 | if k.startswith('module.'): 35 | k = k[7:]  # strip the 'module.' prefix added by (Distributed)DataParallel 36 | if isinstance(v, torch.Tensor): 37 | state_dict[k] = v.cpu() 38 | else: 39 | state_dict[k] = v 40 | torch.save(state_dict, output_model_file) 41 | if optimizer is not None: 42 | dump = {'step': step, 'optimizer': optimizer.state_dict()} 43 | if hasattr(optimizer, '_amp_stash'): 44 | pass # TODO fp16 optimizer 45 | torch.save(dump, f'{self.output_dir}/train_state_{step}.pt') 46 | 47 | -------------------------------------------------------------------------------- /vlnce_baselines/models/dp/utils/visualize_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | 5 | VIZ_IMAGE_SIZE = (640, 480) 6 | RED = np.array([1, 0, 0]) 7 | GREEN = np.array([0, 1, 0]) 8 | BLUE = np.array([0, 0, 1]) 9 | CYAN = np.array([0, 1, 1]) 10 | YELLOW = np.array([1, 1, 0]) 11 | MAGENTA = np.array([1, 0, 1]) 12 | 13 | 14 | def numpy_to_img(arr: np.ndarray) -> Image.Image:  # expects a CHW float array in [0, 1] 15 | img = Image.fromarray(np.transpose(np.uint8(255 * arr), (1, 2, 0))) 16 | img = img.resize(VIZ_IMAGE_SIZE) 17 | return img 18 | 19 | 20 | def to_numpy(tensor: torch.Tensor) -> np.ndarray: 21 | return tensor.detach().cpu().numpy() 22 | 23 | 24 | def from_numpy(array: np.ndarray) -> torch.Tensor: 25 | return torch.from_numpy(array).float() 26 | -------------------------------------------------------------------------------- /vlnce_baselines/models/vlnbert/vlnbert_init.py: -------------------------------------------------------------------------------- 1 | # Recurrent VLN-BERT, 2020, by Yicong.Hong@anu.edu.au 2 | 3 | from pytorch_transformers import (BertConfig, BertTokenizer) 4 | 5 | def get_vlnbert_models(config=None): 6 | config_class = BertConfig 7 | 8 | from vlnce_baselines.models.vlnbert.vlnbert_PREVALENT import VLNBert 9 | model_class = VLNBert 10 | model_name_or_path = 'pretrained/Prevalent/pretrained_model/pytorch_model.bin' 11 | vis_config = config_class.from_pretrained('bert_config/bert-base-uncased') 12 | vis_config.img_feature_dim = 2176 13 | vis_config.img_feature_type = "" 14 | vis_config.vl_layers = 4 15 | vis_config.la_layers = 9 16 | visual_model = model_class.from_pretrained(model_name_or_path, config=vis_config) 17 | 18 | return visual_model 19 | --------------------------------------------------------------------------------
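One cross-reference worth noting: ModelSaver in vlnce_baselines/models/dp/utils/save.py above names checkpoints f"{prefix}_{step}.{suffix}", which is exactly the filename shape main.bash points MODEL.pretrained_path at (pretrained/ETP/mlm.sap_r2r/ckpts/model_step_82500.pt), and its save() strips the 'module.' prefix so checkpoints written under (Distributed)DataParallel reload cleanly in single-process runs. A minimal usage sketch, assuming ModelSaver is imported from the file above and a toy module stands in for the real policy:

import os
import torch
import torch.nn as nn

os.makedirs('ckpts', exist_ok=True)        # ModelSaver expects the directory to exist
model = nn.Linear(4, 2)                    # toy stand-in for the real policy
saver = ModelSaver(output_dir='ckpts')     # defaults: prefix='model_step', suffix='pt'
saver.save(model, step=82500)              # writes ckpts/model_step_82500.pt
state = torch.load('ckpts/model_step_82500.pt', map_location='cpu')
model.load_state_dict(state)               # keys load as-is; any 'module.' prefix was stripped on save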