├── .flake8 ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── conf ├── __init__.py ├── affordance │ ├── aff_detection │ │ ├── clip.yaml │ │ ├── mask_data.yaml │ │ ├── pixel_data.yaml │ │ ├── r3m.yaml │ │ ├── rn18_bert_mask.yaml │ │ ├── rn18_bert_pixel.yaml │ │ ├── rn18_clip_mask.yaml │ │ ├── rn18_clip_pixel.yaml │ │ ├── rn18_sbert_pixel.yaml │ │ ├── rn50_bert_pixel.yaml │ │ ├── rn50_clip_pixel.yaml │ │ └── streams │ │ │ ├── clip_lingunet.yaml │ │ │ ├── r3m_rn18_sbert.yaml │ │ │ ├── rn18_clip_lingunet.yaml │ │ │ ├── rn50_sbert_lingunet.yaml │ │ │ ├── rn_bert_lingunet.yaml │ │ │ └── rn_sbert_lingunet.yaml │ ├── cameras │ │ ├── cameras │ │ │ ├── gripper.yaml │ │ │ ├── opposing.yaml │ │ │ ├── static.yaml │ │ │ ├── static_calvin.yaml │ │ │ └── tactile.yaml │ │ ├── high_res.yaml │ │ ├── no_cameras.yaml │ │ ├── static_and_gripper.yaml │ │ ├── static_and_gripper_calvin.yaml │ │ └── static_and_tactile.yaml │ ├── cfg_datacollection.yaml │ ├── cfg_merge_dataset.yaml │ ├── labeling │ │ ├── cameras │ │ │ ├── cameras │ │ │ │ ├── gripper.yaml │ │ │ │ └── static.yaml │ │ │ └── static_and_gripper.yaml │ │ ├── env │ │ │ └── env_labeling.yaml │ │ ├── real_world.yaml │ │ ├── real_world_lang.yaml │ │ ├── robot │ │ │ └── panda.yaml │ │ ├── scene │ │ │ └── empty_table.yaml │ │ ├── simulation.yaml │ │ └── simulation_lang.yaml │ ├── test_affordance.yaml │ ├── train_affordance.yaml │ ├── train_depth.yaml │ └── transforms │ │ ├── clip.yaml │ │ ├── clip_color.yaml │ │ ├── clip_randShift.yaml │ │ ├── clip_randShift_color.yaml │ │ ├── clip_real_world.yaml │ │ ├── gray.yaml │ │ ├── r3m.yaml │ │ ├── rgb.yaml │ │ ├── rgb_color.yaml │ │ ├── rgb_randShift.yaml │ │ └── rgb_randShift_color.yaml ├── annotations │ ├── new_playtable.yaml │ └── new_playtable_validation.yaml ├── callbacks │ ├── calvin_default.yaml │ ├── checkpoint │ │ ├── all.yaml │ │ ├── clip_loss.yaml │ │ ├── kl.yaml │ │ ├── lh_sr.yaml │ │ ├── state_recon.yaml │ │ ├── task_sr.yaml │ │ └── val_action.yaml │ ├── kl_schedule │ │ ├── constant.yaml │ │ ├── linear.yaml │ │ └── sigmoid.yaml │ ├── real_world_default.yaml │ ├── rollout │ │ ├── default.yaml │ │ └── tasks │ │ │ └── new_playtable_tasks.yaml │ ├── rollout_lh │ │ └── default.yaml │ ├── shm_signal │ │ └── default.yaml │ └── tsne_plot │ │ └── default.yaml ├── cfg_high_level.yaml ├── cfg_high_level_rw.yaml ├── cfg_low_level.yaml ├── cfg_low_level_rw.yaml ├── datamodule │ ├── calvin_default.yaml │ ├── datasets │ │ ├── lang_dataset │ │ │ ├── lang.yaml │ │ │ └── lang_shm.yaml │ │ ├── lang_only.yaml │ │ ├── vision_dataset │ │ │ ├── vision.yaml │ │ │ └── vision_shm.yaml │ │ ├── vision_lang.yaml │ │ ├── vision_lang_shm.yaml │ │ ├── vision_only.yaml │ │ └── vision_only_shm.yaml │ ├── observation_space │ │ ├── all_mods_abs_act.yaml │ │ ├── lang_rgb_static_abs_act.yaml │ │ ├── lang_rgb_static_gripper_abs_act.yaml │ │ ├── lang_rgb_static_gripper_rel_act.yaml │ │ ├── lang_rgb_static_gripper_rel_gripper_act.yaml │ │ ├── lang_rgb_static_rel_act.yaml │ │ ├── lang_rgb_static_robot_scene_abs_act.yaml │ │ ├── lang_rgb_static_tactile_abs_act.yaml │ │ ├── lang_rgbd_both_abs_act.yaml │ │ ├── lang_rgbd_both_rel_act.yaml │ │ ├── lang_rgbd_static_gripper_rel_act.yaml │ │ ├── lang_rgbd_static_robot_abs_act.yaml │ │ ├── rgb_static_abs_act.yaml │ │ ├── rgb_static_gripper_rel_gripper_act.yaml │ │ ├── rgb_static_robot_scene_abs_act.yaml │ │ └── state_only.yaml │ ├── proprioception_dims │ │ ├── none.yaml │ │ ├── robot_full.yaml │ │ ├── robot_no_joints.yaml │ │ ├── robot_no_joints_no_gripper_width.yaml 
│ │ └── robot_scene.yaml │ ├── real_world_default.yaml │ └── transforms │ │ ├── clip.yaml │ │ ├── play_basic.yaml │ │ ├── rand_shift.yaml │ │ ├── real_world.yaml │ │ ├── real_world_no_rand_shift.yaml │ │ ├── real_world_r3m.yaml │ │ └── real_world_square.yaml ├── inference │ └── config_inference.yaml ├── inference_real.yaml ├── lang_ann.yaml ├── logger │ ├── tb_logger.yaml │ └── wandb.yaml ├── loss │ └── default.yaml ├── model │ ├── action_decoder │ │ ├── deterministic.yaml │ │ ├── logistic_decoder_rnn_calvin.yaml │ │ └── logistic_decoder_rnn_real_world.yaml │ ├── calvin_hulc++.yaml │ ├── clip_lang.yaml │ ├── distribution │ │ ├── continuous.yaml │ │ └── discrete.yaml │ ├── gcbc.yaml │ ├── language_encoder │ │ ├── default.yaml │ │ ├── none.yaml │ │ └── sbert.yaml │ ├── language_goal │ │ ├── default.yaml │ │ └── none.yaml │ ├── lr_scheduler │ │ ├── constant.yaml │ │ ├── cosine_schedule_with_warmup.yaml │ │ └── linear_schedule_with_warmup.yaml │ ├── optimizer │ │ ├── adam.yaml │ │ ├── adamw.yaml │ │ └── sgd.yaml │ ├── perceptual_encoder │ │ ├── RGBD_both.yaml │ │ ├── default.yaml │ │ ├── depth_gripper │ │ │ ├── default.yaml │ │ │ └── none.yaml │ │ ├── depth_static │ │ │ ├── default.yaml │ │ │ └── none.yaml │ │ ├── gripper_cam.yaml │ │ ├── proprio │ │ │ ├── identity.yaml │ │ │ └── none.yaml │ │ ├── resnet_aff.yaml │ │ ├── rgb_gripper │ │ │ ├── default.yaml │ │ │ ├── none.yaml │ │ │ ├── r3m.yaml │ │ │ ├── resnet.yaml │ │ │ └── resnet_aff.yaml │ │ ├── rgb_static │ │ │ ├── clip.yaml │ │ │ ├── default.yaml │ │ │ ├── r3m.yaml │ │ │ ├── resnet.yaml │ │ │ ├── resnet_aff.yaml │ │ │ └── vision_conv.yaml │ │ ├── state_decoder │ │ │ ├── default.yaml │ │ │ └── none.yaml │ │ ├── static_RGBD.yaml │ │ ├── static_RGB_tactile.yaml │ │ └── tactile │ │ │ ├── default.yaml │ │ │ └── none.yaml │ ├── plan_proposal │ │ └── default.yaml │ ├── plan_recognition │ │ ├── bilstm.yaml │ │ ├── birnn.yaml │ │ └── transformers.yaml │ ├── proj_vis_lang │ │ ├── default.yaml │ │ └── none.yaml │ ├── real_world_hulc++.yaml │ ├── sbert.yaml │ └── visual_goal │ │ └── default.yaml ├── paths │ └── general_paths.yaml ├── simulation │ ├── agent │ │ ├── base.yaml │ │ ├── baseline.yaml │ │ └── play_lmp.yaml │ ├── cameras │ │ ├── cameras │ │ │ ├── gripper.yaml │ │ │ ├── opposing.yaml │ │ │ ├── static.yaml │ │ │ ├── static_calvin.yaml │ │ │ └── tactile.yaml │ │ ├── high_res.yaml │ │ ├── no_cameras.yaml │ │ ├── static_and_gripper.yaml │ │ ├── static_and_gripper_calvin.yaml │ │ └── static_and_tactile.yaml │ ├── env │ │ └── env.yaml │ ├── robot │ │ ├── panda.yaml │ │ ├── panda_digit.yaml │ │ └── panda_longer_finger.yaml │ └── scene │ │ ├── calvin_scene_A.yaml │ │ ├── calvin_scene_A_eval.yaml │ │ ├── calvin_scene_B.yaml │ │ ├── calvin_scene_C.yaml │ │ ├── calvin_scene_D.yaml │ │ └── calvin_scene_D_eval.yaml ├── trainer │ └── play_trainer.yaml ├── training │ └── default_training.yaml └── utils │ └── combine_dataset.yaml ├── dataset ├── README.md └── download_data.sh ├── docs ├── affordance.md ├── affordance_condensed.md └── pipeline.md ├── hulc2 ├── .gitignore ├── __init__.py ├── affordance │ ├── base_detector.py │ ├── dataset_creation │ │ ├── core │ │ │ ├── data_reader.py │ │ │ ├── real_cameras.py │ │ │ └── utils.py │ │ ├── create_percentage_data_splits.py │ │ ├── data_labeler.py │ │ ├── data_labeler_lang.py │ │ ├── find_norm_values.py │ │ └── merge_datasets.py │ ├── datasets │ │ ├── mask_label.py │ │ ├── pixel_label.py │ │ └── transforms.py │ ├── models │ │ ├── core │ │ │ ├── __init__.py │ │ │ ├── clip.py │ │ │ ├── fusion.py │ │ │ ├── 
language_network.py │ │ │ ├── resnet.py │ │ │ ├── unet.py │ │ │ ├── unet_decoder.py │ │ │ └── utils.py │ │ ├── depth │ │ │ ├── depth_gaussian.py │ │ │ ├── depth_logistics.py │ │ │ └── depth_module.py │ │ ├── lang_fusion │ │ │ ├── aff_lang_depth_pixel.py │ │ │ ├── one_stream_attention_lang_fusion_mask.py │ │ │ └── one_stream_attention_lang_fusion_pixel.py │ │ ├── language_encoders │ │ │ ├── base_lang_encoder.py │ │ │ ├── bert_lang_encoder.py │ │ │ ├── clip_lang_encoder.py │ │ │ ├── distilbert_lang_encoder.py │ │ │ └── sbert_lang_encoder.py │ │ └── visual_lang_encoders │ │ │ ├── base_lingunet.py │ │ │ ├── r3m_rn18.py │ │ │ ├── rn50_clip_lingunet.py │ │ │ ├── rn50_unet.py │ │ │ └── rn_lingunet.py │ ├── pixel_aff_lang_detector.py │ ├── run_on_cluster │ │ ├── cluster.py │ │ ├── sbatch_eval.sh │ │ ├── sbatch_train.sh │ │ └── slurm_eval.py │ ├── scripts │ │ ├── get_aff_preds.py │ │ ├── get_best_eval_model.py │ │ ├── make_seq_videos.py │ │ └── transform_old_episodes_split.py │ ├── test_affordance.py │ ├── test_move_to_pt.py │ ├── train_affordance.py │ ├── train_depth.py │ └── utils │ │ ├── data_utils.py │ │ ├── flowlib.py │ │ └── losses.py ├── agents │ ├── base_agent.py │ ├── lmp_agent.py │ └── real_world_agent.py ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── hulc2_real_world_data_module.py │ ├── hulc2_sim_data_module.py │ ├── npz_dataset.py │ ├── play_data_module.py │ ├── random.py │ ├── shm_dataset.py │ ├── shm_dataset_skip.py │ └── utils │ │ ├── __init__.py │ │ ├── episode_utils.py │ │ └── shared_memory_loader.py ├── env_wrappers │ ├── aff_lfp_real_world_wrapper.py │ ├── play_aff_lmp_wrapper.py │ └── play_lmp_wrapper.py ├── evaluation │ ├── __init__.py │ ├── create_plots.py │ ├── evaluate_policy.py │ ├── evaluate_policy_singlestep.py │ ├── evaluation.py │ ├── manager_aff_lmp.py │ ├── manager_lmp.py │ ├── multistep_sequences.py │ ├── rollouts_interactive.py │ ├── run_multiple.py │ ├── test_policy_interactive.py │ └── utils.py ├── models │ ├── __init__.py │ ├── auxiliary_loss_networks │ │ ├── __init__.py │ │ ├── bc_z_lang_decoder.py │ │ ├── mia_lang_discriminator.py │ │ ├── proj_vis_lang.py │ │ └── state_decoder.py │ ├── decoders │ │ ├── __init__.py │ │ ├── action_decoder.py │ │ ├── clip_proj.py │ │ ├── deterministic_decoder.py │ │ ├── logistic_decoder_rnn.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── gripper_control.py │ │ │ └── rnn.py │ ├── encoders │ │ ├── __init__.py │ │ ├── clip_lang_encoder.py │ │ ├── goal_encoders.py │ │ ├── lang_encoder.py │ │ └── language_network.py │ ├── gcbc.py │ ├── hulc2.py │ ├── perceptual_encoders │ │ ├── __init__.py │ │ ├── clip.py │ │ ├── concat_encoders.py │ │ ├── poe_encoder.py │ │ ├── proprio_encoder.py │ │ ├── tactile_encoder.py │ │ ├── vision_clip.py │ │ ├── vision_network.py │ │ ├── vision_network_conv.py │ │ ├── vision_network_gripper.py │ │ ├── vision_r3m.py │ │ ├── vision_resnet.py │ │ └── vision_resnet_aff.py │ └── plan_encoders │ │ ├── __init__.py │ │ ├── plan_proposal_net.py │ │ └── plan_recognition_net.py ├── rollout │ ├── gpt3_planning.py │ ├── real_world_eval_aff.py │ ├── real_world_eval_combined.py │ ├── real_world_rollout.py │ ├── real_world_rollout_lang.py │ ├── real_world_rollout_vision.py │ ├── rollout.py │ ├── rollout_long_horizon.py │ └── rollout_video.py ├── scripts │ ├── get_annotations.py │ ├── utils │ │ ├── colors.yaml │ │ ├── config │ │ │ ├── lang_model │ │ │ │ ├── bert.yaml │ │ │ │ └── clip.yaml │ │ │ └── retrieve_data.yaml │ │ ├── tasks.yaml │ │ └── utils.py │ └── viz_annotations.py ├── training.py ├── utils │ ├── 
__init__.py │ ├── automatic_lang_annotator_mp.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── clip_tokenizer.py │ ├── combine_dataset.py │ ├── compute_proprioception_statistics.py │ ├── convert_real_raw_data_splits.py │ ├── create_splits.py │ ├── data_utils.py │ ├── data_visualization.py │ ├── dataset_pipeline.sh │ ├── dataset_task_statistics.py │ ├── distributions.py │ ├── img_utils.py │ ├── kl_callbacks.py │ ├── language_annotator.py │ ├── preprocess_real_data.py │ ├── real_world_dataset_pipeline.sh │ ├── relabel_with_new_lang_model.py │ ├── render_low_freq.py │ ├── simple_tokenizer.py │ ├── split_dataset.py │ ├── tensor_utils.py │ ├── transforms.py │ ├── utils.py │ ├── visualizations.py │ ├── visualize_annotations.py │ ├── visualize_calvin_dataset.py │ └── visualize_real_data.py ├── visualization │ └── tsne_plot.py ├── wrap_training.py └── wrappers │ ├── hulc2_wrapper.py │ └── panda_lfp_wrapper.py ├── install.sh ├── media └── hulc2.gif ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── setup_local.py └── slurm_scripts ├── sbatch_eval.sh ├── sbatch_lfp.sh ├── slurm_eval.py └── slurm_training.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git 3 | # Default is 79 in PEP 8 4 | max-line-length = 120 5 | select = E,F,W,C 6 | ignore=W503, # line break before binary operator, need for black 7 | E203, # whitespace before ':'. Opposite convention enforced by black 8 | E731, # do not assign a lambda expression, use a def 9 | E722, 10 | F401, 11 | F841, 12 | E402, # module level import not at top of file 13 | E741, # ambiguous variable name 14 | E501, # line too long. Handled by black 15 | C406, # Unnecessary list literal - rewrite as a dict literal 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "calvin_env"] 2 | path = calvin_env 3 | url = git@github.com:JessicaBorja/calvin_env.git 4 | [submodule "r3m"] 5 | path = r3m 6 | url = git@github.com:mees/r3m.git 7 | [submodule "LangAnnotationApp"] 8 | path = LangAnnotationApp 9 | url = git@github.com:mees/LanguageAnnotationWebApp.git 10 | [submodule "robot_io"] 11 | path = robot_io 12 | url = git@github.com:mees/robot_io.git 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.8 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 22.10.0 6 | hooks: 7 | - id: black 8 | language_version: python3.8 9 | 10 | - repo: https://gitlab.com/pycqa/flake8 11 | rev: 3.8.4 12 | hooks: 13 | - id: flake8 14 | 15 | - repo: https://github.com/pycqa/isort 16 | rev: 5.7.0 17 | hooks: 18 | - id: isort 19 | 20 | - repo: https://github.com/pre-commit/mirrors-mypy 21 | rev: v0.812 22 | hooks: 23 | - id: mypy 24 | args: [--ignore-missing-imports, --warn-no-return, --warn-redundant-casts, --disallow-incomplete-defs] 25 | additional_dependencies: [pytorch-lightning==1.5.9, torch==1.10.1, numpy] 26 | 27 | - repo: https://github.com/pre-commit/pre-commit-hooks 28 | rev: v4.0.1 29 | hooks: 30 | - id: check-yaml 31 | - id: trailing-whitespace 32 | - id: end-of-file-fixer 33 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Oier Mees 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /conf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/__init__.py -------------------------------------------------------------------------------- /conf/affordance/aff_detection/clip.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | _target_: hulc2.affordance.clip_detector.CLIPPointDetector 3 | resize: 224 4 | clip_model: "RN50" # ["RN50", "RN101", "RN50x4", "RN50x16"] 5 | saliency_layer: "layer4" # ["layer4", "layer3", "layer2", "layer1"] 6 | blur: False 7 | viz: True 8 | 9 | img_size: 224 10 | defaults: 11 | - /transforms: rgb 12 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/mask_data.yaml: -------------------------------------------------------------------------------- 1 | wandb_saver: 2 | val_loss: 3 | monitor: 'Validation/total_loss' 4 | save_top_k: 2 5 | mode: min 6 | verbose: True 7 | val_miou: 8 | monitor: 'Validation/miou' 9 | save_top_k: 2 10 | mode: max 11 | verbose: True 12 | save_last: True 13 | 14 | model: 15 | cfg: 16 | hough_voting: 17 | skip_pixels: 3 18 | inlier_threshold: 0.8 19 | angle_discretization: 100 20 | inlier_distance: 16 21 | percentage_threshold: 0.4 22 | object_center_kernel_radius: 16 23 | 24 | dataset: 25 | _target_: hulc2.affordance.datasets.mask_label.MaskLabelLabelDataLang 26 | transforms: ${aff_detection.streams.transforms} 27 | radius: 28 | static: 16 29 | gripper: 10 30 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/pixel_data.yaml: -------------------------------------------------------------------------------- 1 | wandb_saver: 2 | val_loss: 3 | monitor: 'Validation/total_loss' 4 | save_top_k: 2 5 | mode: min 6 | verbose: True 7 | val_err: 8 | monitor: 'Validation/px_dist_err' 9 | save_top_k: 2 10 | mode: min 11 | verbose: True 12 | save_last: True 13 | 14 | dataset: 15 | _target_: hulc2.affordance.datasets.pixel_label.PixeLabelDataLang 16 | transforms: ${aff_detection.streams.transforms} 17 | 
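A note on how these two data configs appear to be consumed (an illustrative sketch under that assumption, not code from the repository): the `wandb_saver` entries above mirror the keyword arguments of PyTorch Lightning's `ModelCheckpoint` (the same `monitor`/`mode`/`save_top_k`/`verbose` keys used by the callbacks under `conf/callbacks/checkpoint/`), which suggests one checkpoint callback per monitored metric.

```python
# Illustrative sketch only: assumes each wandb_saver entry becomes one
# pytorch_lightning.callbacks.ModelCheckpoint; the repo's trainer may wire
# this up differently. dirpath/filename are hypothetical example values.
from pytorch_lightning.callbacks import ModelCheckpoint

wandb_saver = {
    "val_loss": dict(monitor="Validation/total_loss", save_top_k=2, mode="min", verbose=True),
    "val_err": dict(monitor="Validation/px_dist_err", save_top_k=2, mode="min", verbose=True),
}

# One checkpoint callback per monitored metric.
callbacks = [
    ModelCheckpoint(dirpath="checkpoints", filename=f"{name}-{{epoch}}", **kwargs)
    for name, kwargs in wandb_saver.items()
]
```

The `dataset` node with its `_target_` key is, under the same assumption, left to `hydra.utils.instantiate`, with `${aff_detection.streams.transforms}` resolved from the composed config before the dataset class is built.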
-------------------------------------------------------------------------------- /conf/affordance/aff_detection/r3m.yaml: -------------------------------------------------------------------------------- 1 | # R3M Resnet 18 with respective decoder channels 2 | # SBERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: r3m_rn18_sbert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | batchnorm: False 18 | encoder_name: r3m_resnet18 19 | unet_cfg: 20 | decoder_channels: [256, 128, 64, 32] 21 | 22 | defaults: 23 | - pixel_data 24 | - streams: r3m_rn18_sbert 25 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_bert_mask.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Affordance binary mask prediction 4 | name: rn18_bert_mask 5 | 6 | model: 7 | _target_: hulc2.affordance.mask_aff_lang_detector.MaskAffLangDetector 8 | _recursive_: False 9 | cfg: 10 | attn_stream_fusion_type: 'add' 11 | lang_fusion_type: 'mult' 12 | streams: ${aff_detection.streams} 13 | batchnorm: False 14 | loss: 15 | centers: 2.5 16 | dice: 5 17 | ce_loss: 1 18 | affordance: 19 | add_dice: true 20 | ce_class_weights: [0.2, 0.8] 21 | unet_cfg: 22 | decoder_channels: [256, 128, 64, 32] 23 | 24 | img_size: 224 25 | defaults: 26 | - mask_data 27 | - streams: rn18_bert_lingunet 28 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_bert_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn18_bert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | batchnorm: False 18 | encoder_name: resnet18 19 | unet_cfg: 20 | decoder_channels: [256, 128, 64, 32] 21 | 22 | defaults: 23 | - pixel_data 24 | - streams: rn_bert_lingunet 25 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_clip_mask.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # CLIP as sentence encoder 3 | # Affordance binary mask prediction 4 | name: rn18_clip_mask 5 | 6 | model: 7 | _target_: hulc2.affordance.mask_aff_lang_detector.MaskAffLangDetector 8 | _recursive_: False 9 | cfg: 10 | attn_stream_fusion_type: 'add' 11 | lang_fusion_type: 'mult' 12 | streams: ${aff_detection.streams} 13 | batchnorm: False 14 | loss: 15 | centers: 2.5 16 | dice: 5 17 | ce_loss: 1 18 | affordance: 19 | add_dice: true 20 | ce_class_weights: [0.2, 0.8] 21 | unet_cfg: 22 | decoder_channels: [256, 128, 64, 32] 23 | 24 | img_size: 224 25 | defaults: 26 | - mask_data 27 | - streams: rn18_clip_lingunet 28 | 
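The detector configs in this group all follow the same pattern: the `defaults:` list pulls in one file from `streams/` (vision backbone, language encoder and transforms) plus either `pixel_data` or `mask_data`, and interpolations such as `${aff_detection.streams}` and `${aff_detection.img_size}` let the model node reuse those shared keys. Below is a minimal OmegaConf sketch of that interpolation behaviour, with placeholder values rather than the repository's defaults:

```python
# Minimal sketch of the OmegaConf interpolation used throughout these configs;
# the values below are placeholders, not the repository's actual defaults.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "aff_detection": {
        "img_size": 224,
        "streams": {"vision_net": "rn", "lang_enc": "clip"},
        "model_cfg": {
            # the model node simply points back at the shared stream definition
            "streams": "${aff_detection.streams}",
            "batchnorm": False,
        },
    }
})

# Interpolations resolve against the config root, so model_cfg.streams comes
# back as {"vision_net": "rn", "lang_enc": "clip"} when resolved.
print(OmegaConf.to_yaml(cfg.aff_detection.model_cfg, resolve=True))
```

Swapping the entry in the `defaults:` list (e.g. `- streams: rn18_clip_lingunet`) is therefore enough to change both encoders and the transform set without touching the model node itself.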
-------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_clip_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # CLIP as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn18_clip_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | batchnorm: False 18 | unet_cfg: # [256, 128, 64, 32, 16] 19 | decoder_channels: [256, 128, 64, 32] 20 | 21 | defaults: 22 | - pixel_data 23 | - streams: rn18_clip_lingunet 24 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_sbert_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn18_bert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | # streams: 18 | # lang_enc: sbert 19 | batchnorm: False 20 | encoder_name: resnet18 21 | unet_cfg: 22 | decoder_channels: [512, 256, 128, 64, 32] 23 | 24 | defaults: 25 | - pixel_data 26 | - streams: rn_sbert_lingunet 27 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn50_bert_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn50_bert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | streams: 18 | lang_enc: sbert 19 | batchnorm: False 20 | encoder_name: resnet50 21 | unet_cfg: 22 | decoder_channels: [256, 128, 64, 32] 23 | 24 | defaults: 25 | - pixel_data 26 | - streams: rn_bert_lingunet 27 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn50_clip_pixel.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 2 | _recursive_: False 3 | name: rn50_clip_pixel 4 | img_size: 224 5 | 6 | model_cfg: 7 | batchnorm: False # important: False because batch_size=1 8 | attn_stream_fusion_type: 'add' 9 | lang_fusion_type: 'mult' 10 | streams: ${aff_detection.streams} 11 | freeze_encoder: 12 | lang: True 13 | aff: True 14 | depth: True 15 | defaults: 16 | - pixel_data 17 | - streams: clip_lingunet 18 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/clip_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: clip 2 | 
lang_enc: clip 3 | defaults: 4 | - /transforms: clip_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/r3m_rn18_sbert.yaml: -------------------------------------------------------------------------------- 1 | vision_net: r3m_rn18 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: r3m 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn18_clip_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: clip 3 | defaults: 4 | - /transforms: clip_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn50_sbert_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: rgb_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn_bert_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: rgb_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn_sbert_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: rgb_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/gripper.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.gripper_camera.GripperCamera 2 | name: gripper 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 84 8 | height: 84 9 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/opposing.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: opposing 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 200 8 | height: 200 9 | look_at: [ 0.4, 0.5, 0.6 ] 10 | look_from: [ 0.4, 1.5, 0.9 ] 11 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/static.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 300 8 | height: 300 9 | look_at: [ -0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 10 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/static_calvin.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 200 8 | height: 200 9 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 
10 | look_from: [2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/tactile.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.tactile_sensor.TactileSensor 2 | name: tactile 3 | width: 120 4 | height: 160 5 | digit_link_ids: [10, 12] # ${robot.digit_link_ids} 6 | visualize_gui: true 7 | config_path: conf/digit_sensor/config_digit.yml 8 | -------------------------------------------------------------------------------- /conf/affordance/cameras/high_res.yaml: -------------------------------------------------------------------------------- 1 | static: 2 | _target_: calvin_env.camera.static_camera.StaticCamera 3 | name: static 4 | fov: 10 5 | aspect: 1 6 | nearval: 0.01 7 | farval: 10 8 | width: 500 9 | height: 500 10 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 11 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 12 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 13 | 14 | gripper: 15 | _target_: calvin_env.camera.gripper_camera.GripperCamera 16 | name: gripper 17 | fov: 75 18 | aspect: 1 19 | nearval: 0.01 20 | farval: 2 21 | width: 300 22 | height: 300 23 | -------------------------------------------------------------------------------- /conf/affordance/cameras/no_cameras.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/affordance/cameras/no_cameras.yaml -------------------------------------------------------------------------------- /conf/affordance/cameras/static_and_gripper.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/affordance/cameras/static_and_gripper_calvin.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static_calvin 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/affordance/cameras/static_and_tactile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@tactile: tactile 4 | -------------------------------------------------------------------------------- /conf/affordance/cfg_datacollection.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - labeling: simulation_lang 3 | - labeling/env@env: env_labeling 4 | - labeling/scene@scene: empty_table 5 | - labeling/robot@robot: panda 6 | - labeling/cameras@cameras: static_and_gripper 7 | - ../paths@paths: general_paths 8 | - override hydra/hydra_logging: colorlog 9 | - override hydra/job_logging: colorlog 10 | 11 | #Environment 12 | output_size: 13 | static: [150, 200] 14 | gripper: 84 15 | mask_on_close: False 16 | save_viz: False 17 | euler_obs: True 18 | frames_before_saving: 5000 19 | viz: false 20 | 21 | language: 22 | folder: lang_paraphrase-MiniLM-L3-v2 23 | file: auto_lang_ann.npy 24 | 25 | # To write all data to a single split 26 | output_cfg: 27 | single_split: null 
# "validation", "training" 28 | multiclass: False 29 | 30 | # For collecting dataset with playdata 31 | dataset_name: real_world/500k_all_tasks_dataset_15hz 32 | play_data_dir: /export/home/meeso/${dataset_name} 33 | 34 | #Output directory where dataset will be stored 35 | output_dir: ${paths.datasets}/${dataset_name} 36 | 37 | # Finding classes in playdata 38 | task_discovery: 39 | dist_thresh: 0.03 # Max distance to consider that object is the same 40 | sample_freq: 20 # track objects every sample_freq ts 41 | frames_after_move: 3 # Find movement diretion after frames_after_move frames 42 | max_n_episodes: 2 # Find clusters in subset of data 43 | 44 | # Prediction 45 | task_detector: 46 | cluster_info_path: null 47 | dataset_dir: ${output_dir} 48 | k_largest: 2 49 | dims: [0, 1, 2] # x, y, z, r_x, r_y, r_z 50 | clustering_method: Kmeans 51 | params: 52 | n_clusters: 2 53 | random_state: 0 54 | # clustering_method: DBSCAN 55 | # params: 56 | # eps: 0.3 57 | # min_samples: 4 58 | # eps: 0.08 59 | # min_samples: 5 60 | 61 | 62 | 63 | hydra: 64 | run: 65 | dir: ./hydra_outputs/datacollection/${now:%Y-%m-%d}_${now:%H-%M-%S} 66 | -------------------------------------------------------------------------------- /conf/affordance/cfg_merge_dataset.yaml: -------------------------------------------------------------------------------- 1 | # Paths to where episodes_split.json is stored for each dataset 2 | # Relative to the main hulc2 directory 3 | data_lst: 4 | - ../../../datasets/calvin_langDepthEndPt/training 5 | - ../../../datasets/calvin_langDepthEndPt/validation 6 | -------------------------------------------------------------------------------- /conf/affordance/labeling/cameras/cameras/gripper.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.gripper_camera.GripperCamera 2 | name: gripper 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 84 8 | height: 84 9 | -------------------------------------------------------------------------------- /conf/affordance/labeling/cameras/cameras/static.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 200 8 | height: 200 9 | look_at: [ -0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 10 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/affordance/labeling/cameras/static_and_gripper.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/affordance/labeling/env/env_labeling.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.calvin_env.envs.play_table_env.PlayTableSimEnv 2 | _recursive_: false 3 | cameras: ${cameras} 4 | seed: 0 5 | bullet_time_step: 240.0 6 | use_vr: false 7 | show_gui: false 8 | robot_cfg: ${robot} 9 | scene_cfg: ${scene} 10 | use_scene_info: false 11 | use_egl: false 12 | control_freq: 30 13 | -------------------------------------------------------------------------------- /conf/affordance/labeling/real_world.yaml: 
-------------------------------------------------------------------------------- 1 | split_by_episodes: True 2 | mode: "real_world_raw" 3 | back_frames: [0, 60] 4 | fixed_pt_del_radius: 0.08 # Meters 5 | remove_blank_mask_instances: True 6 | min_labels: 4 7 | label_size: 8 | static: 17 9 | gripper: 30 10 | -------------------------------------------------------------------------------- /conf/affordance/labeling/real_world_lang.yaml: -------------------------------------------------------------------------------- 1 | split_by_episodes: True 2 | mode: "real_world_processed" 3 | back_frames: [0, 100] 4 | fixed_pt_del_radius: 0.08 # Meters 5 | remove_blank_mask_instances: True 6 | min_labels: 1 7 | label_size: 8 | static: 10 9 | gripper: 30 10 | -------------------------------------------------------------------------------- /conf/affordance/labeling/robot/panda.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.robot.robot.Robot 2 | filename: franka_panda/panda.urdf 3 | base_position: ${scene.robot_base_position} 4 | base_orientation: ${scene.robot_base_orientation} 5 | initial_joint_positions: ${scene.robot_initial_joint_positions} 6 | max_joint_force: 200.0 7 | gripper_force: 200 8 | arm_joint_ids: [0, 1, 2, 3, 4, 5, 6] 9 | gripper_joint_ids: [9, 10] 10 | gripper_joint_limits: [0, 0.04] 11 | tcp_link_id: 13 12 | end_effector_link_id: 7 13 | gripper_cam_link: 12 14 | use_nullspace: false 15 | max_velocity: 2 16 | use_ik_fast: false 17 | magic_scaling_factor_pos: 1 # 1.6 18 | magic_scaling_factor_orn: 1 # 2.2 19 | use_target_pose: true 20 | euler_obs: true 21 | workspace_limits: [[-0.20, 0.35, 0.61], [0.7, 0.85, 1.2]] 22 | max_rel_pos: 0.02 23 | max_rel_orn: 0.05 24 | -------------------------------------------------------------------------------- /conf/affordance/labeling/scene/empty_table.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${paths.vr_data} 4 | global_scaling: 0.8 5 | euler_obs: True 6 | robot_base_position: [0.3, 0.15, 0.45] 7 | robot_base_orientation: [0, 0, 1.5707963] 8 | robot_initial_joint_positions: [-0.3457686708019129, -0.15454379621111053, -0.6607497652179231, -2.431721569843283, -0.12811896258574057, 2.3050911768605884, -0.128854091294185] 9 | surfaces: [] 10 | 11 | objects: 12 | fixed_objects: 13 | table: 14 | file: table/hightable.urdf 15 | initial_pos: [0.3, 0.7, 0.02] 16 | initial_orn: [0, 0, 0] 17 | fixed: true 18 | bin: 19 | file: ais_objects/bin_10_30_50/bin_10_30_50.urdf 20 | initial_pos: [0.7, 0.75, 0.6] 21 | initial_orn: [1.57, 0, 0] 22 | fixed: true 23 | movable_objects: 24 | bowl: 25 | file: 024_bowl/google_16k/textured.urdf 26 | initial_pos: [0.18, 0.58, 0.6230520401985216] 27 | initial_orn: [0, 0, 0] 28 | fixed: false 29 | -------------------------------------------------------------------------------- /conf/affordance/labeling/simulation.yaml: -------------------------------------------------------------------------------- 1 | split_by_episodes: True 2 | mode: "simulation" 3 | back_frames: [5, 50] 4 | fixed_pt_del_radius: 0.09 # Meters 5 | remove_blank_mask_instances: False 6 | min_labels: 4 7 | label_size: 8 | static: 14 9 | gripper: 30 10 | -------------------------------------------------------------------------------- /conf/affordance/labeling/simulation_lang.yaml: -------------------------------------------------------------------------------- 1 | 
split_by_episodes: True 2 | mode: "simulation" 3 | back_frames: [0, 60] 4 | fixed_pt_del_radius: 0.09 # Meters 5 | remove_blank_mask_instances: True 6 | min_labels: 4 7 | label_size: 8 | static: 14 9 | gripper: 13 10 | -------------------------------------------------------------------------------- /conf/affordance/test_affordance.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - paths: general_paths 3 | - override hydra/hydra_logging: colorlog 4 | - override hydra/job_logging: colorlog 5 | 6 | # 7 | save_viz: True 8 | debug: False 9 | 10 | # folders 11 | checkpoint: 12 | train_folder: ~/logs/hulc2/aff_model/2022-07-02/17-01-30_aff_model 13 | model_name: val_err.ckpt 14 | 15 | dataset_name: calvin_lang_MoCEndPt 16 | aff_detection: 17 | dataset: 18 | _recursive_: False 19 | data_dir: ${paths.datasets}/${dataset_name} 20 | cam: static 21 | data_percent: 1.0 22 | episodes_file: episodes_split.json 23 | img_resize: 24 | static: 224 # clip img size 25 | gripper: 96 26 | all: 100 27 | hough_voting: 28 | skip_pixels: 3 29 | inlier_threshold: 0.8 30 | angle_discretization: 100 31 | inlier_distance: 16 32 | percentage_threshold: 0.4 33 | object_center_kernel_radius: 16 34 | 35 | 36 | dataloader: 37 | num_workers: 4 38 | batch_size: 4 39 | pin_memory: true 40 | 41 | #-- Hydra config --# 42 | hydra_outputs: ./hydra_outputs/aff_preds/ 43 | hydra: 44 | run: 45 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 46 | -------------------------------------------------------------------------------- /conf/affordance/train_affordance.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - aff_detection: r3m 3 | - ../paths@paths: general_paths 4 | - override hydra/hydra_logging: colorlog 5 | - override hydra/job_logging: colorlog 6 | 7 | save_viz: True 8 | 9 | # folders 10 | run_name: ${aff_detection.name} 11 | load_from_last_ckpt: True 12 | checkpoint: 13 | path: ./ 14 | model_name: last.ckpt 15 | 16 | trainer: 17 | # script configs 18 | accelerator: gpu 19 | devices: 1 20 | strategy: ddp 21 | max_epochs: 30 22 | check_val_every_n_epoch: 1 23 | num_sanity_val_steps: 1 24 | precision: 16 25 | 26 | dataset_name: calvin_lang_MoCEndPt 27 | aff_detection: 28 | depth_dist: gaussian # logistic / gaussian 29 | normalize_depth: True 30 | optimizer: 31 | lr: 1e-4 32 | loss_weights: 33 | aff: 0.1 34 | depth: 0.9 35 | dataset: 36 | _recursive_: False 37 | data_dir: ${paths.datasets}/${dataset_name} 38 | cam: static 39 | data_percent: 1.0 40 | img_resize: 41 | static: 224 42 | gripper: 96 43 | all: 100 44 | 45 | dataloader: 46 | num_workers: 4 47 | batch_size: 32 48 | pin_memory: true 49 | 50 | wandb: 51 | logger: 52 | name: ${run_name} 53 | entity: affordance 54 | project: aff_lang 55 | offline: False 56 | group: ${aff_detection.dataset.cam}_${aff_detection.dataset.data_percent}p 57 | saver: ${aff_detection.wandb_saver} 58 | 59 | #-- Hydra config --# 60 | hydra_outputs: ./hydra_outputs/affordance_model/ 61 | hydra: 62 | run: 63 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 64 | -------------------------------------------------------------------------------- /conf/affordance/train_depth.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - paths: general_paths 3 | - transforms: clip_real_world 4 | - override hydra/hydra_logging: colorlog 5 | - override hydra/job_logging: colorlog 6 | 7 | save_viz: True 8 | 9 | # folders 10 | 
run_name: depth 11 | load_from_last_ckpt: True 12 | checkpoint: 13 | path: ./ # ${hydra_outputs}/2022-01-16/14-42-24_aff_rl 14 | model_name: last.ckpt # epoch=49-step=34449.ckpt 15 | 16 | trainer: 17 | # script configs 18 | gpus: -1 19 | max_epochs: 15 20 | check_val_every_n_epoch: 1 21 | num_sanity_val_steps: 1 22 | strategy: ddp 23 | precision: 32 24 | 25 | dataset_name: calvin_lang_MoCEndPt 26 | model: 27 | lr: 1e-05 28 | depth_dist: logistic # logistic / gaussian 29 | lang_fusion_type: mult 30 | normalize_depth: False 31 | 32 | aff_detection: 33 | img_size: 224 34 | 35 | dataset: 36 | _recursive_: False 37 | _target_: hulc2.affordance.datasets.pixel_label.PixeLabelDataLang 38 | transforms: ${transforms} 39 | radius: 40 | static: 16 41 | gripper: 10 42 | data_dir: ${paths.datasets}/${dataset_name} 43 | cam: static 44 | img_resize: 45 | static: 224 # clip img size 46 | gripper: 96 47 | all: 100 48 | 49 | dataloader: 50 | num_workers: 4 51 | batch_size: 32 52 | pin_memory: true 53 | 54 | wandb: 55 | logger: 56 | name: ${run_name} 57 | entity: jessibd 58 | project: depth_est 59 | offline: False 60 | saver: 61 | val_loss: 62 | monitor: 'Validation/total_loss' 63 | save_top_k: 2 64 | mode: min 65 | verbose: True 66 | val_err: 67 | monitor: 'Validation/depth_err' 68 | save_top_k: 2 69 | mode: min 70 | verbose: True 71 | save_last: True 72 | 73 | #-- Hydra config --# 74 | hydra_outputs: ./hydra_outputs/affordance_model/ 75 | hydra: 76 | run: 77 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 78 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | # - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | # contrast: 0.05 7 | # brightness: 0.05 8 | # hue: 0.02 9 | # prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: [0.48145466, 0.4578275, 0.40821073] 12 | std: [0.26862954, 0.26130258, 0.27577711] 13 | 14 | validation: 15 | - _target_: torchvision.transforms.Resize 16 | size: ${aff_detection.img_size} 17 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 18 | - _target_: torchvision.transforms.Normalize 19 | mean: [0.48145466, 0.4578275, 0.40821073] 20 | std: [0.26862954, 0.26130258, 0.27577711] 21 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | contrast: 0.05 7 | brightness: 0.05 8 | hue: 0.01 9 | prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: [0.48145466, 0.4578275, 0.40821073] 12 | std: [0.26862954, 0.26130258, 0.27577711] 13 | 14 | validation: 15 | - _target_: torchvision.transforms.Resize 16 | size: ${aff_detection.img_size} 17 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 18 | - _target_: torchvision.transforms.Normalize 19 | mean: [0.48145466, 0.4578275, 0.40821073] 20 | std: [0.26862954, 
0.26130258, 0.27577711] 21 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_randShift.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | # - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | # contrast: 0.05 9 | # brightness: 0.05 10 | # hue: 0.02 11 | # prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.48145466, 0.4578275, 0.40821073] 14 | std: [0.26862954, 0.26130258, 0.27577711] 15 | 16 | validation: 17 | - _target_: torchvision.transforms.Resize 18 | size: ${aff_detection.img_size} 19 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 20 | - _target_: torchvision.transforms.Normalize 21 | mean: [0.48145466, 0.4578275, 0.40821073] 22 | std: [0.26862954, 0.26130258, 0.27577711] 23 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_randShift_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.05 10 | hue: 0.01 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.48145466, 0.4578275, 0.40821073] 14 | std: [0.26862954, 0.26130258, 0.27577711] 15 | 16 | validation: 17 | - _target_: torchvision.transforms.Resize 18 | size: ${aff_detection.img_size} 19 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 20 | - _target_: torchvision.transforms.Normalize 21 | mean: [0.48145466, 0.4578275, 0.40821073] 22 | std: [0.26862954, 0.26130258, 0.27577711] 23 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_real_world.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | # - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | # pad: 3 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.1 10 | hue: 0.02 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.48145466, 0.4578275, 0.40821073] 14 | std: [0.26862954, 0.26130258, 0.27577711] 15 | 16 | validation: 17 | - _target_: torchvision.transforms.Resize 18 | size: ${aff_detection.img_size} 19 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 20 | - _target_: torchvision.transforms.Normalize 21 | mean: [0.48145466, 0.4578275, 0.40821073] 22 | std: [0.26862954, 0.26130258, 0.27577711] 23 | -------------------------------------------------------------------------------- /conf/affordance/transforms/gray.yaml: 
-------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 5 | contrast: 0.05 6 | brightness: 0.05 7 | hue: 0.02 8 | prob: 1 9 | - _target_: torchvision.transforms.Grayscale 10 | num_output_channels: 1 11 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.5,] 14 | std: [0.5,] 15 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 16 | mean: [0.0] 17 | std: [0.01] 18 | clip: [-1, 1] 19 | 20 | validation: 21 | - _target_: torchvision.transforms.Resize 22 | size: ${aff_detection.img_size} 23 | - _target_: torchvision.transforms.Grayscale 24 | num_output_channels: 1 25 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 26 | - _target_: torchvision.transforms.Normalize 27 | mean: [0.5,] 28 | std: [0.5,] 29 | -------------------------------------------------------------------------------- /conf/affordance/transforms/r3m.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.05 10 | hue: 0.02 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.485, 0.456, 0.406] 14 | std: [0.229, 0.224, 0.225] 15 | 16 | validation: 17 | # - _target_: torch.nn.Identity 18 | - _target_: torchvision.transforms.Resize 19 | size: ${aff_detection.img_size} 20 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 21 | - _target_: torchvision.transforms.Normalize 22 | mean: [0.485, 0.456, 0.406] 23 | std: [0.229, 0.224, 0.225] 24 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: torchvision.transforms.Normalize 6 | mean: [0.5,] 7 | std: [0.5,] 8 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 9 | mean: [0.0] 10 | std: [0.005] 11 | clip: [-1, 1] 12 | 13 | validation: 14 | - _target_: torchvision.transforms.Resize 15 | size: ${aff_detection.img_size} 16 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 17 | - _target_: torchvision.transforms.Normalize 18 | mean: [0.5,] 19 | std: [0.5,] 20 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | contrast: 0.05 7 | brightness: 0.05 8 | hue: 0.02 9 | prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: 
[0.5,] 12 | std: [0.5,] 13 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 14 | mean: [0.0] 15 | std: [0.005] 16 | clip: [-1, 1] 17 | 18 | validation: 19 | - _target_: torchvision.transforms.Resize 20 | size: ${aff_detection.img_size} 21 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 22 | - _target_: torchvision.transforms.Normalize 23 | mean: [0.5,] 24 | std: [0.5,] 25 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb_randShift.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | contrast: 0.05 7 | brightness: 0.05 8 | hue: 0.02 9 | prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: [0.5,] 12 | std: [0.5,] 13 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 14 | mean: [0.0] 15 | std: [0.005] 16 | clip: [-1, 1] 17 | 18 | validation: 19 | - _target_: torchvision.transforms.Resize 20 | size: ${aff_detection.img_size} 21 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 22 | - _target_: torchvision.transforms.Normalize 23 | mean: [0.5,] 24 | std: [0.5,] 25 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb_randShift_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.05 10 | hue: 0.02 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.5,] 14 | std: [0.5,] 15 | # - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 16 | # mean: [0.0] 17 | # std: [0.005] 18 | # clip: [-1, 1] 19 | 20 | validation: 21 | - _target_: torchvision.transforms.Resize 22 | size: ${aff_detection.img_size} 23 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 24 | - _target_: torchvision.transforms.Normalize 25 | mean: [0.5,] 26 | std: [0.5,] 27 | -------------------------------------------------------------------------------- /conf/annotations/new_playtable_validation.yaml: -------------------------------------------------------------------------------- 1 | # rotation 2 | rotate_red_block_right: ["take the red block and rotate it to the right"] 3 | rotate_red_block_left: ["take the red block and rotate it to the left"] 4 | rotate_blue_block_right: ["take the blue block and rotate it to the right"] 5 | rotate_blue_block_left: ["take the blue block and rotate it to the left"] 6 | rotate_pink_block_right: ["take the pink block and rotate it to the right"] 7 | rotate_pink_block_left: ["take the pink block and rotate it to the left"] 8 | 9 | # sliding 10 | push_red_block_right: ["go push the red block right"] 11 | push_red_block_left: ["go push the red block left"] 12 | push_blue_block_right: ["go push the blue block right"] 13 | push_blue_block_left: ["go push the blue block left"] 14 | push_pink_block_right: ["go push the pink block right"] 15 | 
push_pink_block_left: ["go push the pink block left"] 16 | 17 | # open/close 18 | move_slider_left: [ "push the sliding door to the left side"] 19 | move_slider_right: [ "push the sliding door to the right side"] 20 | open_drawer: ["pull the handle to open the drawer"] 21 | close_drawer: ["push the handle to close the drawer"] 22 | 23 | # lifting 24 | lift_red_block_table: ["grasp and lift the red block"] 25 | lift_blue_block_table: ["grasp and lift the blue block"] 26 | lift_pink_block_table: ["grasp and lift the pink block"] 27 | 28 | lift_red_block_slider: [ "lift the red block from the sliding cabinet"] 29 | lift_blue_block_slider: [ "lift the blue block from the sliding cabinet"] 30 | lift_pink_block_slider: [ "lift the pink block from the sliding cabinet"] 31 | 32 | lift_red_block_drawer: ["Take the red block from the drawer"] 33 | lift_blue_block_drawer: ["Take the blue block from the drawer"] 34 | lift_pink_block_drawer: ["Take the pink block from the drawer"] 35 | 36 | place_in_slider: [ "store the grasped block in the sliding cabinet"] 37 | place_in_drawer: [ "store the grasped block in the drawer"] 38 | 39 | push_into_drawer: ["slide the block that it falls into the drawer"] 40 | 41 | stack_block: ["stack the grasped block"] 42 | unstack_block: ["remove the stacked block"] 43 | 44 | turn_on_lightbulb: ["use the switch to turn on the light bulb"] 45 | turn_off_lightbulb: ["use the switch to turn off the light bulb"] 46 | turn_on_led: ["press the button to turn on the led light"] 47 | turn_off_led: ["press the button to turn off the led light"] 48 | -------------------------------------------------------------------------------- /conf/callbacks/calvin_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - rollout: default 3 | - rollout_lh: default 4 | - checkpoint: all 5 | - tsne_plot: default 6 | - kl_schedule: constant 7 | - shm_signal: default 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/all.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: -1 3 | verbose: True 4 | dirpath: saved_models 5 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 6 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/clip_loss.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: val/val_pred_clip_loss 5 | mode: min 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/kl.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: train/kl_loss 5 | mode: max 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/lh_sr.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | 
monitor: eval_lh/avg_seq_len 5 | mode: max 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | every_n_epochs: ${callbacks.rollout_lh.rollout_freq} 9 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/state_recon.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: val/state_recon_loss 5 | mode: min 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/task_sr.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: tasks/average_sr 5 | mode: max 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | every_n_epochs: ${callbacks.rollout.rollout_freq} 9 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/val_action.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: -1 3 | verbose: True 4 | monitor: val_act/action_loss_pp 5 | mode: min 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/kl_schedule/constant.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.kl_callbacks.KLConstantSchedule 2 | -------------------------------------------------------------------------------- /conf/callbacks/kl_schedule/linear.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.kl_callbacks.KLLinearSchedule 2 | start_epoch: 10 3 | end_epoch: 50 4 | max_kl_beta: ${loss.kl_beta} 5 | -------------------------------------------------------------------------------- /conf/callbacks/kl_schedule/sigmoid.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: hulc2.utils.kl_callbacks.KLSigmoidSchedule 3 | start_epoch: 10 4 | end_epoch: 50 5 | max_kl_beta: ${loss.kl_beta} 6 | -------------------------------------------------------------------------------- /conf/callbacks/real_world_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - checkpoint: all 3 | - kl_schedule: constant 4 | - shm_signal: default 5 | -------------------------------------------------------------------------------- /conf/callbacks/rollout/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - tasks: new_playtable_tasks 3 | _target_: hulc2.rollout.rollout.Rollout 4 | _recursive_: false 5 | env_cfg: 6 | _target_: hulc2.wrappers.hulc2_wrapper.Hulc2Wrapper 7 | skip_epochs: 1 8 | rollout_freq: 5 9 | video: true 10 | num_rollouts_per_task: 10 11 | check_percentage_of_batch: 1 # which percentage of sequences do we want to check for possible tasks 12 | replan_freq: 30 13 | ep_len: 120 14 | empty_cache: false 15 | log_video_to_file: false 16 | 
save_dir: ./videos 17 | start_robot_neutral: false 18 | add_goal_thumbnail: true 19 | min_window_size: ${datamodule.datasets.vision_dataset.min_window_size} 20 | max_window_size: ${datamodule.datasets.vision_dataset.max_window_size} 21 | id_selection_strategy: "select_longest" 22 | lang_folder: ${datamodule.datasets.lang_dataset.lang_folder} 23 | -------------------------------------------------------------------------------- /conf/callbacks/rollout_lh/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /callbacks/rollout/tasks@tasks: new_playtable_tasks 3 | - /annotations@val_annotations: new_playtable_validation 4 | _target_: hulc2.rollout.rollout_long_horizon.RolloutLongHorizon 5 | _recursive_: false 6 | env_cfg: 7 | _target_: calvin_env.envs.play_lmp_wrapper.PlayLMPWrapper 8 | skip_epochs: 1 9 | rollout_freq: 1 10 | num_videos: 16 11 | num_sequences: 128 12 | replan_freq: 30 13 | ep_len: 360 14 | empty_cache: false 15 | log_video_to_file: false 16 | save_dir: ./videos 17 | lang_folder: ${datamodule.datasets.lang_dataset.lang_folder} 18 | debug: false 19 | -------------------------------------------------------------------------------- /conf/callbacks/shm_signal/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.utils.shared_memory_loader.SignalCallback 2 | -------------------------------------------------------------------------------- /conf/callbacks/tsne_plot/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.visualization.tsne_plot.TSNEPlot 2 | perplexity: 40 3 | n_jobs: 8 4 | plot_percentage: 0.2 5 | opacity: 0.3 6 | marker_size: 5 7 | -------------------------------------------------------------------------------- /conf/cfg_high_level.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - paths: general_paths 3 | - simulation/robot@robot: panda_longer_finger 4 | - simulation/scene@scene: calvin_scene_D 5 | - simulation/env@env: env 6 | - simulation/cameras@cameras: high_res 7 | - simulation/agent@agent: play_lmp 8 | - override hydra/hydra_logging: colorlog 9 | - override hydra/job_logging: colorlog 10 | 11 | data_path: ${paths.vr_data} 12 | model_name: full 13 | 14 | max_timesteps: 364 15 | gripper_offset: [0.0, -0.025, 0.05] 16 | policy_checkpoint: 17 | train_folder: ./trained_agents/lfp 18 | model_name: epoch=30.ckpt 19 | 20 | agent: 21 | viz_obs: True 22 | 23 | aff_detection: 24 | checkpoint: 25 | train_folder: ~/logs/hulc2/aff_ablation/2022-06-15/18-23-49_aff_ablation 26 | # train_folder: ./hydra_outputs/affordance_model/2022-03-09/01-38-55_aff_rl 27 | model_name: val_err.ckpt 28 | 29 | hough_voting: 30 | skip_pixels: 4 31 | inlier_threshold: 0.7 32 | angle_discretization: 100 33 | inlier_distance: 15 34 | percentage_threshold: 0.3 35 | object_center_kernel_radius: 10 36 | 37 | save_dir: ./hydra_outputs/calvin 38 | hydra: 39 | run: 40 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} 41 | -------------------------------------------------------------------------------- /conf/cfg_high_level_rw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datamodule: default 3 | - robot: panda_frankx_interface_policy 4 | - env: robot_io_env 5 | - cams: camera_manager 6 | - agent: real_world 7 | - paths: general_paths 8 | - override hydra/hydra_logging: colorlog 9 | - override 
hydra/job_logging: colorlog 10 | - override datamodule/datasets: vision_only 11 | - _self_ 12 | 13 | data_path: ${paths.vr_data} 14 | model_name: real_world 15 | train_folder: ??? 16 | max_timesteps: 100 17 | agent: 18 | _target_: hulc2.agents.real_world_agent.AffHULCAgent 19 | _recursive_: False 20 | save_viz: False 21 | viz_obs: True 22 | offset: [-0.05, -0.05, 0.13] # Relative to end effector 23 | aff_cfg: 24 | train_folder: ./real_world_checkpoints/aff_model_single 25 | model_name: last.ckpt 26 | 27 | model_free: 28 | train_folder: ./real_world_checkpoints/lang_lfp_single 29 | checkpoint: 17 30 | seed: 42 31 | env: 32 | freq: 15 33 | panda_env_wrapper: 34 | max_rel_pos: 0.02 35 | max_rel_orn: 0.05 36 | 37 | save_dir: ./hydra_outputs/real_world_inference 38 | hydra: 39 | run: 40 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} 41 | searchpath: 42 | - pkg://robot_io.conf 43 | - pkg://hulc2.conf 44 | 45 | sweep: 46 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 47 | subdir: ${hydra.job.override_dirname} 48 | -------------------------------------------------------------------------------- /conf/cfg_low_level.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - callbacks: calvin_default 4 | - datamodule: calvin_default 5 | - model: calvin_hulc++ 6 | - loss: default 7 | - training: default_training 8 | - trainer: play_trainer 9 | - logger: wandb 10 | 11 | - override hydra/job_logging: colorlog 12 | - override hydra/hydra_logging: colorlog 13 | 14 | data_percent: 1 15 | seed: 42 16 | log_dir: ../ 17 | slurm: false 18 | 19 | hydra: 20 | run: 21 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 22 | sweep: 23 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 24 | subdir: ${hydra.job.override_dirname} 25 | job: 26 | config: 27 | override_dirname: 28 | exclude_keys: 29 | - log_dir 30 | - datamodule.root_data_dir 31 | - trainer.gpus 32 | - model.tsne_plot 33 | - datamodule.num_workers 34 | - trainer.limit_train_batches 35 | - trainer.limit_val_batches 36 | - model.action_decoder.load_action_bounds 37 | -------------------------------------------------------------------------------- /conf/cfg_low_level_rw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - callbacks: real_world_default 4 | - datamodule: real_world_default 5 | - model: real_world_hulc++ 6 | - loss: default 7 | - training: default_training 8 | - trainer: play_trainer 9 | - logger: wandb 10 | 11 | - override hydra/job_logging: colorlog 12 | - override hydra/hydra_logging: colorlog 13 | 14 | data_percent: 1 15 | seed: 42 16 | log_dir: ../ 17 | slurm: false 18 | 19 | hydra: 20 | run: 21 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 22 | sweep: 23 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 24 | subdir: ${hydra.job.override_dirname} 25 | job: 26 | config: 27 | override_dirname: 28 | exclude_keys: 29 | - log_dir 30 | - datamodule.root_data_dir 31 | - trainer.gpus 32 | - model.tsne_plot 33 | - datamodule.num_workers 34 | - trainer.limit_train_batches 35 | - trainer.limit_val_batches 36 | - model.action_decoder.load_action_bounds 37 | -------------------------------------------------------------------------------- /conf/datamodule/calvin_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datasets: vision_lang_shm 3 | - transforms: rand_shift 4 | - proprioception_dims: robot_no_joints #robot_full 5 | - 
observation_space: lang_rgb_static_gripper_rel_act 6 | _target_: hulc2.datasets.hulc2_sim_data_module.Hulc2SimdDataModule 7 | _recursive_: false 8 | root_data_dir: ??? 9 | action_space: 7 10 | action_max: [1., 1., 1., 1., 1., 1., 1.,] 11 | action_min: [-1., -1., -1., -1., -1., -1., -1] 12 | shuffle_val: false 13 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/lang_dataset/lang.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.npz_dataset.NpzDataset 2 | key: "lang" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | skip_frames: 1 10 | pad: true 11 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 12 | aux_lang_loss_window: 8 13 | num_workers: 2 14 | data_percent: ${data_percent} 15 | load_lang_embeddings: false 16 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/lang_dataset/lang_shm.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.shm_dataset.ShmDataset 2 | key: "lang" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | skip_frames: 1 10 | pad: true 11 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 12 | aux_lang_loss_window: 8 13 | num_workers: 2 14 | data_percent: ${data_percent} 15 | load_lang_embeddings: false 16 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/lang_only.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - lang_dataset: lang 3 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_dataset/vision.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.npz_dataset.NpzDataset 2 | key: "vis" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | pad: true 10 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 11 | num_workers: 8 12 | data_percent: ${data_percent} 13 | load_lang_embeddings: false 14 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_dataset/vision_shm.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.shm_dataset.ShmDataset 2 | key: "vis" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | pad: true 10 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 11 | num_workers: 2 12 | data_percent: ${data_percent} 13 | load_lang_embeddings: false 14 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_lang.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision 3 | - lang_dataset: lang 4 | -------------------------------------------------------------------------------- 
/conf/datamodule/datasets/vision_lang_shm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision_shm 3 | - lang_dataset: lang_shm 4 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_only.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision 3 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_only_shm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision_shm 3 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/all_mods_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper', 'rgb_tactile'] 2 | depth_obs: ['depth_static', 'depth_gripper', 'depth_tactile'] 3 | state_obs: ['robot_obs', 'scene_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_gripper_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_gripper_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_gripper_rel_gripper_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions_gripper'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_robot_scene_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs', 'scene_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- 
/conf/datamodule/observation_space/lang_rgb_static_tactile_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_tactile'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_both_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: ['depth_static', 'depth_gripper'] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_both_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: ['depth_static', 'depth_gripper'] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_static_gripper_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: ['depth_gripper'] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_static_robot_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: ['depth_static'] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/rgb_static_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/rgb_static_gripper_rel_gripper_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions_gripper'] 5 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/rgb_static_robot_scene_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs', 'scene_obs'] 4 | actions: ['actions'] 5 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/state_only.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: [] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/none.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 0 2 | keep_indices: [[0, 0]] 3 | robot_orientation_idx: 
[3, 6] 4 | normalize: False 5 | normalize_robot_orientation: False 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_full.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 15 2 | keep_indices: [[0, 15]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_no_joints.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 8 2 | keep_indices: [[0, 7], [14,15]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_no_joints_no_gripper_width.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 7 2 | keep_indices: [[0, 6], [14,15]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_scene.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 54 2 | keep_indices: [[0, 54]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/real_world_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datasets: vision_lang_shm 3 | - transforms: real_world_r3m 4 | - proprioception_dims: robot_no_joints #robot_full 5 | - observation_space: lang_rgb_static_gripper_rel_gripper_act 6 | _target_: hulc2.datasets.hulc2_real_world_data_module.Hulc2RealWorldDataModule 7 | _recursive_: false 8 | root_data_dir: ??? 
9 | action_space: 7 10 | action_max: [1., 1., 1., 1., 1., 1., 1.,] 11 | action_min: [-1., -1., -1., -1., -1., -1., -1] 12 | shuffle_val: false 13 | -------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | # - _target_: torchvision.transforms.Resize 4 | # size: 200 5 | # - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | # pad: 10 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor 8 | - _target_: hulc2.utils.transforms.ColorTransform 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: torchvision.transforms.Normalize 14 | mean: [0.5,] 15 | std: [0.5,] 16 | rgb_gripper: 17 | - _target_: torchvision.transforms.Resize 18 | size: 84 19 | - _target_: hulc2.utils.transforms.ScaleImageTensor 20 | - _target_: hulc2.utils.transforms.ColorTransform 21 | contrast: 0.05 22 | brightness: 0.05 23 | hue: 0.02 24 | prob: 1 25 | - _target_: hulc2.utils.transforms.RandomShiftsAug 26 | pad: 4 27 | - _target_: torchvision.transforms.Normalize 28 | mean: [0.5,] 29 | std: [0.5,] 30 | depth_static: 31 | # - _target_: torchvision.transforms.Resize 32 | # size: [200, 200] 33 | - _target_: hulc2.utils.transforms.AddDepthNoise 34 | shape: [1000.0] 35 | rate: [1000.0] 36 | depth_gripper: 37 | - _target_: torchvision.transforms.Resize 38 | size: 84 39 | - _target_: hulc2.utils.transforms.AddGaussianNoise 40 | mean: [ 0.0 ] 41 | std: [ 0.01 ] 42 | robot_obs: 43 | - _target_: hulc2.utils.transforms.NormalizeVector 44 | 45 | # language: 46 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 47 | # mean: [ 0.0 ] 48 | # std: [ 0.01 ] 49 | 50 | 51 | val: 52 | rgb_static: 53 | # - _target_: torchvision.transforms.Resize 54 | # size: 200 55 | - _target_: hulc2.utils.transforms.ScaleImageTensor 56 | - _target_: torchvision.transforms.Normalize 57 | mean: [0.5,] 58 | std: [0.5,] 59 | rgb_gripper: 60 | - _target_: torchvision.transforms.Resize 61 | size: 84 62 | - _target_: hulc2.utils.transforms.ScaleImageTensor 63 | - _target_: torchvision.transforms.Normalize 64 | mean: [0.5,] 65 | std: [0.5,] 66 | depth_static: 67 | - _target_: torchvision.transforms.Resize 68 | size: 200 69 | depth_gripper: 70 | - _target_: torchvision.transforms.Resize 71 | size: 84 72 | robot_obs: 73 | - _target_: hulc2.utils.transforms.NormalizeVector 74 | -------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world_no_rand_shift.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | # - _target_: torchvision.transforms.Resize 4 | # size: 200 5 | # - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | # pad: 10 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor # Scale image between 0-1 (float) 8 | - _target_: hulc2.utils.transforms.ColorTransform # Maintains the range and image type 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: torchvision.transforms.Normalize 14 | mean: [0.5,] 15 | std: [0.5,] 16 | rgb_gripper: 17 | - _target_: torchvision.transforms.Resize 18 | size: 84 19 | - _target_: hulc2.utils.transforms.ScaleImageTensor 20 | - _target_: hulc2.utils.transforms.ColorTransform 21 | contrast: 0.05 22 | brightness: 0.05 23 | hue: 0.02 24 | prob: 1 25 | - _target_: torchvision.transforms.Normalize 26 | mean: [0.5,] 27 | std: [0.5,] 28 | depth_static: 29 | # - 
_target_: torchvision.transforms.Resize 30 | # size: [200, 200] 31 | - _target_: hulc2.utils.transforms.AddDepthNoise 32 | shape: [1000.0] 33 | rate: [1000.0] 34 | depth_gripper: 35 | - _target_: torchvision.transforms.Resize 36 | size: 84 37 | - _target_: hulc2.utils.transforms.AddGaussianNoise 38 | mean: [ 0.0 ] 39 | std: [ 0.01 ] 40 | robot_obs: 41 | - _target_: hulc2.utils.transforms.NormalizeVector 42 | 43 | # language: 44 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 45 | # mean: [ 0.0 ] 46 | # std: [ 0.01 ] 47 | 48 | 49 | val: 50 | rgb_static: 51 | # - _target_: torchvision.transforms.Resize 52 | # size: 200 53 | - _target_: hulc2.utils.transforms.ScaleImageTensor 54 | - _target_: torchvision.transforms.Normalize 55 | mean: [0.5,] 56 | std: [0.5,] 57 | rgb_gripper: 58 | - _target_: torchvision.transforms.Resize 59 | size: 84 60 | - _target_: hulc2.utils.transforms.ScaleImageTensor 61 | - _target_: torchvision.transforms.Normalize 62 | mean: [0.5,] 63 | std: [0.5,] 64 | depth_static: 65 | - _target_: torchvision.transforms.Resize 66 | size: 200 67 | depth_gripper: 68 | - _target_: torchvision.transforms.Resize 69 | size: 84 70 | robot_obs: 71 | - _target_: hulc2.utils.transforms.NormalizeVector 72 | -------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world_r3m.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | # - _target_: torchvision.transforms.Resize 4 | # size: 200 5 | # - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | # pad: 10 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor 8 | - _target_: hulc2.utils.transforms.ColorTransform 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: hulc2.utils.transforms.UpScaleImageTensor 14 | rgb_gripper: 15 | - _target_: torchvision.transforms.Resize 16 | size: 84 17 | - _target_: hulc2.utils.transforms.ScaleImageTensor 18 | - _target_: hulc2.utils.transforms.ColorTransform 19 | contrast: 0.05 20 | brightness: 0.05 21 | hue: 0.02 22 | prob: 1 23 | - _target_: hulc2.utils.transforms.RandomShiftsAug 24 | pad: 4 25 | - _target_: torchvision.transforms.Normalize 26 | mean: [0.5,] 27 | std: [0.5,] 28 | depth_static: 29 | # - _target_: torchvision.transforms.Resize 30 | # size: [200, 200] 31 | - _target_: hulc2.utils.transforms.AddDepthNoise 32 | shape: [1000.0] 33 | rate: [1000.0] 34 | depth_gripper: 35 | - _target_: torchvision.transforms.Resize 36 | size: 84 37 | - _target_: hulc2.utils.transforms.AddGaussianNoise 38 | mean: [ 0.0 ] 39 | std: [ 0.01 ] 40 | robot_obs: 41 | - _target_: hulc2.utils.transforms.NormalizeVector 42 | 43 | # language: 44 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 45 | # mean: [ 0.0 ] 46 | # std: [ 0.01 ] 47 | 48 | 49 | val: 50 | # rgb_static: 51 | # - _target_: torchvision.transforms.Resize 52 | # size: 200 53 | # - _target_: hulc2.utils.transforms.ScaleImageTensor 54 | # - _target_: torchvision.transforms.Normalize 55 | # mean: [0.5,] 56 | # std: [0.5,] 57 | rgb_gripper: 58 | - _target_: torchvision.transforms.Resize 59 | size: 84 60 | - _target_: hulc2.utils.transforms.ScaleImageTensor 61 | - _target_: torchvision.transforms.Normalize 62 | mean: [0.5,] 63 | std: [0.5,] 64 | depth_static: 65 | - _target_: torchvision.transforms.Resize 66 | size: 200 67 | depth_gripper: 68 | - _target_: torchvision.transforms.Resize 69 | size: 84 70 | robot_obs: 71 | - _target_: hulc2.utils.transforms.NormalizeVector 72 | 
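Each datamodule transform config above maps an observation key (rgb_static, rgb_gripper, depth_static, depth_gripper, robot_obs) to a list of Hydra _target_ entries, with separate lists for train and val. The following is only a rough sketch of how such a list can become a per-camera torchvision pipeline, assuming the entries are built with hydra.utils.instantiate; the path and helper name are illustrative, not the repository's own datamodule code, and the custom hulc2.utils.transforms classes must be importable for instantiation to succeed.

# Illustrative sketch only; not the repository's datamodule code.
from hydra.utils import instantiate
from omegaconf import OmegaConf
from torchvision import transforms

cfg = OmegaConf.load("conf/datamodule/transforms/real_world.yaml")  # assumed relative path

def build_transforms(split_cfg):
    # One Compose per observation key, instantiating each _target_ entry in order.
    return {key: transforms.Compose([instantiate(t) for t in entries])
            for key, entries in split_cfg.items()}

train_transforms = build_transforms(cfg.train)   # e.g. train_transforms["rgb_gripper"](image_tensor)
val_transforms = build_transforms(cfg.val)

Note that the commented-out entries in these files (for example the language noise) never reach the parsed lists, so toggling an augmentation only requires editing the YAML.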
-------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world_square.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | - _target_: torchvision.transforms.Resize 4 | size: [150, 150] 5 | - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | pad: 6 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor 8 | - _target_: hulc2.utils.transforms.ColorTransform 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: torchvision.transforms.Normalize 14 | mean: [0.5,] 15 | std: [0.5,] 16 | rgb_gripper: 17 | - _target_: torchvision.transforms.Resize 18 | size: 84 19 | - _target_: hulc2.utils.transforms.ScaleImageTensor 20 | - _target_: hulc2.utils.transforms.ColorTransform 21 | contrast: 0.05 22 | brightness: 0.05 23 | hue: 0.02 24 | prob: 1 25 | - _target_: hulc2.utils.transforms.RandomShiftsAug 26 | pad: 4 27 | - _target_: torchvision.transforms.Normalize 28 | mean: [0.5,] 29 | std: [0.5,] 30 | depth_static: 31 | # - _target_: torchvision.transforms.Resize 32 | # size: [200, 200] 33 | - _target_: hulc2.utils.transforms.AddDepthNoise 34 | shape: [1000.0] 35 | rate: [1000.0] 36 | depth_gripper: 37 | - _target_: torchvision.transforms.Resize 38 | size: 84 39 | - _target_: hulc2.utils.transforms.AddGaussianNoise 40 | mean: [ 0.0 ] 41 | std: [ 0.01 ] 42 | robot_obs: 43 | - _target_: hulc2.utils.transforms.NormalizeVector 44 | 45 | # language: 46 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 47 | # mean: [ 0.0 ] 48 | # std: [ 0.01 ] 49 | 50 | 51 | val: 52 | rgb_static: 53 | - _target_: torchvision.transforms.Resize 54 | size: [150, 150] 55 | - _target_: hulc2.utils.transforms.ScaleImageTensor 56 | - _target_: torchvision.transforms.Normalize 57 | mean: [0.5,] 58 | std: [0.5,] 59 | rgb_gripper: 60 | - _target_: torchvision.transforms.Resize 61 | size: 84 62 | - _target_: hulc2.utils.transforms.ScaleImageTensor 63 | - _target_: torchvision.transforms.Normalize 64 | mean: [0.5,] 65 | std: [0.5,] 66 | depth_static: 67 | - _target_: torchvision.transforms.Resize 68 | size: 200 69 | depth_gripper: 70 | - _target_: torchvision.transforms.Resize 71 | size: 84 72 | robot_obs: 73 | - _target_: hulc2.utils.transforms.NormalizeVector 74 | -------------------------------------------------------------------------------- /conf/inference/config_inference.yaml: -------------------------------------------------------------------------------- 1 | train_folder: ??? # config path to the config.yaml of the training folder (in .hydra) 2 | load_checkpoint: ??? 3 | seed: 42 4 | log_dir: /tmp 5 | visualize: True 6 | ep_len: 120 7 | replan_freq: 30 8 | processes: 1 9 | 10 | hydra: 11 | run: 12 | dir: ${log_dir}/inference_runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 13 | 14 | defaults: 15 | - override hydra/job_logging: colorlog 16 | - override hydra/hydra_logging: colorlog 17 | -------------------------------------------------------------------------------- /conf/inference_real.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datamodule: default 3 | - env: env 4 | - robot: panda_frankx_interface_policy 5 | - cams: camera_manager 6 | - override hydra/job_logging: colorlog 7 | - override hydra/hydra_logging: colorlog 8 | 9 | seed: 42 10 | log_dir: ../ 11 | slurm: false 12 | env: 13 | freq: 15 14 | train_folder: ??? 15 | checkpoint: ??? 
16 | 17 | hydra: 18 | searchpath: 19 | - pkg://robot_io.conf 20 | run: 21 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 22 | sweep: 23 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 24 | subdir: ${hydra.job.override_dirname} 25 | job: 26 | config: 27 | override_dirname: 28 | exclude_keys: 29 | - log_dir 30 | - datamodule.root_data_dir 31 | - trainer.gpus 32 | - model.tsne_plot 33 | - datamodule.num_workers 34 | - trainer.limit_train_batches 35 | - trainer.limit_val_batches 36 | - model.action_decoder.load_action_bounds 37 | -------------------------------------------------------------------------------- /conf/lang_ann.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - callbacks: default 3 | - datamodule: default 4 | - model: sbert 5 | - loss: default 6 | - training: default_training 7 | - trainer: play_trainer 8 | - logger: wandb 9 | - annotations@train_instructions: new_playtable 10 | - annotations@val_instructions: new_playtable_validation 11 | 12 | - override hydra/job_logging: colorlog 13 | - override hydra/hydra_logging: colorlog 14 | #- override datamodule/observation_space: state_only 15 | #- override datamodule/datasets: vision_only 16 | - _self_ 17 | 18 | seed: 42 19 | log_dir: ../ 20 | slurm: false 21 | eps: 0.01 22 | postprocessing: true 23 | lang_folder: "lang_paraphrase-MiniLM-L3-v2_singleTasks" 24 | with_text: false 25 | reannotate: false 26 | prior_steps_window: 16 27 | validation_scene: calvin_scene_D 28 | datamodule: 29 | datasets: 30 | vision_dataset: 31 | min_window_size: 64 32 | max_window_size: 64 33 | 34 | hydra: 35 | run: 36 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname} 37 | sweep: 38 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 39 | subdir: ${hydra.job.override_dirname} 40 | job: 41 | config: 42 | override_dirname: 43 | exclude_keys: 44 | - log_dir 45 | - datamodule.root_data_dir 46 | - trainer.gpus 47 | - model.tsne_plot 48 | - datamodule.num_workers 49 | - trainer.limit_train_batches 50 | - trainer.limit_val_batches 51 | - model.decoder.load_action_bounds 52 | -------------------------------------------------------------------------------- /conf/logger/tb_logger.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.loggers.TensorBoardLogger 2 | save_dir: . 3 | name: play_lmp 4 | version: "" 5 | -------------------------------------------------------------------------------- /conf/logger/wandb.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.loggers.WandbLogger 2 | save_dir: . 3 | name: play_lmp 4 | group: play_lmp 5 | log_model: false 6 | project: "multi_play" 7 | id: ??? 
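Several configs in this group leave values as ??? (for example the logger id above, train_folder and checkpoint in inference_real.yaml, and datamodule.root_data_dir). OmegaConf treats ??? as a mandatory missing value: composition still succeeds, but any attempt to read the field raises MissingMandatoryValue, so such fields are normally filled via command-line overrides at launch. A minimal illustration, not repository code:

# Illustrative only: behaviour of the ??? markers in OmegaConf/Hydra configs.
from omegaconf import OmegaConf

cfg = OmegaConf.create({"train_folder": "???", "checkpoint": "???", "seed": 42})
print(OmegaConf.is_missing(cfg, "train_folder"))  # True; reading it now would raise MissingMandatoryValue
cfg.checkpoint = "epoch=30.ckpt"                  # in practice passed as an override, e.g. checkpoint=epoch=30.ckpt
print(cfg.checkpoint)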
8 | -------------------------------------------------------------------------------- /conf/loss/default.yaml: -------------------------------------------------------------------------------- 1 | kl_beta: 0.01 2 | state_recon_beta: 0.5 3 | kl_balancing_mix: 0.8 4 | bc_z_auxiliary_loss_beta: 1.0 5 | mia_auxiliary_loss_beta: 1.0 6 | clip_auxiliary_loss_beta: 3.0 7 | -------------------------------------------------------------------------------- /conf/model/action_decoder/deterministic.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.decoders.deterministic_decoder.DeterministicDecoder 2 | hidden_size: 2048 3 | out_features: ${datamodule.action_space} 4 | policy_rnn_dropout_p: 0.0 5 | perceptual_features: ?? 6 | latent_goal_features: ${model.visual_goal.latent_goal_features} 7 | plan_features: ??? 8 | criterion: HuberLoss # MSELoss 9 | num_layers: 2 10 | rnn_model: rnn_decoder 11 | perceptual_emb_slice: [64, 128] 12 | gripper_control: true 13 | -------------------------------------------------------------------------------- /conf/model/action_decoder/logistic_decoder_rnn_calvin.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.decoders.logistic_decoder_rnn.LogisticDecoderRNN 2 | n_mixtures: 10 3 | hidden_size: 2048 4 | out_features: ${datamodule.action_space} 5 | log_scale_min: -7.0 6 | act_max_bound: ${datamodule.action_max} 7 | act_min_bound: ${datamodule.action_min} 8 | dataset_dir: ${datamodule.root_data_dir} 9 | load_action_bounds: false 10 | num_classes: 10 11 | latent_goal_features: ${model.visual_goal.latent_goal_features} 12 | plan_features: ??? 13 | perceptual_features: ??? 14 | gripper_alpha: 1.0 15 | perceptual_emb_slice: [64, 128] 16 | policy_rnn_dropout_p: 0.0 17 | num_layers: 2 18 | rnn_model: rnn_decoder 19 | gripper_control: true 20 | discrete_gripper: true 21 | -------------------------------------------------------------------------------- /conf/model/action_decoder/logistic_decoder_rnn_real_world.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.decoders.logistic_decoder_rnn.LogisticDecoderRNN 2 | n_mixtures: 10 3 | hidden_size: 2048 4 | out_features: ${datamodule.action_space} 5 | log_scale_min: -7.0 6 | act_max_bound: ${datamodule.action_max} 7 | act_min_bound: ${datamodule.action_min} 8 | dataset_dir: ${datamodule.root_data_dir} 9 | load_action_bounds: false 10 | num_classes: 10 11 | latent_goal_features: ${model.visual_goal.latent_goal_features} 12 | plan_features: ??? 13 | perceptual_features: ??? 
14 | gripper_alpha: 1.0 15 | perceptual_emb_slice: [0, 128] 16 | policy_rnn_dropout_p: 0.0 17 | num_layers: 2 18 | rnn_model: rnn_decoder 19 | gripper_control: false 20 | discrete_gripper: true 21 | -------------------------------------------------------------------------------- /conf/model/calvin_hulc++.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - perceptual_encoder: gripper_cam 3 | - plan_proposal: default 4 | - plan_recognition: transformers 5 | - distribution: discrete 6 | - visual_goal: default 7 | - language_encoder: sbert 8 | - language_goal: default 9 | - action_decoder: logistic_decoder_rnn_calvin 10 | - optimizer: adam 11 | - lr_scheduler: constant 12 | - proj_vis_lang: default 13 | 14 | _target_: hulc2.models.hulc2.Hulc2 15 | _recursive_: false 16 | 17 | kl_beta: ${loss.kl_beta} 18 | kl_balancing_mix: ${loss.kl_balancing_mix} 19 | replan_freq: 30 20 | use_clip_auxiliary_loss: true 21 | clip_auxiliary_loss_beta: ${loss.clip_auxiliary_loss_beta} 22 | -------------------------------------------------------------------------------- /conf/model/clip_lang.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.clip_lang_encoder.LangClip 2 | freeze_backbone: true 3 | model_name: "RN50" # "RN101", "RN50x4", "RN50x16", "ViT-B/32", "ViT-B/16" 4 | -------------------------------------------------------------------------------- /conf/model/distribution/continuous.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.distributions.Distribution 2 | dist: "continuous" 3 | plan_features: 256 4 | -------------------------------------------------------------------------------- /conf/model/distribution/discrete.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.distributions.Distribution 2 | dist: "discrete" 3 | category_size: 32 4 | class_size: 32 5 | -------------------------------------------------------------------------------- /conf/model/gcbc.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - calvin_hulc++ 3 | 4 | _target_: hulc2.models.gcbc.GCBC 5 | -------------------------------------------------------------------------------- /conf/model/language_encoder/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.lang_encoder.LanguageEncoder 2 | language_features: 384 3 | hidden_size: 2048 4 | out_features: 256 5 | word_dropout_p: 0.0 6 | activation_function: ReLU #ELU 7 | -------------------------------------------------------------------------------- /conf/model/language_encoder/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/language_encoder/none.yaml -------------------------------------------------------------------------------- /conf/model/language_encoder/sbert.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.affordance.models.language_encoders.sbert_lang_encoder.SBertLang 2 | freeze_backbone: True 3 | nlp_model: "paraphrase-MiniLM-L3-v2" 4 | -------------------------------------------------------------------------------- /conf/model/language_goal/default.yaml: 
-------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.goal_encoders.LanguageGoalEncoder 2 | in_features: 384 3 | hidden_size: 2048 4 | latent_goal_features: 32 5 | l2_normalize_goal_embeddings: False 6 | activation_function: ReLU #ELU 7 | word_dropout_p: 0.0 8 | -------------------------------------------------------------------------------- /conf/model/language_goal/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/language_goal/none.yaml -------------------------------------------------------------------------------- /conf/model/lr_scheduler/constant.yaml: -------------------------------------------------------------------------------- 1 | _target_: transformers.get_constant_schedule 2 | -------------------------------------------------------------------------------- /conf/model/lr_scheduler/cosine_schedule_with_warmup.yaml: -------------------------------------------------------------------------------- 1 | _target_: transformers.get_cosine_schedule_with_warmup 2 | num_training_steps: -1 # -1 specifies to infer number of training steps 3 | num_warmup_steps: 0.1 # float values determines percentage of training steps to use as warmup 4 | num_cycles: 0.5 5 | -------------------------------------------------------------------------------- /conf/model/lr_scheduler/linear_schedule_with_warmup.yaml: -------------------------------------------------------------------------------- 1 | _target_: transformers.get_linear_schedule_with_warmup 2 | num_training_steps: -1 # -1 specifies to infer number of training steps 3 | num_warmup_steps: 0.1 # float values determines percentage of training steps to use as warmup 4 | -------------------------------------------------------------------------------- /conf/model/optimizer/adam.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.Adam 2 | lr: ${training.lr} 3 | #weight_decay: 1e-6 4 | -------------------------------------------------------------------------------- /conf/model/optimizer/adamw.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.AdamW 2 | lr: ${training.lr} 3 | weight_decay: 1e-6 4 | #amsgrad: False 5 | -------------------------------------------------------------------------------- /conf/model/optimizer/sgd.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.SGD 2 | lr: ${training.lr} 3 | momentum: 0.9 4 | #weight_decay: 0.0005 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/RGBD_both.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - rgb_gripper: default 7 | - depth_static: default 8 | - depth_gripper: default 9 | - proprio: none 10 | - tactile: none 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - 
rgb_gripper: none 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | - state_decoder: none 12 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_gripper/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network_gripper.VisionNetwork 2 | input_width: 84 3 | input_height: 84 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | conv_encoder: nature_cnn 9 | num_c: 1 10 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_gripper/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/depth_gripper/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_static/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network.VisionNetwork 2 | input_width: 200 3 | input_height: 200 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | num_c: 1 9 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_static/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/depth_static/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/gripper_cam.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: r3m 6 | - rgb_gripper: default 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/proprio/identity.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.proprio_encoder.IdentityEncoder 2 | proprioception_dims: ${datamodule.proprioception_dims} 3 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/proprio/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/proprio/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/resnet_aff.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: resnet_aff 6 | - rgb_gripper: resnet_aff 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | 
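The perceptual_encoder variants above are plain Hydra defaults lists that plug one sub-encoder config into each modality slot of ConcatEncoders (rgb_static, rgb_gripper, depth_static, depth_gripper, proprio, tactile); the none options, shown here only as raw-file links presumably because those files are empty, leave a slot unused. Swapping encoders is therefore a config-group override rather than a code change. Below is a hedged sketch using Hydra's compose API; the absolute conf path is an assumption, and the same overrides can equally be passed to the training entry point on the command line.

# Illustrative only: compose the low-level training config with a different perceptual encoder.
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

with initialize_config_dir(config_dir="/absolute/path/to/hulc2/conf"):  # assumed location
    cfg = compose(
        config_name="cfg_low_level",
        overrides=[
            "model/perceptual_encoder=static_RGBD",       # RGB-D static cam instead of gripper_cam
            "datamodule.root_data_dir=/tmp/calvin_data",  # fill a mandatory ??? value
        ],
    )
print(OmegaConf.to_yaml(cfg.model.perceptual_encoder, resolve=False))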
-------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network_gripper.VisionNetwork 2 | input_width: 84 3 | input_height: 84 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | conv_encoder: nature_cnn 9 | num_c: 3 10 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/rgb_gripper/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/r3m.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_r3m.VisionR3M 2 | visual_features: 64 3 | freeze_backbone: True 4 | resnet_model: "resnet18" 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/resnet.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_resnet.VisionResnet 2 | visual_features: 64 3 | freeze_backbone: True 4 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/resnet_aff.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_resnet_aff.VisionResnetAff 2 | visual_features: 64 3 | freeze_backbone: True 4 | input_shape: [84, 84, 3] 5 | depth: 3 6 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/clip.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_clip.VisionClip 2 | visual_features: 64 3 | freeze_backbone: true 4 | model_name: "RN50" # "RN101", "RN50x4", "RN50x16", "ViT-B/32", "ViT-B/16" 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network.VisionNetwork 2 | input_width: 200 3 | input_height: 150 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | num_c: 3 9 | use_sinusoid: false 10 | spatial_softmax_temp: 1.0 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/r3m.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_r3m.VisionR3M 2 | visual_features: 64 3 | freeze_backbone: True 4 | resnet_model: "resnet18" 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/resnet.yaml: -------------------------------------------------------------------------------- 1 | _target_: 
hulc2.models.perceptual_encoders.vision_resnet.VisionResnet 2 | visual_features: 64 3 | freeze_backbone: True 4 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/resnet_aff.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_resnet_aff.VisionResnetAff 2 | visual_features: 64 3 | freeze_backbone: True 4 | input_shape: [200, 200, 3] 5 | depth: 3 6 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/vision_conv.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network_conv.VisionNetworkConv 2 | activation_function: ReLU #ELU 3 | dropout_vis_fc: 0.0 4 | l2_normalize_output: false 5 | visual_features: 64 6 | num_c: 3 7 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/state_decoder/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.auxiliary_loss_networks.StateDecoder 2 | visual_features: 64 3 | n_state_obs: 8 4 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/state_decoder/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/state_decoder/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/static_RGBD.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - rgb_gripper: none 7 | - depth_static: default 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/static_RGB_tactile.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - rgb_gripper: none 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: default 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/tactile/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin.models.perceptual_encoders.tactile_encoder.TactileEncoder 2 | visual_features: 64 3 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/tactile/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/tactile/none.yaml -------------------------------------------------------------------------------- /conf/model/plan_proposal/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: 
hulc2.models.plan_encoders.plan_proposal_net.PlanProposalNetwork 2 | perceptual_features: ??? 3 | latent_goal_features: ${model.visual_goal.latent_goal_features} 4 | plan_features: ??? 5 | activation_function: ReLU #ELU 6 | hidden_size: 2048 7 | -------------------------------------------------------------------------------- /conf/model/plan_recognition/bilstm.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.plan_encoders.plan_recognition_net.PlanRecognitionBiLSTMNetwork 2 | in_features: ??? 3 | plan_features: 256 4 | action_space: ${datamodule.action_space} 5 | birnn_dropout_p: 0.0 6 | -------------------------------------------------------------------------------- /conf/model/plan_recognition/birnn.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.plan_encoders.plan_recognition_net.PlanRecognitionBiRNNNetwork 2 | in_features: ??? 3 | plan_features: 256 4 | action_space: ${datamodule.action_space} 5 | birnn_dropout_p: 0.0 6 | -------------------------------------------------------------------------------- /conf/model/plan_recognition/transformers.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.plan_encoders.plan_recognition_net.PlanRecognitionTransformersNetwork 2 | num_heads: 8 3 | num_layers: 2 4 | encoder_hidden_size: 2048 5 | fc_hidden_size: 4096 6 | in_features: ??? 7 | plan_features: ??? 8 | action_space: ${datamodule.action_space} 9 | dropout_p: 0.1 10 | encoder_normalize: false 11 | positional_normalize: false 12 | position_embedding: true 13 | max_position_embeddings: ${datamodule.datasets.lang_dataset.max_window_size} 14 | -------------------------------------------------------------------------------- /conf/model/proj_vis_lang/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.auxiliary_loss_networks.proj_vis_lang.ProjVisLang 2 | im_dim: ${model.plan_recognition.fc_hidden_size} 3 | lang_dim: ${model.language_goal.latent_goal_features} 4 | output_dim: ${model.language_goal.latent_goal_features} 5 | proj_lang: true 6 | -------------------------------------------------------------------------------- /conf/model/proj_vis_lang/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/proj_vis_lang/none.yaml -------------------------------------------------------------------------------- /conf/model/real_world_hulc++.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - perceptual_encoder: gripper_cam 3 | - plan_proposal: default 4 | - plan_recognition: transformers 5 | - distribution: discrete 6 | - visual_goal: default 7 | - language_encoder: sbert 8 | - language_goal: default 9 | - action_decoder: logistic_decoder_rnn_real_world 10 | - optimizer: adam 11 | - lr_scheduler: constant 12 | - proj_vis_lang: none 13 | 14 | _target_: hulc2.models.hulc2.Hulc2 15 | _recursive_: false 16 | 17 | kl_beta: ${loss.kl_beta} 18 | kl_balancing_mix: ${loss.kl_balancing_mix} 19 | replan_freq: 30 20 | use_clip_auxiliary_loss: false 21 | clip_auxiliary_loss_beta: ${loss.clip_auxiliary_loss_beta} 22 | -------------------------------------------------------------------------------- /conf/model/sbert.yaml:
-------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.language_network.SBert 2 | freeze_backbone: True 3 | nlp_model: "paraphrase-MiniLM-L3-v2" 4 | -------------------------------------------------------------------------------- /conf/model/visual_goal/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.goal_encoders.VisualGoalEncoder 2 | in_features: ??? 3 | hidden_size: 2048 4 | latent_goal_features: 32 5 | l2_normalize_goal_embeddings: False 6 | activation_function: ReLU #ELU 7 | -------------------------------------------------------------------------------- /conf/paths/general_paths.yaml: -------------------------------------------------------------------------------- 1 | parent_folder: ../ 2 | vr_data: ${paths.parent_folder}/VREnv/data/ 3 | 4 | # Trained affordance models 5 | trained_models: ${paths.vapo_path}/trained_models/ 6 | 7 | # For training affordance model and policy 8 | datasets: /tmp/datasets/ 9 | -------------------------------------------------------------------------------- /conf/simulation/agent/base.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.agents.base_agent.BaseAgent 2 | _recursive_: False 3 | -------------------------------------------------------------------------------- /conf/simulation/agent/baseline.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.agents.lmp_agent.PlayLMPAgent 2 | _recursive_: False 3 | dataset_path: ${paths.datasets}/unprocessed/task_D_D 4 | checkpoint: 5 | train_folder: ./trained_agents/D_D_static_rgb_baseline 6 | model_name: mcil_baseline.ckpt 7 | offset: ${gripper_offset} 8 | -------------------------------------------------------------------------------- /conf/simulation/agent/play_lmp.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.agents.lmp_agent.PlayLMPAgent 2 | _recursive_: False 3 | dataset_path: ${paths.datasets}/unprocessed/task_D_D 4 | move_outside: False 5 | checkpoint: 6 | train_folder: ./trained_agents/lfp 7 | model_name: epoch=30.ckpt 8 | offset: ${gripper_offset} 9 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/gripper.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.gripper_camera.GripperCamera 2 | name: gripper 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 84 8 | height: 84 9 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/opposing.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: opposing 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 200 8 | height: 200 9 | look_at: [ 0.4, 0.5, 0.6 ] 10 | look_from: [ 0.4, 1.5, 0.9 ] 11 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/static.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 300 8 | height: 300 9 | look_at: [ -0.026242351159453392, -0.0302329882979393, 
0.3920000493526459] 10 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/static_calvin.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 200 8 | height: 200 9 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 10 | look_from: [2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/tactile.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.tactile_sensor.TactileSensor 2 | name: tactile 3 | width: 120 4 | height: 160 5 | digit_link_ids: [10, 12] # ${robot.digit_link_ids} 6 | visualize_gui: true 7 | config_path: conf/digit_sensor/config_digit.yml 8 | -------------------------------------------------------------------------------- /conf/simulation/cameras/high_res.yaml: -------------------------------------------------------------------------------- 1 | static: 2 | _target_: calvin_env.camera.static_camera.StaticCamera 3 | name: static 4 | fov: 10 5 | aspect: 1 6 | nearval: 0.01 7 | farval: 10 8 | width: 500 9 | height: 500 10 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 11 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 12 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 13 | 14 | gripper: 15 | _target_: calvin_env.camera.gripper_camera.GripperCamera 16 | name: gripper 17 | fov: 75 18 | aspect: 1 19 | nearval: 0.01 20 | farval: 2 21 | width: 300 22 | height: 300 23 | -------------------------------------------------------------------------------- /conf/simulation/cameras/no_cameras.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/simulation/cameras/no_cameras.yaml -------------------------------------------------------------------------------- /conf/simulation/cameras/static_and_gripper.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/simulation/cameras/static_and_gripper_calvin.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static_calvin 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/simulation/cameras/static_and_tactile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@tactile: tactile 4 | -------------------------------------------------------------------------------- /conf/simulation/env/env.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.calvin_env.envs.play_table_env.PlayTableSimEnv 2 | 
_recursive_: false 3 | cameras: ${cameras} 4 | seed: 0 5 | bullet_time_step: 240.0 6 | use_vr: False 7 | show_gui: False 8 | robot_cfg: ${robot} 9 | scene_cfg: ${scene} 10 | use_scene_info: false 11 | use_egl: true 12 | control_freq: 30 13 | -------------------------------------------------------------------------------- /conf/simulation/robot/panda.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.robot.robot.Robot 2 | filename: franka_panda/panda.urdf 3 | base_position: ${scene.robot_base_position} 4 | base_orientation: ${scene.robot_base_orientation} 5 | initial_joint_positions: ${scene.robot_initial_joint_positions} 6 | max_joint_force: 200.0 7 | gripper_force: 200 8 | arm_joint_ids: [0, 1, 2, 3, 4, 5, 6] 9 | gripper_joint_ids: [9, 10] 10 | gripper_joint_limits: [0, 0.04] 11 | tcp_link_id: 13 12 | end_effector_link_id: 7 13 | gripper_cam_link: 12 14 | use_nullspace: false 15 | max_velocity: 2 16 | use_ik_fast: false 17 | magic_scaling_factor_pos: 1 # 1.6 18 | magic_scaling_factor_orn: 1 # 2.2 19 | use_target_pose: true 20 | euler_obs: true 21 | # workspace_limits: [[-0.25, 0.2, 0.61], [0.9, 1, 1.2]] 22 | max_rel_pos: 0.03 23 | max_rel_orn: 0.1 24 | -------------------------------------------------------------------------------- /conf/simulation/robot/panda_digit.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - panda 3 | 4 | filename: franka_panda/panda_digit.urdf 5 | gripper_joint_ids: [9, 11] 6 | tcp_link_id: 15 7 | -------------------------------------------------------------------------------- /conf/simulation/robot/panda_longer_finger.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - panda 3 | 4 | filename: franka_panda/panda_longer_finger.urdf 5 | gripper_joint_ids: [9, 11] 6 | tcp_link_id: 15 7 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_A.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[-0.2, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_A/urdf/calvin_table_A.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_big.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: 
blocks/block_pink_small.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_A_eval.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_A/urdf/calvin_table_A.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: [0, -0.12, 0.46] 43 | initial_orn: [0, 0, 1.57] 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: [0.2, -0.12, 0.46] 47 | initial_orn: [0, 0, 0] 48 | block_pink: 49 | file: blocks/block_pink_big.urdf 50 | initial_pos: [0.10, 0.08, 0.46] 51 | initial_orn: [0, 0, 1.57] 52 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_B.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[-0.35, -0.15, 0.46], [0.15, -0.03, 0.46]] 11 | slider_left: [[-0.12, 0.05, 0.46], [0.06, 0.12, 0.46]] 12 | slider_right: [[0.15, 0.05, 0.46], [0.33, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_B/urdf/calvin_table_B.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_small.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_big.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: blocks/block_pink_middle.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | 
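
Note: these scene YAMLs are standard Hydra object configs, where `_target_` names the class to build and the remaining keys become constructor arguments, while interpolations such as `${data_path}` and `${robot.euler_obs}` are resolved from the composed top-level config. The following is a minimal sketch (not part of the repository) of loading one of these files with OmegaConf and reading its non-interpolated fields; the actual scene instantiation happens through Hydra and may require additional runtime arguments from calvin_env that are not shown here.

# Minimal sketch, assuming it is run from the repository root.
# Interpolated keys like ${data_path} are left unresolved on purpose.
from omegaconf import OmegaConf

cfg = OmegaConf.load("conf/simulation/scene/calvin_scene_B.yaml")
print(cfg["_target_"])                              # calvin_env.scene.play_table_scene.PlayTableScene
print(cfg.objects.movable_objects.block_red.file)   # blocks/block_red_small.urdf
print(OmegaConf.to_yaml(cfg.surfaces))              # table / slider_left / slider_right extents
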
-------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_C.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.12, 0.05, 0.46], [0.06, 0.12, 0.46]] 12 | slider_right: [[0.15, 0.05, 0.46], [0.3, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_C/urdf/calvin_table_C.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_big.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: blocks/block_pink_middle.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_D.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.2230011780331578, 1.322365213449541, 2.6154021466198802, -2.811095767734293, -0.5087544766657718, 1.531250568385011, 0.969207720370319] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_D/urdf/calvin_table_D.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: blocks/block_pink_big.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | -------------------------------------------------------------------------------- 
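
Note: the `_eval` scene variants (such as `calvin_scene_D_eval.yaml` below) pin each movable block to a fixed `initial_pos`/`initial_orn`, whereas the corresponding training scenes use `any` so block placement is randomized; fixing the poses makes evaluation rollouts reproducible. A quick way to see this difference, sketched with OmegaConf under the assumption that the script runs from the repository root:

# Sketch: compare block initialization between a training scene and its eval twin.
from omegaconf import OmegaConf

train_scene = OmegaConf.load("conf/simulation/scene/calvin_scene_D.yaml")
eval_scene = OmegaConf.load("conf/simulation/scene/calvin_scene_D_eval.yaml")
for name in train_scene.objects.movable_objects:
    print(name,
          train_scene.objects.movable_objects[name].initial_pos,  # 'any' (randomized)
          eval_scene.objects.movable_objects[name].initial_pos)   # fixed [x, y, z]
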
/conf/simulation/scene/calvin_scene_D_eval.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_D/urdf/calvin_table_D.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: [0.05, -0.12, 0.46] 43 | initial_orn: [0, 0, 1.57] 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: [0.23, -0.12, 0.46] 47 | initial_orn: [0, 0, 0] 48 | block_pink: 49 | file: blocks/block_pink_big.urdf 50 | initial_pos: [0.10, 0.08, 0.46] 51 | initial_orn: [0, 0, 1.57] 52 | -------------------------------------------------------------------------------- /conf/trainer/play_trainer.yaml: -------------------------------------------------------------------------------- 1 | accelerator: gpu 2 | devices: 1 3 | precision: 16 4 | max_epochs: 100 5 | sync_batchnorm: false 6 | -------------------------------------------------------------------------------- /conf/training/default_training.yaml: -------------------------------------------------------------------------------- 1 | lr: 0.0002 2 | -------------------------------------------------------------------------------- /conf/utils/combine_dataset.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - _self_ 4 | 5 | src_dirs: 6 | - "/work/dlclarge2/roseteb-thesis/dataset/validation" 7 | - "/work/dlclarge2/roseteb-thesis/dataset/erick_data3" 8 | 9 | dest: "/work/dlclarge2/roseteb-thesis/dataset/new_validation" 10 | -------------------------------------------------------------------------------- /dataset/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download, Unzip, and Remove zip 4 | if [ "$1" = "D" ] 5 | then 6 | 7 | echo "Downloading task_D_D ..." 8 | wget http://calvin.cs.uni-freiburg.de/dataset/task_D_D.zip 9 | unzip task_D_D.zip && rm task_D_D.zip 10 | echo "saved folder: task_D_D" 11 | elif [ "$1" = "ABC" ] 12 | then 13 | 14 | echo "Downloading task_ABC_D ..." 15 | wget http://calvin.cs.uni-freiburg.de/dataset/task_ABC_D.zip 16 | unzip task_ABC_D.zip && rm task_ABC_D.zip 17 | echo "saved folder: task_ABC_D" 18 | 19 | elif [ "$1" = "ABCD" ] 20 | then 21 | 22 | echo "Downloading task_ABCD_D ..." 
23 | wget http://calvin.cs.uni-freiburg.de/dataset/task_ABCD_D.zip 24 | unzip task_ABCD_D.zip && rm task_ABCD_D.zip 25 | echo "saved folder: task_ABCD_D" 26 | 27 | elif [ "$1" = "debug" ] 28 | then 29 | 30 | echo "Downloading debug dataset ..." 31 | wget http://calvin.cs.uni-freiburg.de/dataset/calvin_debug_dataset.zip 32 | unzip calvin_debug_dataset.zip && rm calvin_debug_dataset.zip 33 | echo "saved folder: calvin_debug_dataset" 34 | 35 | 36 | else 37 | echo "Failed: Usage download_data.sh D | ABC | ABCD | debug" 38 | exit 1 39 | fi 40 | -------------------------------------------------------------------------------- /hulc2/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | play_data/ 3 | __pycache__/ 4 | relay-policy-learning/ 5 | puppet/ 6 | mjrl/ 7 | results/ 8 | runs/ 9 | analysis/videos/ 10 | analysis/tsne_results/proposal_clusters/ 11 | analysis/tsne_results/unseen_data_collection/ 12 | -------------------------------------------------------------------------------- /hulc2/__init__.py: -------------------------------------------------------------------------------- 1 | """'Learning from Play implementation in pytorch 2 | :copyright: 2020 by Oier Mees 3 | :license: GPLv3, see LICENSE for more details. 4 | """ 5 | 6 | __version__ = "0.0.1" 7 | __project__ = "hulc2" 8 | __author__ = "Oier Mees" 9 | __license__ = "GPLv3" 10 | __email__ = "meeso@informatik.uni-freiburg.de" 11 | -------------------------------------------------------------------------------- /hulc2/affordance/base_detector.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | class BaseDetector: 6 | def __init__(self, cfg, *args, **kwargs): 7 | self.n_classes = 1 8 | cm = plt.get_cmap("jet") 9 | self._colors = cm(np.linspace(0, 1, self.n_classes)) 10 | self.clusters = {} 11 | 12 | @property 13 | def colors(self): 14 | return self._colors 15 | 16 | @colors.setter 17 | def colors(self, value): 18 | self._colors = value 19 | 20 | def predict(self, new_point): 21 | return 0 22 | -------------------------------------------------------------------------------- /hulc2/affordance/dataset_creation/create_percentage_data_splits.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | from hulc2.utils.utils import get_abspath, split_by_percentage 6 | 7 | 8 | def main(args): 9 | root_dir = get_abspath(args.root_dir) 10 | json_file = os.path.join(root_dir, "episodes_split.json") 11 | data_percent = [0.75, 0.50, 0.25] 12 | 13 | with open(json_file) as f: 14 | episodes_split = json.load(f) 15 | 16 | for percentage in data_percent: 17 | episodes_split_percentage = split_by_percentage(root_dir, episodes_split, percentage) 18 | jsons_filename = root_dir + "/episodes_split_%s.json" % str(percentage * 100) 19 | with open(jsons_filename, "w") as outfile: 20 | json.dump(episodes_split_percentage, outfile, indent=2) 21 | 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser(description="create episodes_split.json for different percentage of original data") 25 | parser.add_argument("--root_dir", default=None, type=str, help="path to processed dataset") 26 | args = parser.parse_args() 27 | main(args) 28 | -------------------------------------------------------------------------------- /hulc2/affordance/dataset_creation/merge_datasets.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from pathlib import Path 5 | 6 | import yaml 7 | 8 | 9 | def to_abs(path): 10 | if os.path.isabs(path): 11 | return path 12 | else: 13 | repo_src_dir = Path(__file__).absolute().parents[1] 14 | return os.path.abspath(repo_src_dir / path) 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description="Process some integers.") 19 | parser.add_argument("--output_dir", type=str, default="", help="directory to output merged episodes_split.json") 20 | 21 | args = parser.parse_args() 22 | cfg_path = to_abs("../../conf/affordance/cfg_merge_dataset.yaml") 23 | with open(cfg_path, "r") as stream: 24 | directory_list = yaml.safe_load(stream)["data_lst"] 25 | 26 | if args.output_dir == "": 27 | output_dir = to_abs(os.path.dirname(directory_list[0])) 28 | else: 29 | output_dir = to_abs(args.output_dir) 30 | 31 | print("Writing to %s " % output_dir) 32 | return output_dir, directory_list 33 | 34 | 35 | # Merge datasets using json files 36 | def merge_datasets(): 37 | output_dir, directory_list = parse_args() 38 | 39 | new_data = {"training": {}, "validation": {}} 40 | for dir in directory_list: 41 | abs_dir = os.path.abspath(dir) 42 | json_path = os.path.join(abs_dir, "episodes_split.json") 43 | with open(json_path) as f: 44 | data = json.load(f) 45 | 46 | # Rename episode numbers if repeated 47 | data_keys = list(data.keys()) 48 | split_keys = ["validation", "training"] 49 | other_keys = [k for k in data_keys if k not in split_keys] 50 | episode = 0 51 | for split in split_keys: 52 | dataset_name = os.path.basename(os.path.normpath(dir)) 53 | for key in data[split].keys(): 54 | new_data[split]["/%s/%s" % (dataset_name, key)] = data[split][key] 55 | episode += 1 56 | for key in other_keys: 57 | new_data[key] = data[key] 58 | # Write output 59 | if not os.path.exists(output_dir): 60 | os.makedirs(output_dir) 61 | out_file = os.path.join(output_dir, "episodes_split.json") 62 | with open(out_file, "w") as outfile: 63 | json.dump(new_data, outfile, indent=2) 64 | 65 | 66 | if __name__ == "__main__": 67 | merge_datasets() 68 | -------------------------------------------------------------------------------- /hulc2/affordance/models/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/affordance/models/core/__init__.py -------------------------------------------------------------------------------- /hulc2/affordance/models/core/language_network.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | from sentence_transformers import SentenceTransformer 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class SBert(nn.Module): 10 | def __init__(self, weights): 11 | super().__init__() 12 | self.model = SentenceTransformer(weights) 13 | 14 | def forward(self, x: List, show_progress_bar: bool = False) -> torch.Tensor: 15 | emb = self.model.encode(x, convert_to_tensor=True, show_progress_bar=show_progress_bar) 16 | return torch.unsqueeze(emb, 1) 17 | -------------------------------------------------------------------------------- /hulc2/affordance/models/lang_fusion/one_stream_attention_lang_fusion_mask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 
import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from hulc2.affordance.models.lang_fusion.one_stream_attention_lang_fusion_pixel import AttentionLangFusionPixel 7 | 8 | 9 | class AttentionLangFusionMask(AttentionLangFusionPixel): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | # self.output_dim = out_channels = n_classes 13 | if self.output_dim > 1: 14 | # Softmax over channels 15 | self.act_fnc = torch.nn.Softmax(1) 16 | else: 17 | self.act_fnc = torch.nn.Sigmoid() 18 | 19 | def forward(self, inp_img, lang_goal, softmax=True): 20 | """Forward pass.""" 21 | in_data = F.pad(inp_img, self.padding, mode="constant") 22 | in_tens = in_data.to(dtype=torch.float) # [B 3 H W] 23 | 24 | # Forward pass. 25 | aff_out, info = self.attend(in_tens, lang_goal) 26 | if softmax: 27 | aff_out = self.act_fnc(aff_out) 28 | 29 | c0 = np.array([self.padding[2], self.padding[0]]) # top(H), left(W) 30 | c1 = c0 + inp_img.shape[2:] 31 | aff_out = aff_out[:, :, c0[0] : c1[0], c0[1] : c1[1]] 32 | 33 | info["affordance"] = aff_out 34 | return info 35 | -------------------------------------------------------------------------------- /hulc2/affordance/models/lang_fusion/one_stream_attention_lang_fusion_pixel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | import hulc2.models as models 7 | 8 | 9 | class AttentionLangFusionPixel(nn.Module): 10 | def __init__(self, stream_fcn, in_shape, cfg, device, output_dim=1): 11 | super().__init__() 12 | self.fusion_type = cfg.attn_stream_fusion_type 13 | self.stream_fcn = stream_fcn 14 | self.cfg = cfg 15 | self.batchnorm = self.cfg.batchnorm 16 | 17 | self.padding = np.zeros((3, 2), dtype=int) # H, W, C 18 | max_dim = np.max(in_shape[:2]) 19 | pad = (max_dim - np.array(in_shape[:2])) / 2 20 | self.padding[:2] = pad.reshape(2, 1) # H, W, C 21 | 22 | in_shape = np.array(in_shape) 23 | in_shape += np.sum(self.padding, axis=1) 24 | in_shape = list(in_shape) 25 | 26 | # for torch: left, right,(W) top, bottom,(H) front, back(C) 27 | self.padding = self.padding[[1, 0, 2]] # C, H, W 28 | self.padding = tuple(self.padding.flatten()) 29 | self.in_shape = in_shape 30 | self.output_dim = output_dim 31 | self._build_nets() 32 | 33 | @property 34 | def decoder_layers(self): 35 | return self.attn_stream.decoder_layers 36 | 37 | def _build_nets(self): 38 | stream_one_fcn = self.stream_fcn 39 | stream_one_model = models.lang_img_nets[stream_one_fcn] 40 | 41 | self.stream_one = stream_one_model(self.in_shape, self.output_dim, self.cfg) 42 | print(f"Attn FCN: {stream_one_fcn}") 43 | 44 | def attend(self, x, l): 45 | x = self.stream_one(x, l) 46 | return x 47 | 48 | def forward(self, inp_img, lang_goal, softmax=True): 49 | """Forward pass.""" 50 | in_data = F.pad(inp_img, self.padding, mode="constant") 51 | in_tens = in_data.to(dtype=torch.float, device=self.stream_one.device) # [B 3 H W] 52 | 53 | # Forward pass. 
54 | logits, _info = self.attend(in_tens, lang_goal) 55 | 56 | c0 = np.array([self.padding[2], self.padding[0]]) # top(H), left(W) 57 | c1 = c0 + inp_img.shape[2:] 58 | logits = logits[:, :, c0[0] : c1[0], c0[1] : c1[1]] 59 | 60 | logits = logits.permute(0, 2, 3, 1) # [B H W 1] 61 | output = logits.reshape(logits.shape[0], np.prod(logits.shape[1:])) 62 | if softmax: 63 | output = F.softmax(output, dim=-1) 64 | output = output.reshape(logits.shape) 65 | return output, _info 66 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/base_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import DistilBertModel, DistilBertTokenizer 4 | 5 | 6 | class LangEncoder(nn.Module): 7 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 8 | super(LangEncoder, self).__init__() 9 | self.freeze_backbone = freeze_backbone 10 | self.pretrained = pretrained 11 | self._load_model() 12 | 13 | def _load_model(self): 14 | raise NotImplementedError() 15 | 16 | def encode_text(self, x): 17 | """ 18 | Returns: 19 | - text_encodings 20 | - text_embeddings 21 | - text_mask 22 | """ 23 | raise NotImplementedError() 24 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/bert_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import BertConfig, BertModel, BertTokenizer 4 | 5 | from hulc2.affordance.models.language_encoders.base_lang_encoder import LangEncoder 6 | 7 | 8 | class BERTLang(LangEncoder): 9 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 10 | super(BERTLang, self).__init__(freeze_backbone, pretrained) 11 | 12 | def _load_model(self): 13 | self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") 14 | if self.pretrained: 15 | self.text_encoder = BertModel.from_pretrained("bert-base-uncased") 16 | else: 17 | distilbert_config = BertConfig() 18 | self.text_encoder = BertModel(distilbert_config) 19 | _embd_dim = 768 20 | self.text_fc = nn.Linear(_embd_dim, 1024) 21 | 22 | def encode_text(self, x): 23 | with torch.set_grad_enabled(not self.freeze_backbone): 24 | inputs = self.tokenizer(x, return_tensors="pt", padding=True, truncation=True) 25 | input_ids, attention_mask = inputs["input_ids"], inputs["attention_mask"] 26 | input_ids = input_ids.to(self.text_encoder.device) 27 | attention_mask = attention_mask.to(self.text_encoder.device) 28 | text_embeddings = self.text_encoder(input_ids, attention_mask) 29 | text_encodings = text_embeddings.last_hidden_state.mean(1) 30 | 31 | text_feat = self.text_fc(text_encodings) 32 | text_mask = torch.ones_like(input_ids) # [1, max_token_len] 33 | return text_feat, text_embeddings.last_hidden_state, text_mask 34 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/clip_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from hulc2.affordance.models.core.clip import build_model, load_clip, tokenize 5 | from hulc2.affordance.models.language_encoders.base_lang_encoder import LangEncoder 6 | 7 | 8 | class CLIPLang(LangEncoder): 9 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 10 | 
super(CLIPLang, self).__init__(freeze_backbone, pretrained) 11 | 12 | def _load_model(self): 13 | model, _ = load_clip("RN50", jit=False) 14 | _clip_rn50 = build_model(model.state_dict()) 15 | del model 16 | if self.freeze_backbone: 17 | for param in _clip_rn50.parameters(): 18 | param.requires_grad = False 19 | # for param in _clip_rn50.layer4.parameters(): 20 | # param.requires_grad = True 21 | else: 22 | _clip_rn50 = _clip_rn50.float() 23 | # modules = list(net.children())[:-1] 24 | self.model = _clip_rn50 25 | 26 | def encode_text(self, x): 27 | with torch.set_grad_enabled(not self.freeze_backbone): 28 | tokens = tokenize(x) 29 | tokens = tokens.to(self.model.positional_embedding.device) 30 | text_feat, text_emb = self.model.encode_text_with_embeddings(tokens) 31 | 32 | text_mask = torch.where(tokens == 0, tokens, 1) 33 | return text_feat, text_emb, text_mask 34 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/distilbert_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import DistilBertConfig, DistilBertModel, DistilBertTokenizer 4 | 5 | from hulc2.affordance.models.language_encoders.base_lang_encoder import LangEncoder 6 | 7 | 8 | class DistilBERTLang(LangEncoder): 9 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 10 | super(DistilBERTLang, self).__init__(freeze_backbone, pretrained) 11 | 12 | def _load_model(self): 13 | self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 14 | if self.pretrained: 15 | self.text_encoder = DistilBertModel.from_pretrained("distilbert-base-uncased") 16 | else: 17 | distilbert_config = DistilBertConfig() 18 | self.text_encoder = DistilBertModel(distilbert_config) 19 | _embd_dim = 768 20 | self.text_fc = nn.Linear(_embd_dim, 1024) 21 | 22 | def encode_text(self, x): 23 | with torch.set_grad_enabled(not self.freeze_backbone): 24 | inputs = self.tokenizer(x, return_tensors="pt", padding=True, truncation=True) 25 | input_ids, attention_mask = inputs["input_ids"], inputs["attention_mask"] 26 | input_ids = input_ids.to(self.text_encoder.device) 27 | attention_mask = attention_mask.to(self.text_encoder.device) 28 | text_embeddings = self.text_encoder(input_ids, attention_mask) 29 | text_encodings = text_embeddings.last_hidden_state.mean(1) 30 | 31 | text_feat = self.text_fc(text_encodings) 32 | text_mask = torch.ones_like(input_ids) # [1, max_token_len] 33 | return text_feat, text_embeddings.last_hidden_state, text_mask 34 | -------------------------------------------------------------------------------- /hulc2/affordance/models/visual_lang_encoders/base_lingunet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class BaseLingunet(nn.Module): 6 | """BaseClass with U-Net skip connections and [] language encoder""" 7 | 8 | def __init__(self, input_shape, output_dim, cfg, *args, **kwargs): 9 | super().__init__() 10 | self.input_shape = input_shape 11 | self.cfg = cfg 12 | self.lang_fusion_type = self.cfg["lang_fusion_type"] 13 | -------------------------------------------------------------------------------- /hulc2/affordance/run_on_cluster/sbatch_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo 
"Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | srun python ${@:2} 20 | 21 | # Print some Information about the end-time to STDOUT 22 | echo "DONE"; 23 | echo "Finished at $(date)"; 24 | -------------------------------------------------------------------------------- /hulc2/affordance/run_on_cluster/sbatch_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo "Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | srun python $2 hydra.run.dir=$3 ${@:5} 20 | 21 | # Print some Information about the end-time to STDOUT 22 | echo "DONE"; 23 | echo "Finished at $(date)"; 24 | -------------------------------------------------------------------------------- /hulc2/affordance/scripts/get_best_eval_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | import hydra 6 | 7 | from hulc2.utils.utils import get_abspath 8 | 9 | 10 | def main(json_file): 11 | with open(json_file) as f: 12 | data = json.load(f) 13 | best_model = max(data, key=lambda v: data[v]["avg_seq_len"]) 14 | print(best_model) 15 | print(data[best_model]["avg_seq_len"]) 16 | print(data[best_model]["chain_sr"]) 17 | 18 | 19 | if __name__ == "__main__": 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("-f", "--file", type=str) 22 | 23 | args = parser.parse_args() 24 | 25 | json_file = get_abspath(args.file) 26 | main(json_file) 27 | -------------------------------------------------------------------------------- /hulc2/affordance/scripts/transform_old_episodes_split.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import json 3 | import os 4 | 5 | 6 | def read_json(json_file): 7 | with open(json_file) as f: 8 | data = json.load(f) 9 | return data 10 | 11 | 12 | def main(root_dir): 13 | data_old_format = read_json(os.path.join(root_dir, "episodes_split.json")) 14 | data_new_format = {"training": {}, "validation": {}} 15 | 16 | for split in ["training", "validation"]: 17 | for ep in data_old_format[split]: 18 | data_new_format[split][ep] = {"gripper_cam": [], "static_cam": []} 19 | _gripper_data, _static_data = [], [] 20 | for frame in data_old_format[split][ep]: 21 | cam_type, _fram_name = frame.split("/") 22 | data_new_format[split][ep][cam_type].append(_fram_name) 23 | 24 | new_file = os.path.join(root_dir, "episodes_split_new.json") 25 | with open(new_file, "w") as outfile: 26 | json.dump(data_new_format, outfile, indent=2) 27 | 28 | 29 | if __name__ == "__main__": 30 | root_dir = 
"/mnt/ssd_shared/Users/Jessica/Documents/hulc2_ssd/datasets/real_world/500k_all_tasks_dataset_15hz" 31 | main(root_dir) 32 | -------------------------------------------------------------------------------- /hulc2/affordance/test_move_to_pt.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | import torch 3 | 4 | from hulc2.env_wrappers.play_aff_lmp_wrapper import PlayLMPWrapper 5 | 6 | 7 | @hydra.main(config_path="../../conf", config_name="cfg_high_level") 8 | def main(cfg): 9 | # Load env 10 | env = hydra.utils.instantiate(cfg.env) 11 | env = PlayLMPWrapper(env, torch.device("cuda:0")) 12 | agent = hydra.utils.instantiate(cfg.agent, env=env, aff_cfg=cfg.aff_detection) 13 | obs = env.reset() 14 | 15 | captions = ["Lift the red block", "Stored the grasped block in the cabinet", "turn on the yellow light"] 16 | for caption in captions: # n instructions 17 | # caption = "use the switch to turn on the light bulb" # input("Type an instruction \n") 18 | # caption = "open the drawer" 19 | # obs = env.reset() 20 | agent.reset(caption) 21 | if agent.model_free.lang_encoder is not None: 22 | goal = {"lang": [caption]} 23 | else: 24 | goal = agent.encode(caption) 25 | for j in range(cfg.max_timesteps): 26 | action = agent.step(obs, goal) 27 | obs, _, _, info = env.step(action) 28 | agent.save_dir["rollout_counter"] += 1 29 | agent.save_sequence_txt("sequence", captions) 30 | agent.save_sequence() 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /hulc2/affordance/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import os 3 | 4 | import hydra 5 | import numpy as np 6 | from scipy.spatial.transform.rotation import Rotation as R 7 | 8 | 9 | def split_by_percentage(root_dir, episodes_split, data_percent): 10 | new_episodes_split = deepcopy(episodes_split) 11 | 12 | # Change training split 13 | split = "training" 14 | # Get original data start end ids 15 | start_end_ids = os.path.join(root_dir, "%s/ep_start_end_ids.npy" % split) 16 | orig_start_end_ids = np.load(start_end_ids) 17 | 18 | # Split the dataset the same as it is split in learning_fom_play_repo 19 | new_start_end_ids = get_split_data(orig_start_end_ids, data_percent) 20 | for episode_dir, cam_frames in episodes_split[split].items(): 21 | for cam, frames in cam_frames.items(): 22 | valid_frames = [] 23 | cam_frame_ids = np.array([int(f.split("_")[-1]) for f in frames]) 24 | 25 | # Check valid frames 26 | if len(cam_frame_ids) > 0: 27 | for start, end in new_start_end_ids: 28 | cond = np.logical_and(cam_frame_ids >= start, cam_frame_ids <= end) 29 | inside_ep = np.where(cond)[0] 30 | valid_frames.extend([i for i in inside_ep]) 31 | 32 | # Replace 33 | new_episodes_split[split][episode_dir][cam] = list(np.array(frames)[valid_frames]) 34 | return new_episodes_split 35 | 36 | 37 | def get_split_data(play_start_end_ids, data_percent): 38 | start_end_ids = np.array(play_start_end_ids) 39 | cumsum = np.cumsum([e - s for s, e in play_start_end_ids]) 40 | 41 | n_samples = int(cumsum[-1] * data_percent) 42 | max_idx = min(n_samples, cumsum[-1]) if n_samples > 0 else cumsum[-1] 43 | indices = [0] 44 | for i in range(len(cumsum) - 1): 45 | if cumsum[i] <= max_idx: 46 | indices.append(i + 1) 47 | 48 | # Valid play-data start_end_ids episodes 49 | start_end_ids = [start_end_ids[i] for i in indices] 50 | diff = cumsum[indices[-1]] - 
n_samples 51 | start_end_ids[-1][-1] = start_end_ids[-1][-1] - diff 52 | return np.array(start_end_ids) 53 | 54 | 55 | def depth_img_from_uint16(depth_img, max_depth=4): 56 | depth_img[np.isnan(depth_img)] = 0 57 | return (depth_img.astype("float") / (2**16 - 1)) * max_depth 58 | 59 | 60 | def euler_to_quat(euler_angles): 61 | """xyz euler angles to xyzw quat""" 62 | return R.from_euler("xyz", euler_angles).as_quat() 63 | 64 | 65 | def quat_to_euler(quat): 66 | """xyz euler angles to xyzw quat""" 67 | return R.from_quat(quat).as_euler("xyz") 68 | -------------------------------------------------------------------------------- /hulc2/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/datasets/__init__.py -------------------------------------------------------------------------------- /hulc2/datasets/random.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import pytorch_lightning as pl 4 | from pytorch_lightning.trainer.supporters import CombinedLoader 5 | import torch 6 | from torch.utils.data import DataLoader 7 | import torchvision 8 | 9 | 10 | class RandomDataset(torch.utils.data.Dataset): 11 | def __init__(self, n_examples: int = 64, window_size: int = 32, split: str = "train", transforms: List = []): 12 | self.n_examples = n_examples 13 | self.split = split 14 | self.data = [ 15 | dict( 16 | images=torch.rand(window_size, 3, 200, 200), 17 | observations=torch.rand(window_size, 8), 18 | actions=torch.rand(window_size, 7), 19 | ) 20 | for x in range(n_examples) 21 | ] 22 | self.transform = torchvision.transforms.Compose(transforms) 23 | 24 | def __getitem__(self, idx): 25 | x = self.data[idx] 26 | seq_acts = x["actions"] 27 | seq_rgb_obs = (x["images"],) 28 | seq_depth_obs = (x["images"],) 29 | seq_state_obs = x["observations"] 30 | seq_lang = torch.empty(0) 31 | info = {} 32 | return seq_state_obs, seq_rgb_obs, tuple([]), seq_acts, seq_lang, info, idx 33 | 34 | def __len__(self): 35 | return self.n_examples 36 | 37 | 38 | class RandomDataModule(pl.LightningDataModule): 39 | def __init__(self, batch_size: int = 16, train_transforms: List = [], val_transforms: List = [], **kwargs: Dict): 40 | super().__init__() 41 | self.batch_size = batch_size 42 | self.train_dataset = RandomDataset(n_examples=32, window_size=16, split="train", transforms=train_transforms) 43 | self.val_dataset = RandomDataset(n_examples=32, window_size=16, split="val", transforms=val_transforms) 44 | self.modalities = ["vis"] 45 | 46 | def train_dataloader(self): 47 | return {"vis": DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=0)} 48 | 49 | def val_dataloader(self): 50 | val_dataloader = {"vis": DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=0)} 51 | return CombinedLoader(val_dataloader, "max_size_cycle") 52 | 53 | @property 54 | def len_train(self): 55 | return len(self.train_dataset) 56 | 57 | @property 58 | def len_valid(self): 59 | return len(self.val_dataset) 60 | -------------------------------------------------------------------------------- /hulc2/datasets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/datasets/utils/__init__.py 
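
Note: `RandomDataModule` above produces synthetic batches with the same tuple layout as the real datasets (state obs, RGB obs, depth obs, actions, language, info, index), which makes it useful for smoke-testing the training loop without downloading CALVIN data. A minimal usage sketch, assuming the `hulc2` package and its pinned pytorch_lightning version are importable; the printed shapes follow the defaults hard-coded above (window size 16, 8 proprioceptive dims, 7-dim actions):

# Sketch: pull one synthetic batch from the "vis" dataloader and unpack it.
from hulc2.datasets.random import RandomDataModule

dm = RandomDataModule(batch_size=4)
batch = next(iter(dm.train_dataloader()["vis"]))
state_obs, rgb_obs, depth_obs, actions, lang, info, idx = batch
print(state_obs.shape)   # torch.Size([4, 16, 8])
print(rgb_obs[0].shape)  # torch.Size([4, 16, 3, 200, 200])
print(actions.shape)     # torch.Size([4, 16, 7])
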
-------------------------------------------------------------------------------- /hulc2/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/evaluation/__init__.py -------------------------------------------------------------------------------- /hulc2/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Vision networks 2 | from hulc2.affordance.models.language_encoders.bert_lang_encoder import BERTLang 3 | 4 | # Language encoders 5 | from hulc2.affordance.models.language_encoders.clip_lang_encoder import CLIPLang 6 | from hulc2.affordance.models.language_encoders.distilbert_lang_encoder import DistilBERTLang 7 | from hulc2.affordance.models.language_encoders.sbert_lang_encoder import SBertLang 8 | from hulc2.affordance.models.visual_lang_encoders.r3m_rn18 import R3M 9 | from hulc2.affordance.models.visual_lang_encoders.rn50_clip_lingunet import CLIPLingUNet 10 | from hulc2.affordance.models.visual_lang_encoders.rn50_unet import RN50LingUNet 11 | from hulc2.affordance.models.visual_lang_encoders.rn_lingunet import RNLingunet 12 | 13 | lang_encoders = {"clip": CLIPLang, "bert": BERTLang, "distilbert": DistilBERTLang, "sbert": SBertLang} 14 | 15 | vision_encoders = { 16 | # Lang Nets 17 | "clip": CLIPLingUNet, 18 | "rn": RNLingunet, # RN50LingUNet, 19 | "rn18": RNLingunet, 20 | "r3m_rn18": R3M, 21 | } 22 | 23 | # Depth estimatiom models 24 | from hulc2.affordance.models.depth.depth_gaussian import DepthEstimationGaussian 25 | from hulc2.affordance.models.depth.depth_logistics import DepthEstimationLogistics 26 | 27 | deth_est_nets = { 28 | # Depth Nets 29 | "gaussian": DepthEstimationGaussian, 30 | "logistic": DepthEstimationLogistics, 31 | } 32 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/auxiliary_loss_networks/__init__.py -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/bc_z_lang_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class BCZLangDecoder(nn.Module): 6 | def __init__(self, in_features: int, lang_dim: int): 7 | super().__init__() 8 | # include proprio info??? 
9 | self.mlp = nn.Sequential( 10 | nn.Linear(in_features=in_features, out_features=512), 11 | nn.ReLU(), 12 | nn.Linear(in_features=512, out_features=lang_dim), 13 | ) 14 | 15 | def forward(self, x: torch.Tensor) -> torch.Tensor: 16 | x = self.mlp(x) 17 | return x 18 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/mia_lang_discriminator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class MIALangDiscriminator(nn.Module): 6 | def __init__(self, in_features: int, lang_dim: int, dropout_p: float): 7 | super().__init__() 8 | self.mlp = nn.Sequential( 9 | nn.Linear(in_features=in_features + lang_dim, out_features=512), 10 | nn.ReLU(), 11 | nn.Dropout(dropout_p), 12 | nn.Linear(in_features=512, out_features=1), 13 | ) 14 | 15 | def forward(self, vis_emb: torch.Tensor, lang_emb: torch.Tensor) -> torch.Tensor: 16 | x = torch.cat([vis_emb, lang_emb], dim=-1) 17 | x = self.mlp(x) 18 | return x 19 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/proj_vis_lang.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class ProjVisLang(nn.Module): 8 | def __init__(self, im_dim: int, lang_dim: int, output_dim: int, proj_lang: bool = True): 9 | super().__init__() 10 | self.mlp_im = nn.Sequential( 11 | nn.Linear(in_features=im_dim, out_features=128), 12 | nn.ReLU(), 13 | nn.Linear(in_features=128, out_features=output_dim), 14 | ) 15 | self.mlp_lang = None 16 | if proj_lang: 17 | self.mlp_lang = nn.Sequential( 18 | nn.Linear(in_features=lang_dim, out_features=128), 19 | nn.ReLU(), 20 | nn.Linear(in_features=128, out_features=output_dim), 21 | ) 22 | 23 | def forward(self, vis_emb: torch.Tensor, lang_emb: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 24 | vis_emb = self.mlp_im(vis_emb) 25 | if self.mlp_lang is not None: 26 | lang_emb = self.mlp_lang(lang_emb) 27 | return vis_emb, lang_emb 28 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/state_decoder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class StateDecoder(nn.Module): 8 | def __init__(self, visual_features: int, n_state_obs: int): 9 | super().__init__() 10 | self.mlp = nn.Sequential( 11 | nn.Linear(in_features=visual_features, out_features=40), 12 | nn.ReLU(), 13 | nn.Linear(in_features=40, out_features=40), 14 | nn.ReLU(), 15 | nn.Linear(in_features=40, out_features=n_state_obs), 16 | ) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | x = self.mlp(x) 20 | return x 21 | -------------------------------------------------------------------------------- /hulc2/models/decoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/decoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/decoders/action_decoder.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class 
ActionDecoder(nn.Module): 8 | def act( 9 | self, 10 | latent_plan: torch.Tensor, 11 | perceptual_emb: torch.Tensor, 12 | latent_goal: torch.Tensor, 13 | robot_obs: Optional[torch.Tensor] = None, 14 | ) -> torch.Tensor: 15 | raise NotImplementedError 16 | 17 | def loss( 18 | self, 19 | latent_plan: torch.Tensor, 20 | perceptual_emb: torch.Tensor, 21 | latent_goal: torch.Tensor, 22 | actions: torch.Tensor, 23 | robot_obs: Optional[torch.Tensor] = None, 24 | ) -> torch.Tensor: 25 | raise NotImplementedError 26 | 27 | def loss_and_act( 28 | self, 29 | latent_plan: torch.Tensor, 30 | perceptual_emb: torch.Tensor, 31 | latent_goal: torch.Tensor, 32 | actions: torch.Tensor, 33 | robot_obs: Optional[torch.Tensor] = None, 34 | ) -> Tuple[torch.Tensor, torch.Tensor]: 35 | raise NotImplementedError 36 | 37 | def _sample(self, *args, **kwargs): 38 | raise NotImplementedError 39 | 40 | def forward( 41 | self, latent_plan: torch.Tensor, perceptual_emb: torch.Tensor, latent_goal: torch.Tensor 42 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 43 | raise NotImplementedError 44 | 45 | def clear_hidden_state(self) -> None: 46 | pass 47 | -------------------------------------------------------------------------------- /hulc2/models/decoders/clip_proj.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class ClipProj(nn.Module): 8 | def __init__(self, im_dim: int, lang_dim: int, output_dim: int, proj_lang: bool = True): 9 | super().__init__() 10 | self.mlp_im = nn.Sequential( 11 | nn.Linear(in_features=im_dim, out_features=128), 12 | nn.ReLU(), 13 | nn.Linear(in_features=128, out_features=output_dim), 14 | ) 15 | self.mlp_lang = None 16 | if proj_lang: 17 | self.mlp_lang = nn.Sequential( 18 | nn.Linear(in_features=lang_dim, out_features=128), 19 | nn.ReLU(), 20 | nn.Linear(in_features=128, out_features=output_dim), 21 | ) 22 | 23 | def forward(self, vis_emb: torch.Tensor, lang_emb: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 24 | vis_emb = self.mlp_im(vis_emb) 25 | if self.mlp_lang is not None: 26 | lang_emb = self.mlp_lang(lang_emb) 27 | return vis_emb, lang_emb 28 | -------------------------------------------------------------------------------- /hulc2/models/decoders/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/decoders/utils/__init__.py -------------------------------------------------------------------------------- /hulc2/models/decoders/utils/rnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def rnn_decoder(in_features: int, hidden_size: int, num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 6 | return nn.RNN( 7 | input_size=in_features, 8 | hidden_size=hidden_size, 9 | num_layers=num_layers, 10 | nonlinearity="relu", 11 | bidirectional=False, 12 | batch_first=True, 13 | dropout=policy_rnn_dropout_p, 14 | ) 15 | 16 | 17 | def lstm_decoder(in_features: int, hidden_size: int, num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 18 | return nn.LSTM( 19 | input_size=in_features, 20 | hidden_size=hidden_size, 21 | num_layers=num_layers, 22 | bidirectional=False, 23 | batch_first=True, 24 | dropout=policy_rnn_dropout_p, 25 | ) 26 | 27 | 28 | def gru_decoder(in_features: int, hidden_size: int, 
num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 29 | return nn.GRU( 30 | input_size=in_features, 31 | hidden_size=hidden_size, 32 | num_layers=num_layers, 33 | bidirectional=False, 34 | batch_first=True, 35 | dropout=policy_rnn_dropout_p, 36 | ) 37 | 38 | 39 | def mlp_decoder(in_features: int, hidden_size: int, num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 40 | return nn.Sequential( 41 | nn.Linear(in_features=in_features, out_features=hidden_size), 42 | nn.ReLU(), 43 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 44 | nn.ReLU(), 45 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 46 | ) 47 | -------------------------------------------------------------------------------- /hulc2/models/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/encoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/encoders/clip_lang_encoder.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from hulc2.models.perceptual_encoders.clip import build_model, load_clip, tokenize 7 | 8 | 9 | class LangClip(nn.Module): 10 | def __init__(self, freeze_backbone: bool = True, model_name: str = "RN50"): 11 | super(LangClip, self).__init__() 12 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 13 | # Load CLIP model 14 | print(f"loading language CLIP model with backbone: {model_name}") 15 | self._load_clip(model_name) 16 | if freeze_backbone: 17 | for param in self.clip_rn50.parameters(): 18 | param.requires_grad = False 19 | 20 | def _load_clip(self, model_name: str) -> None: 21 | model, _ = load_clip(model_name, device=self.device) 22 | self.clip_rn50 = build_model(model.state_dict()).to(self.device) 23 | del model 24 | 25 | def forward(self, x: List) -> torch.Tensor: 26 | with torch.no_grad(): 27 | tokens = tokenize(x).to(self.device) 28 | emb = self.clip_rn50.encode_text(tokens) 29 | return torch.unsqueeze(emb, 1) 30 | -------------------------------------------------------------------------------- /hulc2/models/encoders/goal_encoders.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class VisualGoalEncoder(nn.Module): 9 | def __init__( 10 | self, 11 | hidden_size: int, 12 | latent_goal_features: int, 13 | in_features: int, 14 | l2_normalize_goal_embeddings: bool, 15 | activation_function: str, 16 | ): 17 | super().__init__() 18 | self.l2_normalize_output = l2_normalize_goal_embeddings 19 | self.act_fn = getattr(nn, activation_function)() 20 | self.mlp = nn.Sequential( 21 | nn.Linear(in_features=in_features, out_features=hidden_size), 22 | self.act_fn, 23 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 24 | self.act_fn, 25 | nn.Linear(in_features=hidden_size, out_features=latent_goal_features), 26 | ) 27 | self.ln = nn.LayerNorm(latent_goal_features) 28 | 29 | def forward(self, x: torch.Tensor) -> torch.Tensor: 30 | x = self.mlp(x) 31 | if self.l2_normalize_output: 32 | x = F.normalize(x, p=2, dim=1) 33 | x = self.ln(x) 34 | return x 35 | 36 | 37 | class LanguageGoalEncoder(nn.Module): 38 | def __init__( 39 | self, 40 | lang_net, 41 | in_features: 
int, 42 | hidden_size: int, 43 | latent_goal_features: int, 44 | l2_normalize_goal_embeddings: bool, 45 | word_dropout_p: float, 46 | activation_function: str, 47 | ): 48 | super().__init__() 49 | self.lang_net = lang_net 50 | self.l2_normalize_output = l2_normalize_goal_embeddings 51 | self.act_fn = getattr(nn, activation_function)() 52 | self.mlp = nn.Sequential( 53 | nn.Dropout(word_dropout_p), 54 | nn.Linear(in_features=in_features, out_features=hidden_size), 55 | self.act_fn, 56 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 57 | self.act_fn, 58 | nn.Linear(in_features=hidden_size, out_features=latent_goal_features), 59 | ) 60 | self.ln = nn.LayerNorm(latent_goal_features) 61 | 62 | def forward(self, x: list) -> torch.Tensor: 63 | # Takes a list of strings and returns the embeddings 64 | if self.lang_net is not None: 65 | x = self.lang_net(x) 66 | 67 | x = self.mlp(x) 68 | if self.l2_normalize_output: 69 | x = F.normalize(x, p=2, dim=1) 70 | x = self.ln(x) 71 | return x 72 | -------------------------------------------------------------------------------- /hulc2/models/encoders/lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LanguageEncoder(nn.Module): 6 | def __init__( 7 | self, 8 | language_features: int, 9 | hidden_size: int, 10 | out_features: int, 11 | word_dropout_p: float, 12 | activation_function: str, 13 | ): 14 | super().__init__() 15 | self.act_fn = getattr(nn, activation_function)() 16 | self.mlp = nn.Sequential( 17 | nn.Dropout(word_dropout_p), 18 | nn.Linear(in_features=language_features, out_features=hidden_size), 19 | self.act_fn, 20 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 21 | self.act_fn, 22 | nn.Linear(in_features=hidden_size, out_features=out_features), 23 | ) 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | x = self.mlp(x) 27 | return x 28 | -------------------------------------------------------------------------------- /hulc2/models/encoders/language_network.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import numpy as np 5 | from sentence_transformers import SentenceTransformer 6 | import torch 7 | from torch import nn, Tensor 8 | from tqdm.autonotebook import trange 9 | 10 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 11 | 12 | 13 | class SBert(nn.Module): 14 | def __init__(self, nlp_model: str, freeze_backbone=True) -> None: 15 | super(SBert, self).__init__() 16 | self.freeze_backbone = freeze_backbone 17 | self.model = SentenceTransformer(nlp_model) 18 | _embd_dim = 384 19 | # self.text_fc = nn.Linear(_embd_dim, 1024) 20 | 21 | def forward(self, x: List) -> torch.Tensor: 22 | enc = self.encode(x) 23 | # enc = self.text_fc(enc) 24 | return enc # torch.unsqueeze(enc, 1) 25 | 26 | def encode(self, sentences: List[str], normalize_embeddings: bool = False) -> Tensor: 27 | """ 28 | Computes sentence embeddings 29 | 30 | :param sentences: the sentences to embed 31 | :param normalize_embeddings: If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) instead of cosine similarity can be used. 
32 | 33 | :return: 34 | A stacked tensor is returned 35 | """ 36 | if self.freeze_backbone: 37 | self.model.eval() 38 | 39 | all_embeddings = [] 40 | length_sorted_idx = np.argsort([-self.model._text_length(sen) for sen in sentences]) 41 | sentences_sorted = [sentences[idx] for idx in length_sorted_idx] 42 | 43 | features = self.model.tokenize(sentences_sorted) 44 | features = self.batch_to_device(features, self.model._target_device) 45 | 46 | with torch.set_grad_enabled(not self.freeze_backbone): 47 | out_features = self.model.forward(features) 48 | embeddings = out_features["sentence_embedding"] 49 | embeddings = embeddings.detach() 50 | if normalize_embeddings: 51 | embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1) 52 | all_embeddings.extend(embeddings) 53 | 54 | # undo sort and convert to tensor 55 | all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)] 56 | all_embeddings = torch.stack(all_embeddings) 57 | return all_embeddings 58 | 59 | def batch_to_device(self, batch, target_device): 60 | """ 61 | send a pytorch batch to a device (CPU/GPU) 62 | """ 63 | for key in batch: 64 | if isinstance(batch[key], Tensor): 65 | batch[key] = batch[key].to(target_device) 66 | return batch 67 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/perceptual_encoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/proprio_encoder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | 4 | 5 | class IdentityEncoder(nn.Module): 6 | def __init__(self, proprioception_dims): 7 | super(IdentityEncoder, self).__init__() 8 | # remove a dimension if we convert robot orientation quaternion to euler angles 9 | self.n_state_obs = int(np.sum(np.diff([list(x) for x in [list(y) for y in proprioception_dims.keep_indices]]))) 10 | self.identity = nn.Identity() 11 | 12 | @property 13 | def out_features(self): 14 | return self.n_state_obs 15 | 16 | def forward(self, x): 17 | return self.identity(x) 18 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/tactile_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | 7 | class TactileEncoder(nn.Module): 8 | def __init__(self, visual_features: int, freeze_tactile_backbone: bool = True): 9 | super(TactileEncoder, self).__init__() 10 | # Load pre-trained resnet-18 11 | net = models.resnet18(pretrained=True) 12 | # Remove the last fc layer, and rebuild 13 | modules = list(net.children())[:-1] 14 | self.net = nn.Sequential(*modules) 15 | if freeze_tactile_backbone: 16 | for param in self.net.parameters(): 17 | param.requires_grad = False 18 | self.fc1 = nn.Linear(1024, 512) 19 | self.fc2 = nn.Linear(512, visual_features) 20 | 21 | def forward(self, x: torch.Tensor) -> torch.Tensor: 22 | x_l = self.net(x[:, :3, :, :]).squeeze() 23 | x_r = self.net(x[:, 3:, :, :]).squeeze() 24 | x = torch.cat((x_l, x_r), dim=-1) 25 | # Add fc layer for final prediction 26 | output = F.relu(self.fc1(x)) # batch, 512 27 | 
output = self.fc2(output) # batch, 64 28 | return output 29 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_clip.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | import torchvision 6 | 7 | from hulc2.models.perceptual_encoders.clip import build_model, load_clip, tokenize 8 | 9 | 10 | class VisionClip(nn.Module): 11 | def __init__( 12 | self, device: torch.device, visual_features: int, freeze_backbone: bool = True, model_name: str = "RN50" 13 | ): 14 | super(VisionClip, self).__init__() 15 | # Load CLIP model 16 | print(f"loading vision CLIP model with backbone: {model_name}") 17 | self.clip_model, _ = load_clip(model_name, device=device) 18 | if freeze_backbone: 19 | for param in self.clip_model.parameters(): 20 | param.requires_grad = False 21 | if "RN50" in model_name: 22 | self.fc1 = nn.Linear(1024, 512) 23 | self.fc2 = nn.Linear(512, visual_features) 24 | elif "ViT-B/32" in model_name: 25 | self.fc1 = nn.Linear(512, 256) 26 | self.fc2 = nn.Linear(256, visual_features) 27 | 28 | def forward(self, x: torch.Tensor) -> torch.Tensor: 29 | x = self.clip_model.encode_image(x) # type:ignore 30 | output = F.relu(self.fc1(x)) # batch, 512 31 | output = self.fc2(output) # batch, 64 32 | return output 33 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_network_conv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from typing import Dict, Optional, Tuple 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn.parameter import Parameter 9 | 10 | 11 | class VisionNetworkConv(nn.Module): 12 | # reference: https://arxiv.org/pdf/2005.07648.pdf 13 | def __init__( 14 | self, 15 | activation_function: str, 16 | dropout_vis_fc: float, 17 | l2_normalize_output: bool, 18 | visual_features: int, 19 | num_c: int, 20 | ): 21 | super(VisionNetworkConv, self).__init__() 22 | self.l2_normalize_output = l2_normalize_output 23 | self.act_fn = getattr(nn, activation_function)() 24 | # model 25 | self.conv_model = nn.Sequential( 26 | # input shape: [N, 3, 200, 200] 27 | nn.Conv2d(in_channels=num_c, out_channels=32, kernel_size=8, stride=4), # shape: [N, 32, 49, 49] 28 | nn.BatchNorm2d(32), 29 | self.act_fn, 30 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=2), # shape: [N, 64, 23, 23] 31 | nn.BatchNorm2d(64), 32 | self.act_fn, 33 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2), # shape: [N, 64, 10, 10] 34 | nn.BatchNorm2d(64), 35 | self.act_fn, 36 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2), # shape: [N, 128, 4, 4] 37 | nn.BatchNorm2d(128), 38 | self.act_fn, 39 | nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=1), # shape: [N, 256, 1, 1] 40 | nn.BatchNorm2d(256), 41 | self.act_fn, 42 | ) 43 | self.fc1 = nn.Sequential( 44 | nn.Linear(in_features=256, out_features=512), 45 | self.act_fn, 46 | nn.Dropout(dropout_vis_fc), 47 | ) # shape: [N, 512] 48 | self.fc2 = nn.Linear(in_features=512, out_features=visual_features) # shape: [N, 64] 49 | 50 | def forward(self, x: torch.Tensor) -> torch.Tensor: 51 | x = self.conv_model(x) 52 | x = torch.flatten(x, start_dim=1) 53 | x = self.fc1(x) 54 | x = self.fc2(x) 
55 | if self.l2_normalize_output: 56 | x = F.normalize(x, p=2, dim=1) 57 | return x # shape: [N, 64] 58 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_r3m.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from r3m import load_r3m 6 | 7 | 8 | class VisionR3M(nn.Module): 9 | def __init__( 10 | self, device: torch.device, visual_features: int, resnet_model: str = "resnet18", freeze_backbone: bool = True 11 | ): 12 | super(VisionR3M, self).__init__() 13 | # Load pre-trained R3M resnet-18 14 | self.r3m = load_r3m(resnet_model, device).module 15 | # set all grads to false 16 | for param in self.r3m.parameters(): 17 | param.requires_grad = False 18 | if not freeze_backbone: 19 | # finetune last layer 20 | for param in self.r3m.convnet.layer4.parameters(): 21 | param.requires_grad = True 22 | self.fc1 = nn.Linear(512, 256) 23 | self.fc2 = nn.Linear(256, visual_features) 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | with torch.no_grad(): 27 | x = self.r3m(x) # batch, 512, 1, 1 28 | # Add fc layer for final prediction 29 | x = torch.flatten(x, start_dim=1) # batch, 512 30 | output = F.relu(self.fc1(x)) # batch, 256 31 | output = self.fc2(output) # batch, 64 32 | return output 33 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | 7 | class VisionResnet(nn.Module): 8 | def __init__(self, visual_features: int, freeze_backbone: bool = True): 9 | super(VisionResnet, self).__init__() 10 | # Load pre-trained resnet-18 11 | net = models.resnet18(pretrained=True) 12 | # Remove the last fc layer, and rebuild 13 | modules = list(net.children())[:-1] 14 | for param in net.parameters(): 15 | param.requires_grad = False 16 | 17 | # Only finetune last layer 18 | if not freeze_backbone: 19 | for param in net.layer4.parameters(): 20 | param.requires_grad = True 21 | self.net = nn.Sequential(*modules) 22 | self.fc1 = nn.Linear(512, 256) 23 | self.fc2 = nn.Linear(256, visual_features) 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | x = self.net(x) # batch, 512, 1, 1 27 | # Add fc layer for final prediction 28 | x = torch.flatten(x, start_dim=1) # batch, 512 29 | output = F.relu(self.fc1(x)) # batch, 256 30 | output = self.fc2(output) # batch, 64 31 | return output 32 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_resnet_aff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from segmentation_models_pytorch.encoders import get_encoder 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | import torchvision.models as models 7 | 8 | 9 | class VisionResnetAff(nn.Module): 10 | def __init__(self, visual_features: int, input_shape: list, depth: int = 3, freeze_backbone: bool = True): 11 | super(VisionResnetAff, self).__init__() 12 | # Load pre-trained resnet-18 13 | self.net = get_encoder("resnet18", in_channels=input_shape[-1], depth=depth, weights="imagenet") 14 | # Remove the last fc layer, and rebuild 15 | for param in 
self.net.parameters(): 16 | param.requires_grad = False 17 | if freeze_backbone: 18 | for param in self.net.layer4.parameters(): 19 | param.requires_grad = True 20 | 21 | out_shape = self.calc_img_enc_size(list(input_shape)) 22 | self.fc1 = nn.Linear(np.prod(out_shape), 512) 23 | self.fc2 = nn.Linear(512, 256) 24 | self.fc3 = nn.Linear(256, visual_features) 25 | 26 | def calc_img_enc_size(self, input_shape): 27 | test_tensor = torch.zeros(input_shape).permute(2, 0, 1) 28 | test_tensor = test_tensor.unsqueeze(0) 29 | shape = self.net(test_tensor)[-1].shape[1:] 30 | return shape 31 | 32 | def forward(self, x: torch.Tensor) -> torch.Tensor: 33 | x = self.net(x)[-1] # batch, 128, n, n 34 | # Add fc layer for final prediction 35 | x = torch.flatten(x, start_dim=1) # batch, n*n*128 36 | output = F.relu(self.fc1(x)) # batch, 512 37 | output = F.relu(self.fc2(output)) # batch, 256 38 | output = self.fc3(output) # batch, 64 39 | return output 40 | -------------------------------------------------------------------------------- /hulc2/models/plan_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/plan_encoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/plan_encoders/plan_proposal_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | import torch.nn as nn 4 | 5 | from hulc2.utils.distributions import Distribution, State 6 | 7 | 8 | class PlanProposalNetwork(nn.Module): 9 | def __init__( 10 | self, 11 | perceptual_features: int, 12 | latent_goal_features: int, 13 | plan_features: int, 14 | activation_function: str, 15 | hidden_size: int, 16 | dist: Distribution, 17 | ): 18 | super(PlanProposalNetwork, self).__init__() 19 | self.perceptual_features = perceptual_features 20 | self.latent_goal_features = latent_goal_features 21 | self.plan_features = plan_features 22 | self.hidden_size = hidden_size 23 | self.in_features = self.perceptual_features + self.latent_goal_features 24 | self.act_fn = getattr(nn, activation_function)() 25 | self.dist = dist 26 | self.fc_model = nn.Sequential( 27 | nn.Linear(in_features=self.in_features, out_features=hidden_size), # shape: [N, 136] 28 | # nn.BatchNorm1d(hidden_size), 29 | self.act_fn, 30 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 31 | # nn.BatchNorm1d(hidden_size), 32 | self.act_fn, 33 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 34 | # nn.BatchNorm1d(hidden_size), 35 | self.act_fn, 36 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 37 | # nn.BatchNorm1d(hidden_size), 38 | self.act_fn, 39 | ) 40 | self.fc_state = self.dist.build_state(self.hidden_size, self.plan_features) 41 | 42 | def forward(self, initial_percep_emb: torch.Tensor, latent_goal: torch.Tensor) -> State: 43 | x = torch.cat([initial_percep_emb, latent_goal], dim=-1) 44 | x = self.fc_model(x) 45 | my_state = self.fc_state(x) 46 | state = self.dist.forward_dist(my_state) 47 | return state 48 | -------------------------------------------------------------------------------- /hulc2/rollout/gpt3_planning.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openai 4 | 5 | openai_key = os.environ["OPENAI_KEY"] 6 | openai.api_key = openai_key 7 | 8 | gpt3_prompt = """ 9 | state = {'drawer_open': 
False, 'blocks_on_table': ['red'], 'buttons_on': ['green']} 10 | # put away the red block. 11 | open_drawer() 12 | pick_and_place('red', 'drawer') 13 | close_drawer() 14 | ### 15 | state = {'drawer_open': False, 'blocks_on_table': [], 'buttons_on': ['yellow']} 16 | # turn off the lights. 17 | push_button('yellow') 18 | ### 19 | state = {'drawer_open': False, 'blocks_on_table': ['red', 'green', 'blue'], 'buttons_on': ['green', 'yellow']} 20 | """ 21 | 22 | gpt_version = "text-davinci-002" 23 | 24 | 25 | def LM(prompt, max_tokens=128, temperature=0, stop=None): 26 | response = openai.Completion.create( 27 | engine=gpt_version, prompt=prompt, max_tokens=max_tokens, temperature=temperature, stop=stop 28 | ) 29 | return response["choices"][0]["text"].strip() 30 | 31 | 32 | user_input = "tidy up the workspace and turn off all the lights" 33 | if user_input[-1] != ".": 34 | user_input += "." 35 | 36 | context = gpt3_prompt 37 | context += "# " + user_input + "\n" 38 | response = LM(context, stop=["###"]) 39 | context += response + "\n" 40 | 41 | step_text = "" 42 | 43 | 44 | def pick_and_place(obj1, obj2): 45 | global step_text 46 | step_text = f"Pick the {obj1} block and place it on the {obj2}." 47 | 48 | 49 | def open_drawer(): 50 | global step_text 51 | step_text = "pull the handle to open the drawer" 52 | 53 | 54 | def close_drawer(): 55 | global step_text 56 | step_text = "pull the handle to close the drawer" 57 | 58 | 59 | def push_button(obj1): 60 | global step_text 61 | if "green" in obj1: 62 | step_text = "press the button to turn on the led light" 63 | if "yellow" in obj1: 64 | step_text = "use the switch to turn on the light bulb" 65 | 66 | 67 | # Execute commands given by LM. 68 | step_cmds = response.split("\n") 69 | print("LM generated plan:") 70 | for step_cmd in step_cmds: 71 | step_cmd = step_cmd.replace("robot.", "") 72 | # print(step_cmd) 73 | exec(step_cmd) 74 | print("Step:", step_text) 75 | # obs = run_hucl(obs, step_text) 76 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/colors.yaml: -------------------------------------------------------------------------------- 1 | red: 2 | - "red" 3 | 4 | blue: 5 | - "blue" 6 | 7 | green: 8 | - "green" 9 | 10 | yellow: 11 | - "yellow" 12 | 13 | purple: 14 | - "purple" 15 | 16 | orange: 17 | - "orange" 18 | 19 | pink: 20 | - "pink" 21 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/config/lang_model/bert.yaml: -------------------------------------------------------------------------------- 1 | _target_: webapp.language_encoders.sbert.SBert 2 | _recursive_: False 3 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/config/lang_model/clip.yaml: -------------------------------------------------------------------------------- 1 | _target_: webapp.language_encoders.clip.CLIPLang 2 | freeze_backbone: True 3 | model_name: RN50 4 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/config/retrieve_data.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - lang_model: bert 3 | 4 | ignore_empty_tasks: True 5 | 6 | lang_model: 7 | nlp_model: paraphrase-MiniLM-L3-v2 8 | 9 | save_path: ./annotations 10 | database_path: webapp/database.db 11 | # Original dataset to be reduced... 
12 | # Only needs to load ep_start_end_ids.npy 13 | dataset_dir: ../datasets/unprocessed/real_world/real_world_play_processed 14 | 15 | 16 | #-- Hydra config --# 17 | hydra_outputs: ./hydra_outputs/tmp/ 18 | hydra: 19 | run: 20 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 21 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/tasks.yaml: -------------------------------------------------------------------------------- 1 | open_drawer: 2 | - "Opening the drawer" 3 | 4 | close_drawer: 5 | - "Closing the drawer" 6 | 7 | move_slide_left: 8 | - "Moving sliding door to the left" 9 | 10 | move_slide_right: 11 | - "Moving sliding door to the right" 12 | 13 | turn_on_[x]_led: 14 | - "turn on the [x] light" 15 | 16 | turn_off_[x]_led: 17 | - "turn off the [x] light" 18 | 19 | rotate_[x]_block_right: 20 | - "grasp the [x] block, then rotate it right" 21 | 22 | rotate_[x]_block_left: 23 | - "grasp the [x] block, then rotate it left" 24 | 25 | push_[x]_block_left: 26 | - "push the [x] block towards the left" 27 | 28 | push_[x]_block_right: 29 | - "push the [x] block towards the right" 30 | 31 | place_[x]_box: 32 | - "Place the [x] block inside the box" 33 | 34 | place_[x]_drawer: 35 | - "Place the [x] block inside the drawer" 36 | 37 | place_[x]_left_cabinet: 38 | - "Place the [x] block inside the left cabinet" 39 | 40 | place_[x]_right_cabinet: 41 | - "Place the [x] block inside the right cabinet" 42 | 43 | place_[x]_table: 44 | - "Place the [x] block on the table" 45 | 46 | place_[x]_drawer_top: 47 | - "Place the [x] block on top of the drawer" 48 | 49 | unstack_[x]_block: 50 | - "Unstack the [x] block" 51 | 52 | lift_[x]_block: 53 | - "Lift the [x] block" 54 | 55 | push_[x]_block_in_drawer: 56 | - "push the [x] block inside the drawer" 57 | 58 | stack_[x]_on_[y]: 59 | - "Stack the [x] block on top of the [y] block" 60 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/utils.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import yaml 4 | 5 | 6 | def read_tasks(): 7 | file_dir = pathlib.Path(__file__).parent.resolve() 8 | file = file_dir / "tasks.yaml" 9 | with open(file.as_posix(), "r") as stream: 10 | try: 11 | tasks = yaml.safe_load(stream) 12 | except yaml.YAMLError as exc: 13 | print(exc) 14 | return tasks 15 | 16 | 17 | def read_colors(): 18 | file_dir = pathlib.Path(__file__).parent.resolve() 19 | file = file_dir / "colors.yaml" 20 | with open(file.as_posix(), "r") as stream: 21 | try: 22 | colors = yaml.safe_load(stream) 23 | except yaml.YAMLError as exc: 24 | print(exc) 25 | return colors 26 | -------------------------------------------------------------------------------- /hulc2/scripts/viz_annotations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def add_img_text(img, text_label): 11 | font_scale = 0.6 12 | thickness = 2 13 | color = (0, 0, 0) 14 | im_w, im_h = img.shape[:2] 15 | x1, y1 = 10, 20 16 | (w, h), _ = cv2.getTextSize(text_label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness) 17 | out_img = cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1 + h), color, -1) 18 | out_img = cv2.putText( 19 | out_img, 20 | text_label, 21 | org=(x1, y1), 22 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 23 | fontScale=font_scale, 24 | color=(255, 255, 
255), 25 | thickness=thickness, 26 | ) 27 | return out_img 28 | 29 | 30 | def main(): 31 | # Please first run get_annotations to generate auto_lang_ann.npy 32 | lang_ann_path = ( 33 | Path(__file__).resolve().parents[1] / "annotations" / "lang_paraphrase-MiniLM-L3-v2" / "auto_lang_ann.npy" 34 | ) 35 | 36 | # Path where dataset is 37 | dataset_path = ( 38 | "/mnt/ssd_shared/Users/Jessica/Documents/Thesis_ssd/datasets/unprocessed/real_world/500k_all_tasks_dataset_15hz" 39 | ) 40 | 41 | annotations = np.load(lang_ann_path.resolve(), allow_pickle=True).item() 42 | indices = [317, 723, 22] 43 | for index in indices: 44 | idx = index - 1 45 | caption = annotations["language"]["ann"][idx] 46 | start_fr, end_fr = annotations["info"]["indx"][idx] 47 | for fr in range(start_fr, end_fr): 48 | frame_file = os.path.join(dataset_path, "episode_%07d.npz" % fr) 49 | step_file = np.load(frame_file) 50 | img = step_file["rgb_static"] 51 | w, h = img.shape[:2] 52 | img = cv2.resize(img, (h * 3, w * 3)) 53 | img = add_img_text(img, caption) 54 | cv2.imshow("img", img[:, :, ::-1]) 55 | cv2.waitKey(0) 56 | cv2.waitKey(1) 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /hulc2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/utils/__init__.py -------------------------------------------------------------------------------- /hulc2/utils/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/utils/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /hulc2/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from typing import DefaultDict 2 | 3 | import numpy as np 4 | 5 | 6 | def get_split_data(play_start_end_ids, data_percent, lang_data=None): 7 | start_end_ids = np.array(play_start_end_ids) 8 | cumsum = np.cumsum([e - s for s, e in play_start_end_ids]) 9 | 10 | n_samples = int(cumsum[-1] * data_percent) 11 | max_idx = min(n_samples, cumsum[-1]) if n_samples > 0 else cumsum[-1] 12 | indices = [0] 13 | for i in range(len(cumsum) - 1): 14 | if cumsum[i] <= max_idx: 15 | indices.append(i + 1) 16 | 17 | # Valid play-data start_end_ids episodes 18 | start_end_ids = [start_end_ids[i] for i in indices] 19 | diff = cumsum[indices[-1]] - n_samples 20 | start_end_ids[-1][-1] = start_end_ids[-1][-1] - diff 21 | 22 | # Only add frames w/lang that are inside selected non-lang frames 23 | if lang_data is not None: 24 | lang_data = get_split_lang_sequences(start_end_ids, lang_data) 25 | return np.array(start_end_ids), lang_data 26 | 27 | 28 | def get_split_lang_sequences(start_end_ids, lang_data, asarray=True): 29 | split_lang_data = { 30 | "language": {"ann": [], "task": [], "emb": []}, 31 | "info": {"episodes": [], "indx": []}, 32 | } 33 | # Language annotated episodes(64 frames) 34 | # keys = [(start_i, end_i), ...] 35 | keys = np.array([idx for idx in lang_data["info"]["indx"]]) 36 | for start, end in start_end_ids: 37 | # Check if language annotated episode frames(64) are part of frames selected for non-language annotated frames(play data episodes). 38 | # i.e. 
Check that both language annotated and non-language come frome the same data 39 | cond = np.logical_and(keys[:, 0] >= start, keys[:, 1] <= end) 40 | inside_ep = np.where(cond)[0] 41 | 42 | # If lang-annotated ep is inside selected play-data ep copy selected ep 43 | for i in inside_ep: 44 | split_lang_data["language"]["ann"].append(lang_data["language"]["ann"][i]) 45 | split_lang_data["language"]["task"].append(lang_data["language"]["task"][i]) 46 | split_lang_data["language"]["emb"].append(lang_data["language"]["emb"][i]) 47 | split_lang_data["info"]["indx"].append(lang_data["info"]["indx"][i]) 48 | 49 | split_lang_data["language"]["emb"] = np.array(split_lang_data["language"]["emb"]) 50 | return split_lang_data 51 | -------------------------------------------------------------------------------- /hulc2/utils/data_visualization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import sys 4 | 5 | from calvin.utils.utils import get_git_commit_hash, get_last_checkpoint, print_system_env_info 6 | import hydra 7 | import numpy 8 | from omegaconf import DictConfig, ListConfig, OmegaConf 9 | from pytorch_lightning import seed_everything, Trainer 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | from matplotlib.animation import ArtistAnimation 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | 17 | 18 | def visualize(data): 19 | seq_img = data[1][0][0].numpy() 20 | title = data[4][0] 21 | s, c, h, w = seq_img.shape 22 | seq_img = np.transpose(seq_img, (0, 2, 3, 1)) 23 | imgs = [] 24 | fig = plt.figure() 25 | for j in range(s): 26 | # imgRGB = seq_img[j].astype(int) 27 | imgRGB = seq_img[j] 28 | imgRGB = (imgRGB - imgRGB.min()) / (imgRGB.max() - imgRGB.min()) 29 | img = plt.imshow(imgRGB, animated=True) 30 | imgs.append([img]) 31 | anim = ArtistAnimation(fig, imgs, interval=50) 32 | plt.title(title) 33 | plt.show() 34 | 35 | 36 | @hydra.main(config_path="../../conf", config_name="default.yaml") 37 | def train(cfg: DictConfig) -> None: 38 | # sets seeds for numpy, torch, python.random and PYTHONHASHSEED. 
39 | seed_everything(cfg.seed) 40 | data_module = hydra.utils.instantiate(cfg.dataset, num_workers=0) 41 | data_module.setup() 42 | train = data_module.train_dataloader() 43 | dataset = train["lang"] 44 | logger.info(f"Dataset Size: {len(dataset)}") 45 | for i, lang in enumerate(dataset): 46 | logger.info(f"Element : {i}") 47 | visualize(lang) 48 | 49 | 50 | if __name__ == "__main__": 51 | train() 52 | -------------------------------------------------------------------------------- /hulc2/utils/dataset_pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Running from src: $1" 3 | echo "Output to: $2" 4 | 5 | # Job to perform 6 | # source ~/.bashrc 7 | # conda activate $1 8 | # srun python ${@:2} 9 | 10 | python preprocess_real_data.py --dataset_root $1 --output_dir $2_processed 11 | python render_low_freq.py --dataset_root $2_processed --output_dir $2_15hz 12 | python split_dataset.py --dataset_root $2_15hz 13 | -------------------------------------------------------------------------------- /hulc2/utils/dataset_task_statistics.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import logging 3 | 4 | import hydra 5 | from omegaconf import DictConfig 6 | from pytorch_lightning import seed_everything 7 | from tqdm import tqdm 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def count_tasks(batch, env, tasks, task_counter): 13 | state_obs, rgb_obs, depth_obs, actions, _, reset_info, idx = batch 14 | batch_size = state_obs.shape[0] 15 | for i in range(batch_size): 16 | # reset env to state of last step in the episode (goal state) 17 | env.reset(reset_info, i, -1) 18 | goal_info = env.get_info() 19 | # reset env to state of first step in the episode 20 | env.reset(reset_info, i, 0) 21 | start_info = env.get_info() 22 | # check if task was achieved in sequence 23 | task_info = tasks.get_task_info(start_info, goal_info) 24 | task_counter += Counter(task_info) 25 | 26 | 27 | @hydra.main(config_path="../../conf", config_name="config") 28 | def compute_dataset_statistics(cfg: DictConfig) -> None: 29 | """""" 30 | seed_everything(cfg.seed) 31 | 32 | # since we don't use the trainer during inference, manually set up datamodule 33 | data_module = hydra.utils.instantiate(cfg.dataset, batch_size=32, num_workers=4) 34 | data_module.prepare_data() 35 | data_module.setup() 36 | train_dataloader = data_module.train_dataloader() 37 | val_dataloader = data_module.val_dataloader() 38 | 39 | env = hydra.utils.instantiate(cfg.rollout.env_cfg, train_dataloader.dataset.dataset_loader, "cpu") 40 | tasks = hydra.utils.instantiate(cfg.rollout.task_cfg) 41 | 42 | task_counter = Counter() # type: ignore 43 | logger.info( 44 | f"training dataset with {len(train_dataloader.dataset.dataset_loader.max_batched_length_per_demo)} " 45 | f"episodes and {len(train_dataloader.dataset.dataset_loader.episode_lookup)} frames" 46 | ) 47 | 48 | for batch in tqdm(train_dataloader): 49 | count_tasks(batch, env, tasks, task_counter) 50 | logger.info(f"training tasks: {task_counter}") 51 | 52 | task_counter = Counter() 53 | logger.info( 54 | f"training dataset with {len(val_dataloader.dataset.dataset_loader.max_batched_length_per_demo)} " 55 | f"episodes and {len(val_dataloader.dataset.dataset_loader.episode_lookup)} frames" 56 | ) 57 | for batch in tqdm(val_dataloader): 58 | count_tasks(batch, env, tasks, task_counter) 59 | logger.info(f"validation tasks: {task_counter}") 60 | 61 | 62 | 
if __name__ == "__main__": 63 | compute_dataset_statistics() 64 | -------------------------------------------------------------------------------- /hulc2/utils/distributions.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Union 3 | 4 | import torch 5 | from torch.distributions import Independent, Normal, OneHotCategoricalStraightThrough # type: ignore 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | DiscState = namedtuple("DiscState", ["logit"]) 10 | ContState = namedtuple("ContState", ["mean", "std"]) 11 | 12 | State = Union[DiscState, ContState] 13 | 14 | 15 | class Distribution: 16 | def __init__(self, **kwargs): 17 | self.dist = kwargs.get("dist") 18 | assert self.dist == "discrete" or self.dist == "continuous" 19 | if self.dist == "discrete": 20 | self.category_size = kwargs.get("category_size") 21 | self.class_size = kwargs.get("class_size") 22 | 23 | def get_dist(self, state): 24 | if self.dist == "discrete": 25 | shape = state.logit.shape 26 | logits = torch.reshape(state.logit, shape=(*shape[:-1], self.category_size, self.class_size)) 27 | return Independent(OneHotCategoricalStraightThrough(logits=logits), 1) 28 | elif self.dist == "continuous": 29 | return Independent(Normal(state.mean, state.std), 1) 30 | 31 | def detach_state(self, state): 32 | if self.dist == "discrete": 33 | return DiscState(state.logit.detach()) 34 | elif self.dist == "continuous": 35 | return ContState(state.mean.detach(), state.std.detach()) 36 | 37 | def sample_latent_plan(self, distribution): 38 | sampled_plan = distribution.sample() 39 | if self.dist == "discrete": 40 | sampled_plan = torch.flatten(sampled_plan, start_dim=-2, end_dim=-1) 41 | return sampled_plan 42 | 43 | def build_state(self, hidden_size, plan_features): 44 | fc_state = [] 45 | if self.dist == "discrete": 46 | fc_state += [nn.Linear(hidden_size, plan_features)] 47 | elif self.dist == "continuous": 48 | fc_state += [nn.Linear(hidden_size, 2 * plan_features)] 49 | return nn.Sequential(*fc_state) 50 | 51 | def forward_dist(self, x): 52 | if self.dist == "discrete": 53 | prior_logit = x 54 | state = DiscState(prior_logit) # type: State 55 | elif self.dist == "continuous": 56 | mean, var = torch.chunk(x, 2, dim=-1) 57 | min_std = 0.0001 58 | std = F.softplus(var) + min_std 59 | state = ContState(mean, std) 60 | return state 61 | -------------------------------------------------------------------------------- /hulc2/utils/kl_callbacks.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning import Callback, LightningModule, Trainer 2 | import torch 3 | 4 | 5 | def sigmoid(scale: float, shift: float, x: int) -> float: 6 | return torch.sigmoid(torch.Tensor([(x - shift) / (scale / 12)])).item() 7 | 8 | 9 | class KLSchedule(Callback): 10 | """ 11 | Base class for KL Annealing 12 | """ 13 | 14 | def __init__(self, start_epoch: int, end_epoch: int, max_kl_beta: float): 15 | self.start_epoch = start_epoch 16 | self.end_epoch = end_epoch 17 | self.max_kl_beta = max_kl_beta 18 | 19 | def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None: 20 | epoch = pl_module.current_epoch 21 | kl_beta = self._anneal_fn(epoch) 22 | pl_module.set_kl_beta(kl_beta) # type: ignore 23 | 24 | def _anneal_fn(self, epoch): 25 | raise NotImplementedError 26 | 27 | 28 | class KLConstantSchedule(KLSchedule): 29 | def __init__(self): 30 | pass 31 | 32 | def 
on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None: 33 | pass 34 | 35 | def _anneal_fn(self, epoch: int) -> None: 36 | pass 37 | 38 | 39 | class KLSigmoidSchedule(KLSchedule): 40 | def _anneal_fn(self, epoch: int) -> float: 41 | if epoch < self.start_epoch: 42 | kl_beta = 0.0 43 | elif epoch > self.end_epoch: 44 | kl_beta = self.max_kl_beta 45 | else: 46 | scale = self.end_epoch - self.start_epoch 47 | shift = (self.end_epoch + self.start_epoch) / 2 48 | kl_beta = sigmoid(scale=scale, shift=shift, x=epoch) * self.max_kl_beta 49 | return kl_beta 50 | 51 | 52 | class KLLinearSchedule(KLSchedule): 53 | def _anneal_fn(self, epoch: int) -> float: 54 | if epoch < self.start_epoch: 55 | kl_beta = 0.0 56 | elif epoch > self.end_epoch: 57 | kl_beta = self.max_kl_beta 58 | else: 59 | kl_beta = self.max_kl_beta * (epoch - self.start_epoch) / (self.end_epoch - self.start_epoch) 60 | return kl_beta 61 | 62 | 63 | if __name__ == "__main__": 64 | import matplotlib 65 | import matplotlib.pyplot as plt 66 | 67 | matplotlib.use("TkAgg") 68 | import numpy as np 69 | 70 | kl = KLLinearSchedule(10, 50, 0.1) 71 | x = np.arange(200) 72 | y = [kl._anneal_fn(i) for i in x] 73 | plt.plot(x, y) 74 | 75 | kl2 = KLSigmoidSchedule(10, 50, 0.1) 76 | x = np.arange(200) 77 | y = [kl2._anneal_fn(i) for i in x] 78 | plt.plot(x, y) 79 | 80 | plt.show() 81 | -------------------------------------------------------------------------------- /hulc2/utils/real_world_dataset_pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Running from src: $1" 3 | echo "Output to: $2" 4 | 5 | # Job to perform 6 | # source ~/.bashrc 7 | # conda activate $1 8 | # srun python ${@:2} 9 | 10 | python preprocess_real_data.py --dataset_root $1 --output_dir $2_processed 11 | python render_low_freq.py --dataset_root $2_processed --output_dir $2_15hz 12 | python split_dataset.py --dataset_root $2_15hz 13 | -------------------------------------------------------------------------------- /hulc2/utils/relabel_with_new_lang_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from typing import Dict 4 | 5 | import hydra 6 | import numpy as np 7 | from omegaconf import DictConfig, OmegaConf 8 | import torch 9 | 10 | import hulc2 11 | 12 | """This script allows for re-annotating video sequences of PlayData. 
13 | Parameters: 14 | · +path=/path/to/current/auto_lang_ann.npy 15 | · +name_folder=name_to_new_annotations 16 | New annotations sampling from 'annotations=expert' defined in expert.yaml 17 | NLP model selection: 18 | · model.nlp_model=mini -> 'paraphrase-MiniLM-L6-v2' 19 | · model.nlp_model=multi -> 'paraphrase-multilingual-mpnet-base-v2' 20 | · model.nlp_model=mpnet -> 'paraphrase-mpnet-base-v2' 21 | """ 22 | 23 | 24 | @hydra.main(config_path="../../conf", config_name="lang_ann.yaml") 25 | def main(cfg: DictConfig) -> None: 26 | print("Loading data") 27 | path = Path(cfg.path) 28 | data = np.load(path, allow_pickle=True).reshape(-1)[0] 29 | if "training" in cfg.path: 30 | print("using training instructions...") 31 | task_ann = cfg.train_instructions 32 | else: 33 | print("using validation instructions...") 34 | task_ann = cfg.val_instructions 35 | if cfg.reannotate: 36 | print("Re-annotating sequences...") 37 | data["language"]["ann"] = [ 38 | task_ann[task][np.random.randint(len(task_ann[task]))] for task in data["language"]["task"] 39 | ] 40 | print("Loading Language Model") 41 | model = hydra.utils.instantiate(cfg.model) 42 | print(f"Computing Embeddings with Model --> {cfg.model}") 43 | data["language"]["emb"] = model(data["language"]["ann"]).cpu().numpy() 44 | print("Saving data") 45 | save_path = path.parent / ".." / cfg.name_folder 46 | save_path.mkdir(exist_ok=True) 47 | np.save(save_path / "auto_lang_ann.npy", data) 48 | 49 | if "validation" in cfg.path: 50 | embeddings: Dict = {} 51 | for task, ann in cfg.val_instructions.items(): 52 | embeddings[task] = {} 53 | language_embedding = model(list(ann)) 54 | embeddings[task]["emb"] = language_embedding.cpu().numpy() 55 | embeddings[task]["ann"] = ann 56 | np.save(save_path / "embeddings", embeddings) 57 | print("Done saving val language embeddings for Rollouts !") 58 | 59 | 60 | if __name__ == "__main__": 61 | main() 62 | -------------------------------------------------------------------------------- /hulc2/utils/tensor_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from scipy.spatial.transform.rotation import Rotation as R 5 | import torch 6 | from torch.autograd import Variable 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def unravel_idx(indices, shape): 12 | coord = [] 13 | for dim in reversed(shape): 14 | coord.append(indices % dim) 15 | indices = indices // dim 16 | 17 | coord = np.stack(coord[::-1], axis=-1) 18 | return coord 19 | 20 | 21 | def calc_cnn_out_size(in_size, k, p=0, s=1): 22 | out_size = ((in_size + 2 * p - k) / s) + 1 23 | return int(out_size) 24 | 25 | 26 | def np_quat_to_scipy_quat(quat): 27 | """wxyz to xyzw""" 28 | return np.array([quat.x, quat.y, quat.z, quat.w]) 29 | 30 | 31 | def pos_orn_to_matrix(pos, orn): 32 | """ 33 | :param pos: np.array of shape (3,) 34 | :param orn: np.array of shape (4,) -> quaternion xyzw 35 | np.quaternion -> quaternion wxyz 36 | np.array of shape (3,) -> euler angles xyz 37 | :return: 4x4 homogeneous transformation 38 | """ 39 | mat = np.eye(4) 40 | if isinstance(orn, np.quaternion): 41 | orn = np_quat_to_scipy_quat(orn) 42 | mat[:3, :3] = R.from_quat(orn).as_matrix() 43 | elif len(orn) == 4: 44 | mat[:3, :3] = R.from_quat(orn).as_matrix() 45 | elif len(orn) == 3: 46 | mat[:3, :3] = R.from_euler("xyz", orn).as_matrix() 47 | mat[:3, 3] = pos 48 | return mat 49 | 50 | 51 | def tt(x, device): 52 | if isinstance(x, dict): 53 | dict_of_list = {} 54 | for key, val in 
x.items(): 55 | dict_of_list[key] = Variable(torch.from_numpy(val).float().to(device), requires_grad=False) 56 | return dict_of_list 57 | else: 58 | return Variable(torch.from_numpy(x).float().to(device), requires_grad=False) 59 | 60 | 61 | def torch_to_numpy(x): 62 | return x.detach().cpu().numpy() 63 | -------------------------------------------------------------------------------- /hulc2/utils/visualizations.py: -------------------------------------------------------------------------------- 1 | # Force matplotlib to not use any Xwindows backend. 2 | import matplotlib 3 | import numpy as np 4 | from pytorch_lightning.loggers import WandbLogger 5 | import torch 6 | import wandb 7 | 8 | matplotlib.use("Agg") 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def visualize_temporal_consistency(max_batched_length_per_demo, gpus, sampled_plans, all_idx, step, logger, prefix=""): 13 | """compute t-SNE plot of embeddings os a task to visualize temporal consistency""" 14 | labels = [] 15 | for demo in max_batched_length_per_demo: 16 | labels = np.concatenate((labels, np.arange(demo) / float(demo)), axis=0) 17 | # because with ddp, data doesn't come ordered anymore 18 | labels = labels[torch.flatten(all_idx).cpu()] 19 | colors = [plt.cm.Spectral(y_i) for y_i in labels] 20 | assert sampled_plans.shape[0] == len(labels), "plt X shape {}, label len {}".format( 21 | sampled_plans.shape[0], len(labels) 22 | ) 23 | 24 | from MulticoreTSNE import MulticoreTSNE as TSNE 25 | 26 | x_tsne = TSNE(perplexity=40, n_jobs=8).fit_transform(sampled_plans.cpu()) 27 | 28 | plt.close("all") 29 | fig, ax = plt.subplots() 30 | _ = ax.scatter(x_tsne[:, 0], x_tsne[:, 1], c=colors, cmap=plt.cm.Spectral) 31 | fig.suptitle("Temporal Consistency of Latent space") 32 | ax.axis("off") 33 | if isinstance(logger, WandbLogger): 34 | logger.experiment.log({prefix + "latent_embedding": wandb.Image(fig)}) 35 | else: 36 | logger.experiment.add_figure(prefix + "latent_embedding", fig, global_step=step) 37 | -------------------------------------------------------------------------------- /hulc2/utils/visualize_calvin_dataset.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from pathlib import Path 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | if __name__ == "__main__": 8 | parser = ArgumentParser(description="Interactive visualization of CALVIN dataset") 9 | parser.add_argument("path", type=str, help="Path to dir containing scene_info.npy") 10 | parser.add_argument("-d", "--data", nargs="*", default=["rgb_static", "rgb_gripper"], help="Data to visualize") 11 | args = parser.parse_args() 12 | 13 | if not Path(args.path).is_dir(): 14 | print(f"Path {args.path} is either not a directory, or does not exist.") 15 | exit() 16 | 17 | indices = next(iter(np.load(f"{args.path}/scene_info.npy", allow_pickle=True).item().values())) 18 | indices = list(range(indices[0], indices[1] + 1)) 19 | 20 | annotations = np.load(f"{args.path}/lang_annotations/auto_lang_ann.npy", allow_pickle=True).item() 21 | annotations = list(zip(annotations["info"]["indx"], annotations["language"]["ann"])) 22 | 23 | idx = 0 24 | ann_idx = -1 25 | 26 | while True: 27 | t = np.load(f"{args.path}/episode_{indices[idx]:07d}.npz", allow_pickle=True) 28 | 29 | for d in args.data: 30 | if d not in t: 31 | print(f"Data {d} cannot be found in transition") 32 | continue 33 | 34 | cv2.imshow(d, t[d][:, :, ::-1]) 35 | 36 | for n, ((low, high), ann) in enumerate(annotations): 37 | if indices[idx] >= 
low and indices[idx] <= high: 38 | if n != ann_idx: 39 | print(f"{ann}") 40 | ann_idx = n 41 | 42 | key = cv2.waitKey(0) 43 | if key == ord("q"): 44 | break 45 | elif key == 83: # Right arrow 46 | idx = (idx + 1) % len(indices) 47 | elif key == 81: # Left arrow 48 | idx = (len(indices) + idx - 1) % len(indices) 49 | else: 50 | print(f'Unrecognized keycode "{key}"') 51 | -------------------------------------------------------------------------------- /hulc2/utils/visualize_real_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import cv2 5 | import numpy as np 6 | import tqdm 7 | 8 | 9 | def normalize_depth(img): 10 | img_mask = img == 0 11 | istats = (np.min(img[img > 0]), np.max(img)) 12 | imrange = (img.astype("float32") - istats[0]) / (istats[1] - istats[0]) 13 | imrange[img_mask] = 0 14 | imrange = 255.0 * imrange 15 | imsz = imrange.shape 16 | nchan = 1 17 | if len(imsz) == 3: 18 | nchan = imsz[2] 19 | imgcanvas = np.zeros((imsz[0], imsz[1], nchan), dtype="uint8") 20 | imgcanvas[0 : imsz[0], 0 : imsz[1]] = imrange.reshape((imsz[0], imsz[1], nchan)) 21 | return imgcanvas 22 | 23 | 24 | # Ger valid numpy files with raw data 25 | def get_files(path, extension, recursive=False): 26 | if not os.path.isdir(path): 27 | print("path does not exist: %s" % path) 28 | search_str = "/*.%s" % extension if not recursive else "**/*.%s" % extension 29 | files = glob.glob(path + search_str) 30 | if not files: 31 | print("No *.%s files found in %s" % (extension, path)) 32 | files.sort() 33 | return files 34 | 35 | 36 | def viz_data(data_dir): 37 | """Visualize teleop data recorded with Panda robot and check actions are valid""" 38 | files = get_files(data_dir, "npz", recursive=True) # Sorted files 39 | # Remove camera calibration npz from iterable files 40 | files = [f for f in files if "camera_info.npz" not in f] 41 | 42 | for idx, filename in enumerate(tqdm.tqdm(files)): 43 | data = np.load(filename, allow_pickle=True) 44 | if data is None: 45 | continue # Skip file 46 | 47 | new_size = (400, 400) 48 | for key in ["rgb_static", "depth_static", "rgb_gripper", "depth_gripper"]: 49 | img = cv2.resize(data[key], new_size) 50 | if "rgb" in key: 51 | cv2.imshow(key, img[:, :, ::-1]) 52 | else: 53 | img2 = normalize_depth(img) 54 | img2 = cv2.applyColorMap(img2, cv2.COLORMAP_JET) 55 | cv2.imshow(key, img2) 56 | cv2.waitKey(1) 57 | 58 | 59 | if __name__ == "__main__": 60 | data_dir = "/tmp/test_dataset" 61 | viz_data(data_dir) 62 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd calvin_env/tacto 4 | pip install -e . 5 | cd .. 6 | pip install -e . 7 | cd .. 8 | pip install -e . 9 | cd r3m 10 | pip install -e . 
11 | -------------------------------------------------------------------------------- /media/hulc2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/media/hulc2.gif -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | # https://github.com/psf/black 3 | line-length = 120 4 | target-version = ["py38"] 5 | exclude = "(.eggs|.git|.hg|.mypy_cache|.nox|.tox|.venv|.svn|_build|buck-out|build|dist)" 6 | 7 | [tool.isort] 8 | profile = "black" 9 | line_length = 120 10 | force_sort_within_sections = "True" 11 | order_by_type = "False" 12 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | isort 4 | pre-commit 5 | mypy 6 | pytest 7 | pytest-cov 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cmake 2 | wheel 3 | numpy>1.2 4 | hydra-core==1.1.1 5 | hydra-colorlog 6 | matplotlib 7 | opencv-python 8 | omegaconf 9 | kaggle 10 | plotly 11 | ftfy 12 | pyhash 13 | pytorch-lightning==1.8.3 14 | torch==1.12.1 15 | torchvision 16 | MulticoreTSNE 17 | gitpython 18 | scipy 19 | sentence-transformers 20 | pytorch3d 21 | gym 22 | moviepy 23 | tqdm 24 | termcolor 25 | wandb 26 | pypng 27 | segmentation-models-pytorch 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Setup hulc2 installation.""" 4 | 5 | from os import path as op 6 | import re 7 | 8 | from setuptools import find_packages, setup 9 | 10 | 11 | def _read(f): 12 | return open(op.join(op.dirname(__file__), f)).read() if op.exists(f) else "" 13 | 14 | 15 | _meta = _read("hulc2/__init__.py") 16 | 17 | 18 | def find_meta(_meta, string): 19 | l_match = re.search(r"^" + string + r'\s*=\s*"(.*)"', _meta, re.M) 20 | if l_match: 21 | return l_match.group(1) 22 | raise RuntimeError(f"Unable to find {string} string.") 23 | 24 | 25 | install_requires = [ 26 | l for l in _read("requirements.txt").split("\n") if l and not l.startswith("#") and not l.startswith("-") 27 | ] 28 | 29 | meta = dict( 30 | name=find_meta(_meta, "__project__"), 31 | version=find_meta(_meta, "__version__"), 32 | license=find_meta(_meta, "__license__"), 33 | description="Grounding Language with Visual Affordances over Unstructured Data", 34 | platforms=("Any"), 35 | zip_safe=False, 36 | keywords="pytorch hulc2".split(), 37 | author=find_meta(_meta, "__author__"), 38 | author_email=find_meta(_meta, "__email__"), 39 | url=" https://github.com/mees/hulc2", 40 | packages=find_packages(exclude=["tests"]), 41 | install_requires=install_requires, 42 | ) 43 | 44 | if __name__ == "__main__": 45 | print("find_package", find_packages(exclude=["tests"])) 46 | setup(**meta) 47 | -------------------------------------------------------------------------------- /setup_local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Setup hulc2 installation.""" 4 | 5 | from os import path as op 6 | import re 7 | 8 | from setuptools 
import find_packages, setup 9 | 10 | 11 | def _read(f): 12 | return open(op.join(op.dirname(__file__), f)).read() if op.exists(f) else "" 13 | 14 | 15 | _meta = _read("hulc2/__init__.py") 16 | 17 | 18 | def find_meta(_meta, string): 19 | l_match = re.search(r"^" + string + r'\s*=\s*"(.*)"', _meta, re.M) 20 | if l_match: 21 | return l_match.group(1) 22 | raise RuntimeError(f"Unable to find {string} string.") 23 | 24 | 25 | meta = dict( 26 | name=find_meta(_meta, "__project__"), 27 | version=find_meta(_meta, "__version__"), 28 | license=find_meta(_meta, "__license__"), 29 | description="Grounding Language with Visual Affordances over Unstructured Data", 30 | platforms=("Any"), 31 | zip_safe=False, 32 | keywords="pytorch hulc2".split(), 33 | author=find_meta(_meta, "__author__"), 34 | author_email=find_meta(_meta, "__email__"), 35 | url=" https://github.com/mees/hulc2", 36 | packages=find_packages(exclude=["tests"]), 37 | ) 38 | 39 | if __name__ == "__main__": 40 | print("find_package", find_packages(exclude=["tests"])) 41 | setup(**meta) 42 | -------------------------------------------------------------------------------- /slurm_scripts/sbatch_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo "Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | srun python ${@:2} 20 | 21 | # Print some Information about the end-time to STDOUT 22 | echo "DONE"; 23 | echo "Finished at $(date)"; 24 | -------------------------------------------------------------------------------- /slurm_scripts/sbatch_lfp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo "Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | timeout 23.8h srun python $2 slurm=true hydra.run.dir=$3 trainer.gpus=$4 ${@:5} 20 | 21 | if [[ $? -eq 124 ]]; then 22 | echo "Time limit exceeded. Resubmit job."; 23 | ssh ${USER}@kis2bat3 < 2 else np.inf 17 | 18 | checkpoints = get_all_checkpoints(training_dir) 19 | epochs = [str(e) for chk in checkpoints if (e := int(chk.stem.split("=")[1])) <= max_epoch] 20 | split_epochs = np.array_split(epochs, 8) 21 | epoch_args = [",".join(arr) for arr in split_epochs if len(arr)] 22 | for epoch_arg in epoch_args: 23 | cmd = [(training_dir / "evaluate.sh").as_posix(), "--checkpoints", epoch_arg] 24 | output = subprocess.check_output(cmd) 25 | print(output.decode("utf-8")) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | --------------------------------------------------------------------------------
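The two sketches below are not files from the repository; they are short usage illustrations of modules dumped above, with all sizes and hyperparameters chosen only for demonstration. This first one wires together SBert from hulc2/models/encoders/language_network.py and LanguageGoalEncoder from hulc2/models/encoders/goal_encoders.py to turn raw instructions into latent goal vectors. The sentence-transformer name matches the paraphrase-MiniLM-L3-v2 model referenced in the configs and annotation paths; the hidden and latent sizes are assumptions, not the training configuration.

    import torch

    from hulc2.models.encoders.goal_encoders import LanguageGoalEncoder
    from hulc2.models.encoders.language_network import SBert

    # SBert wraps a (frozen by default) sentence-transformer that returns 384-d embeddings.
    lang_net = SBert("paraphrase-MiniLM-L3-v2")

    # Hidden/latent sizes below are illustrative only.
    lang_goal_encoder = LanguageGoalEncoder(
        lang_net=lang_net,
        in_features=384,
        hidden_size=2048,
        latent_goal_features=32,
        l2_normalize_goal_embeddings=False,
        word_dropout_p=0.0,
        activation_function="ReLU",
    )

    with torch.no_grad():
        # The encoder accepts a list of strings and returns one latent goal per instruction.
        latent_goals = lang_goal_encoder(["open the drawer", "turn on the yellow light"])
    print(latent_goals.shape)  # torch.Size([2, 32])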
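Similarly, a minimal sketch (again not from the repository, sizes are assumptions) of the latent-plan distribution utilities in hulc2/utils/distributions.py, mirroring the continuous path used by PlanProposalNetwork: build the output head with build_state, convert its activations into a ContState, and sample a latent plan.

    import torch

    from hulc2.utils.distributions import Distribution

    hidden_size, plan_features, batch_size = 256, 32, 4  # illustrative sizes

    dist = Distribution(dist="continuous")
    # For the continuous case this is a Linear(hidden_size, 2 * plan_features) head (mean and pre-softplus std).
    fc_state = dist.build_state(hidden_size, plan_features)

    x = torch.randn(batch_size, hidden_size)            # stand-in for fused perception + goal features
    state = dist.forward_dist(fc_state(x))              # ContState(mean, std), std via softplus + min_std
    plan_dist = dist.get_dist(state)                    # Independent Normal over the plan dimensions
    sampled_plan = dist.sample_latent_plan(plan_dist)
    print(sampled_plan.shape)  # torch.Size([4, 32])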