├── .flake8 ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── conf ├── __init__.py ├── affordance │ ├── aff_detection │ │ ├── clip.yaml │ │ ├── mask_data.yaml │ │ ├── pixel_data.yaml │ │ ├── r3m.yaml │ │ ├── rn18_bert_mask.yaml │ │ ├── rn18_bert_pixel.yaml │ │ ├── rn18_clip_mask.yaml │ │ ├── rn18_clip_pixel.yaml │ │ ├── rn18_sbert_pixel.yaml │ │ ├── rn50_bert_pixel.yaml │ │ ├── rn50_clip_pixel.yaml │ │ └── streams │ │ │ ├── clip_lingunet.yaml │ │ │ ├── r3m_rn18_sbert.yaml │ │ │ ├── rn18_clip_lingunet.yaml │ │ │ ├── rn50_sbert_lingunet.yaml │ │ │ ├── rn_bert_lingunet.yaml │ │ │ └── rn_sbert_lingunet.yaml │ ├── cameras │ │ ├── cameras │ │ │ ├── gripper.yaml │ │ │ ├── opposing.yaml │ │ │ ├── static.yaml │ │ │ ├── static_calvin.yaml │ │ │ └── tactile.yaml │ │ ├── high_res.yaml │ │ ├── no_cameras.yaml │ │ ├── static_and_gripper.yaml │ │ ├── static_and_gripper_calvin.yaml │ │ └── static_and_tactile.yaml │ ├── cfg_datacollection.yaml │ ├── cfg_merge_dataset.yaml │ ├── labeling │ │ ├── cameras │ │ │ ├── cameras │ │ │ │ ├── gripper.yaml │ │ │ │ └── static.yaml │ │ │ └── static_and_gripper.yaml │ │ ├── env │ │ │ └── env_labeling.yaml │ │ ├── real_world.yaml │ │ ├── real_world_lang.yaml │ │ ├── robot │ │ │ └── panda.yaml │ │ ├── scene │ │ │ └── empty_table.yaml │ │ ├── simulation.yaml │ │ └── simulation_lang.yaml │ ├── test_affordance.yaml │ ├── train_affordance.yaml │ ├── train_depth.yaml │ └── transforms │ │ ├── clip.yaml │ │ ├── clip_color.yaml │ │ ├── clip_randShift.yaml │ │ ├── clip_randShift_color.yaml │ │ ├── clip_real_world.yaml │ │ ├── gray.yaml │ │ ├── r3m.yaml │ │ ├── rgb.yaml │ │ ├── rgb_color.yaml │ │ ├── rgb_randShift.yaml │ │ └── rgb_randShift_color.yaml ├── annotations │ ├── new_playtable.yaml │ └── new_playtable_validation.yaml ├── callbacks │ ├── calvin_default.yaml │ ├── checkpoint │ │ ├── all.yaml │ │ ├── clip_loss.yaml │ │ ├── kl.yaml │ │ ├── lh_sr.yaml │ │ ├── state_recon.yaml │ │ ├── task_sr.yaml │ │ └── val_action.yaml │ ├── kl_schedule │ │ ├── constant.yaml │ │ ├── linear.yaml │ │ └── sigmoid.yaml │ ├── real_world_default.yaml │ ├── rollout │ │ ├── default.yaml │ │ └── tasks │ │ │ └── new_playtable_tasks.yaml │ ├── rollout_lh │ │ └── default.yaml │ ├── shm_signal │ │ └── default.yaml │ └── tsne_plot │ │ └── default.yaml ├── cfg_high_level.yaml ├── cfg_high_level_rw.yaml ├── cfg_low_level.yaml ├── cfg_low_level_rw.yaml ├── datamodule │ ├── calvin_default.yaml │ ├── datasets │ │ ├── lang_dataset │ │ │ ├── lang.yaml │ │ │ └── lang_shm.yaml │ │ ├── lang_only.yaml │ │ ├── vision_dataset │ │ │ ├── vision.yaml │ │ │ └── vision_shm.yaml │ │ ├── vision_lang.yaml │ │ ├── vision_lang_shm.yaml │ │ ├── vision_only.yaml │ │ └── vision_only_shm.yaml │ ├── observation_space │ │ ├── all_mods_abs_act.yaml │ │ ├── lang_rgb_static_abs_act.yaml │ │ ├── lang_rgb_static_gripper_abs_act.yaml │ │ ├── lang_rgb_static_gripper_rel_act.yaml │ │ ├── lang_rgb_static_gripper_rel_gripper_act.yaml │ │ ├── lang_rgb_static_rel_act.yaml │ │ ├── lang_rgb_static_robot_scene_abs_act.yaml │ │ ├── lang_rgb_static_tactile_abs_act.yaml │ │ ├── lang_rgbd_both_abs_act.yaml │ │ ├── lang_rgbd_both_rel_act.yaml │ │ ├── lang_rgbd_static_gripper_rel_act.yaml │ │ ├── lang_rgbd_static_robot_abs_act.yaml │ │ ├── rgb_static_abs_act.yaml │ │ ├── rgb_static_gripper_rel_gripper_act.yaml │ │ ├── rgb_static_robot_scene_abs_act.yaml │ │ └── state_only.yaml │ ├── proprioception_dims │ │ ├── none.yaml │ │ ├── robot_full.yaml │ │ ├── robot_no_joints.yaml │ │ ├── robot_no_joints_no_gripper_width.yaml 
│ │ └── robot_scene.yaml │ ├── real_world_default.yaml │ └── transforms │ │ ├── clip.yaml │ │ ├── play_basic.yaml │ │ ├── rand_shift.yaml │ │ ├── real_world.yaml │ │ ├── real_world_no_rand_shift.yaml │ │ ├── real_world_r3m.yaml │ │ └── real_world_square.yaml ├── inference │ └── config_inference.yaml ├── inference_real.yaml ├── lang_ann.yaml ├── logger │ ├── tb_logger.yaml │ └── wandb.yaml ├── loss │ └── default.yaml ├── model │ ├── action_decoder │ │ ├── deterministic.yaml │ │ ├── logistic_decoder_rnn_calvin.yaml │ │ └── logistic_decoder_rnn_real_world.yaml │ ├── calvin_hulc++.yaml │ ├── clip_lang.yaml │ ├── distribution │ │ ├── continuous.yaml │ │ └── discrete.yaml │ ├── gcbc.yaml │ ├── language_encoder │ │ ├── default.yaml │ │ ├── none.yaml │ │ └── sbert.yaml │ ├── language_goal │ │ ├── default.yaml │ │ └── none.yaml │ ├── lr_scheduler │ │ ├── constant.yaml │ │ ├── cosine_schedule_with_warmup.yaml │ │ └── linear_schedule_with_warmup.yaml │ ├── optimizer │ │ ├── adam.yaml │ │ ├── adamw.yaml │ │ └── sgd.yaml │ ├── perceptual_encoder │ │ ├── RGBD_both.yaml │ │ ├── default.yaml │ │ ├── depth_gripper │ │ │ ├── default.yaml │ │ │ └── none.yaml │ │ ├── depth_static │ │ │ ├── default.yaml │ │ │ └── none.yaml │ │ ├── gripper_cam.yaml │ │ ├── proprio │ │ │ ├── identity.yaml │ │ │ └── none.yaml │ │ ├── resnet_aff.yaml │ │ ├── rgb_gripper │ │ │ ├── default.yaml │ │ │ ├── none.yaml │ │ │ ├── r3m.yaml │ │ │ ├── resnet.yaml │ │ │ └── resnet_aff.yaml │ │ ├── rgb_static │ │ │ ├── clip.yaml │ │ │ ├── default.yaml │ │ │ ├── r3m.yaml │ │ │ ├── resnet.yaml │ │ │ ├── resnet_aff.yaml │ │ │ └── vision_conv.yaml │ │ ├── state_decoder │ │ │ ├── default.yaml │ │ │ └── none.yaml │ │ ├── static_RGBD.yaml │ │ ├── static_RGB_tactile.yaml │ │ └── tactile │ │ │ ├── default.yaml │ │ │ └── none.yaml │ ├── plan_proposal │ │ └── default.yaml │ ├── plan_recognition │ │ ├── bilstm.yaml │ │ ├── birnn.yaml │ │ └── transformers.yaml │ ├── proj_vis_lang │ │ ├── default.yaml │ │ └── none.yaml │ ├── real_world_hulc++.yaml │ ├── sbert.yaml │ └── visual_goal │ │ └── default.yaml ├── paths │ └── general_paths.yaml ├── simulation │ ├── agent │ │ ├── base.yaml │ │ ├── baseline.yaml │ │ └── play_lmp.yaml │ ├── cameras │ │ ├── cameras │ │ │ ├── gripper.yaml │ │ │ ├── opposing.yaml │ │ │ ├── static.yaml │ │ │ ├── static_calvin.yaml │ │ │ └── tactile.yaml │ │ ├── high_res.yaml │ │ ├── no_cameras.yaml │ │ ├── static_and_gripper.yaml │ │ ├── static_and_gripper_calvin.yaml │ │ └── static_and_tactile.yaml │ ├── env │ │ └── env.yaml │ ├── robot │ │ ├── panda.yaml │ │ ├── panda_digit.yaml │ │ └── panda_longer_finger.yaml │ └── scene │ │ ├── calvin_scene_A.yaml │ │ ├── calvin_scene_A_eval.yaml │ │ ├── calvin_scene_B.yaml │ │ ├── calvin_scene_C.yaml │ │ ├── calvin_scene_D.yaml │ │ └── calvin_scene_D_eval.yaml ├── trainer │ └── play_trainer.yaml ├── training │ └── default_training.yaml └── utils │ └── combine_dataset.yaml ├── dataset ├── README.md └── download_data.sh ├── docs ├── affordance.md ├── affordance_condensed.md └── pipeline.md ├── hulc2 ├── .gitignore ├── __init__.py ├── affordance │ ├── base_detector.py │ ├── dataset_creation │ │ ├── core │ │ │ ├── data_reader.py │ │ │ ├── real_cameras.py │ │ │ └── utils.py │ │ ├── create_percentage_data_splits.py │ │ ├── data_labeler.py │ │ ├── data_labeler_lang.py │ │ ├── find_norm_values.py │ │ └── merge_datasets.py │ ├── datasets │ │ ├── mask_label.py │ │ ├── pixel_label.py │ │ └── transforms.py │ ├── models │ │ ├── core │ │ │ ├── __init__.py │ │ │ ├── clip.py │ │ │ ├── fusion.py │ │ │ ├── 
language_network.py │ │ │ ├── resnet.py │ │ │ ├── unet.py │ │ │ ├── unet_decoder.py │ │ │ └── utils.py │ │ ├── depth │ │ │ ├── depth_gaussian.py │ │ │ ├── depth_logistics.py │ │ │ └── depth_module.py │ │ ├── lang_fusion │ │ │ ├── aff_lang_depth_pixel.py │ │ │ ├── one_stream_attention_lang_fusion_mask.py │ │ │ └── one_stream_attention_lang_fusion_pixel.py │ │ ├── language_encoders │ │ │ ├── base_lang_encoder.py │ │ │ ├── bert_lang_encoder.py │ │ │ ├── clip_lang_encoder.py │ │ │ ├── distilbert_lang_encoder.py │ │ │ └── sbert_lang_encoder.py │ │ └── visual_lang_encoders │ │ │ ├── base_lingunet.py │ │ │ ├── r3m_rn18.py │ │ │ ├── rn50_clip_lingunet.py │ │ │ ├── rn50_unet.py │ │ │ └── rn_lingunet.py │ ├── pixel_aff_lang_detector.py │ ├── run_on_cluster │ │ ├── cluster.py │ │ ├── sbatch_eval.sh │ │ ├── sbatch_train.sh │ │ └── slurm_eval.py │ ├── scripts │ │ ├── get_aff_preds.py │ │ ├── get_best_eval_model.py │ │ ├── make_seq_videos.py │ │ └── transform_old_episodes_split.py │ ├── test_affordance.py │ ├── test_move_to_pt.py │ ├── train_affordance.py │ ├── train_depth.py │ └── utils │ │ ├── data_utils.py │ │ ├── flowlib.py │ │ └── losses.py ├── agents │ ├── base_agent.py │ ├── lmp_agent.py │ └── real_world_agent.py ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── hulc2_real_world_data_module.py │ ├── hulc2_sim_data_module.py │ ├── npz_dataset.py │ ├── play_data_module.py │ ├── random.py │ ├── shm_dataset.py │ ├── shm_dataset_skip.py │ └── utils │ │ ├── __init__.py │ │ ├── episode_utils.py │ │ └── shared_memory_loader.py ├── env_wrappers │ ├── aff_lfp_real_world_wrapper.py │ ├── play_aff_lmp_wrapper.py │ └── play_lmp_wrapper.py ├── evaluation │ ├── __init__.py │ ├── create_plots.py │ ├── evaluate_policy.py │ ├── evaluate_policy_singlestep.py │ ├── evaluation.py │ ├── manager_aff_lmp.py │ ├── manager_lmp.py │ ├── multistep_sequences.py │ ├── rollouts_interactive.py │ ├── run_multiple.py │ ├── test_policy_interactive.py │ └── utils.py ├── models │ ├── __init__.py │ ├── auxiliary_loss_networks │ │ ├── __init__.py │ │ ├── bc_z_lang_decoder.py │ │ ├── mia_lang_discriminator.py │ │ ├── proj_vis_lang.py │ │ └── state_decoder.py │ ├── decoders │ │ ├── __init__.py │ │ ├── action_decoder.py │ │ ├── clip_proj.py │ │ ├── deterministic_decoder.py │ │ ├── logistic_decoder_rnn.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── gripper_control.py │ │ │ └── rnn.py │ ├── encoders │ │ ├── __init__.py │ │ ├── clip_lang_encoder.py │ │ ├── goal_encoders.py │ │ ├── lang_encoder.py │ │ └── language_network.py │ ├── gcbc.py │ ├── hulc2.py │ ├── perceptual_encoders │ │ ├── __init__.py │ │ ├── clip.py │ │ ├── concat_encoders.py │ │ ├── poe_encoder.py │ │ ├── proprio_encoder.py │ │ ├── tactile_encoder.py │ │ ├── vision_clip.py │ │ ├── vision_network.py │ │ ├── vision_network_conv.py │ │ ├── vision_network_gripper.py │ │ ├── vision_r3m.py │ │ ├── vision_resnet.py │ │ └── vision_resnet_aff.py │ └── plan_encoders │ │ ├── __init__.py │ │ ├── plan_proposal_net.py │ │ └── plan_recognition_net.py ├── rollout │ ├── gpt3_planning.py │ ├── real_world_eval_aff.py │ ├── real_world_eval_combined.py │ ├── real_world_rollout.py │ ├── real_world_rollout_lang.py │ ├── real_world_rollout_vision.py │ ├── rollout.py │ ├── rollout_long_horizon.py │ └── rollout_video.py ├── scripts │ ├── get_annotations.py │ ├── utils │ │ ├── colors.yaml │ │ ├── config │ │ │ ├── lang_model │ │ │ │ ├── bert.yaml │ │ │ │ └── clip.yaml │ │ │ └── retrieve_data.yaml │ │ ├── tasks.yaml │ │ └── utils.py │ └── viz_annotations.py ├── training.py ├── utils │ ├── 
__init__.py │ ├── automatic_lang_annotator_mp.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── clip_tokenizer.py │ ├── combine_dataset.py │ ├── compute_proprioception_statistics.py │ ├── convert_real_raw_data_splits.py │ ├── create_splits.py │ ├── data_utils.py │ ├── data_visualization.py │ ├── dataset_pipeline.sh │ ├── dataset_task_statistics.py │ ├── distributions.py │ ├── img_utils.py │ ├── kl_callbacks.py │ ├── language_annotator.py │ ├── preprocess_real_data.py │ ├── real_world_dataset_pipeline.sh │ ├── relabel_with_new_lang_model.py │ ├── render_low_freq.py │ ├── simple_tokenizer.py │ ├── split_dataset.py │ ├── tensor_utils.py │ ├── transforms.py │ ├── utils.py │ ├── visualizations.py │ ├── visualize_annotations.py │ ├── visualize_calvin_dataset.py │ └── visualize_real_data.py ├── visualization │ └── tsne_plot.py ├── wrap_training.py └── wrappers │ ├── hulc2_wrapper.py │ └── panda_lfp_wrapper.py ├── install.sh ├── media └── hulc2.gif ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── setup_local.py └── slurm_scripts ├── sbatch_eval.sh ├── sbatch_lfp.sh ├── slurm_eval.py └── slurm_training.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git 3 | # Default is 79 in PEP 8 4 | max-line-length = 120 5 | select = E,F,W,C 6 | ignore=W503, # line break before binary operator, need for black 7 | E203, # whitespace before ':'. Opposite convention enforced by black 8 | E731, # do not assign a lambda expression, use a def 9 | E722, 10 | F401, 11 | F841, 12 | E402, # module level import not at top of file 13 | E741, # ambiguous variable name 14 | E501, # line too long. Handled by black 15 | C406, # Unnecessary list literal - rewrite as a dict literal 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "calvin_env"] 2 | path = calvin_env 3 | url = git@github.com:JessicaBorja/calvin_env.git 4 | [submodule "r3m"] 5 | path = r3m 6 | url = git@github.com:mees/r3m.git 7 | [submodule "LangAnnotationApp"] 8 | path = LangAnnotationApp 9 | url = git@github.com:mees/LanguageAnnotationWebApp.git 10 | [submodule "robot_io"] 11 | path = robot_io 12 | url = git@github.com:mees/robot_io.git 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.8 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 22.10.0 6 | hooks: 7 | - id: black 8 | language_version: python3.8 9 | 10 | - repo: https://gitlab.com/pycqa/flake8 11 | rev: 3.8.4 12 | hooks: 13 | - id: flake8 14 | 15 | - repo: https://github.com/pycqa/isort 16 | rev: 5.7.0 17 | hooks: 18 | - id: isort 19 | 20 | - repo: https://github.com/pre-commit/mirrors-mypy 21 | rev: v0.812 22 | hooks: 23 | - id: mypy 24 | args: [--ignore-missing-imports, --warn-no-return, --warn-redundant-casts, --disallow-incomplete-defs] 25 | additional_dependencies: [pytorch-lightning==1.5.9, torch==1.10.1, numpy] 26 | 27 | - repo: https://github.com/pre-commit/pre-commit-hooks 28 | rev: v4.0.1 29 | hooks: 30 | - id: check-yaml 31 | - id: trailing-whitespace 32 | - id: end-of-file-fixer 33 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Oier Mees 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /conf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/__init__.py -------------------------------------------------------------------------------- /conf/affordance/aff_detection/clip.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | _target_: hulc2.affordance.clip_detector.CLIPPointDetector 3 | resize: 224 4 | clip_model: "RN50" # ["RN50", "RN101", "RN50x4", "RN50x16"] 5 | saliency_layer: "layer4" # ["layer4", "layer3", "layer2", "layer1"] 6 | blur: False 7 | viz: True 8 | 9 | img_size: 224 10 | defaults: 11 | - /transforms: rgb 12 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/mask_data.yaml: -------------------------------------------------------------------------------- 1 | wandb_saver: 2 | val_loss: 3 | monitor: 'Validation/total_loss' 4 | save_top_k: 2 5 | mode: min 6 | verbose: True 7 | val_miou: 8 | monitor: 'Validation/miou' 9 | save_top_k: 2 10 | mode: max 11 | verbose: True 12 | save_last: True 13 | 14 | model: 15 | cfg: 16 | hough_voting: 17 | skip_pixels: 3 18 | inlier_threshold: 0.8 19 | angle_discretization: 100 20 | inlier_distance: 16 21 | percentage_threshold: 0.4 22 | object_center_kernel_radius: 16 23 | 24 | dataset: 25 | _target_: hulc2.affordance.datasets.mask_label.MaskLabelLabelDataLang 26 | transforms: ${aff_detection.streams.transforms} 27 | radius: 28 | static: 16 29 | gripper: 10 30 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/pixel_data.yaml: -------------------------------------------------------------------------------- 1 | wandb_saver: 2 | val_loss: 3 | monitor: 'Validation/total_loss' 4 | save_top_k: 2 5 | mode: min 6 | verbose: True 7 | val_err: 8 | monitor: 'Validation/px_dist_err' 9 | save_top_k: 2 10 | mode: min 11 | verbose: True 12 | save_last: True 13 | 14 | dataset: 15 | _target_: hulc2.affordance.datasets.pixel_label.PixeLabelDataLang 16 | transforms: ${aff_detection.streams.transforms} 17 | 
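A note on how these two data configs appear to be consumed (an illustrative sketch under that assumption, not code from the repository): the `wandb_saver` entries above mirror the keyword arguments of PyTorch Lightning's `ModelCheckpoint` (the same `monitor`/`mode`/`save_top_k`/`verbose` keys used by the callbacks under `conf/callbacks/checkpoint/`), which suggests one checkpoint callback per monitored metric.

```python
# Illustrative sketch only: assumes each wandb_saver entry becomes one
# pytorch_lightning.callbacks.ModelCheckpoint; the repo's trainer may wire
# this up differently. dirpath/filename are hypothetical example values.
from pytorch_lightning.callbacks import ModelCheckpoint

wandb_saver = {
    "val_loss": dict(monitor="Validation/total_loss", save_top_k=2, mode="min", verbose=True),
    "val_err": dict(monitor="Validation/px_dist_err", save_top_k=2, mode="min", verbose=True),
}

# One checkpoint callback per monitored metric.
callbacks = [
    ModelCheckpoint(dirpath="checkpoints", filename=f"{name}-{{epoch}}", **kwargs)
    for name, kwargs in wandb_saver.items()
]
```

The `dataset` node with its `_target_` key is, under the same assumption, left to `hydra.utils.instantiate`, with `${aff_detection.streams.transforms}` resolved from the composed config before the dataset class is built.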
-------------------------------------------------------------------------------- /conf/affordance/aff_detection/r3m.yaml: -------------------------------------------------------------------------------- 1 | # R3M Resnet 18 with respective decoder channels 2 | # SBERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: r3m_rn18_sbert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | batchnorm: False 18 | encoder_name: r3m_resnet18 19 | unet_cfg: 20 | decoder_channels: [256, 128, 64, 32] 21 | 22 | defaults: 23 | - pixel_data 24 | - streams: r3m_rn18_sbert 25 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_bert_mask.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Affordance binary mask prediction 4 | name: rn18_bert_mask 5 | 6 | model: 7 | _target_: hulc2.affordance.mask_aff_lang_detector.MaskAffLangDetector 8 | _recursive_: False 9 | cfg: 10 | attn_stream_fusion_type: 'add' 11 | lang_fusion_type: 'mult' 12 | streams: ${aff_detection.streams} 13 | batchnorm: False 14 | loss: 15 | centers: 2.5 16 | dice: 5 17 | ce_loss: 1 18 | affordance: 19 | add_dice: true 20 | ce_class_weights: [0.2, 0.8] 21 | unet_cfg: 22 | decoder_channels: [256, 128, 64, 32] 23 | 24 | img_size: 224 25 | defaults: 26 | - mask_data 27 | - streams: rn18_bert_lingunet 28 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_bert_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn18_bert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | batchnorm: False 18 | encoder_name: resnet18 19 | unet_cfg: 20 | decoder_channels: [256, 128, 64, 32] 21 | 22 | defaults: 23 | - pixel_data 24 | - streams: rn_bert_lingunet 25 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_clip_mask.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # CLIP as sentence encoder 3 | # Affordance binary mask prediction 4 | name: rn18_clip_mask 5 | 6 | model: 7 | _target_: hulc2.affordance.mask_aff_lang_detector.MaskAffLangDetector 8 | _recursive_: False 9 | cfg: 10 | attn_stream_fusion_type: 'add' 11 | lang_fusion_type: 'mult' 12 | streams: ${aff_detection.streams} 13 | batchnorm: False 14 | loss: 15 | centers: 2.5 16 | dice: 5 17 | ce_loss: 1 18 | affordance: 19 | add_dice: true 20 | ce_class_weights: [0.2, 0.8] 21 | unet_cfg: 22 | decoder_channels: [256, 128, 64, 32] 23 | 24 | img_size: 224 25 | defaults: 26 | - mask_data 27 | - streams: rn18_clip_lingunet 28 | 
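The detector configs in this group all follow the same pattern: the `defaults:` list pulls in one file from `streams/` (vision backbone, language encoder and transforms) plus either `pixel_data` or `mask_data`, and interpolations such as `${aff_detection.streams}` and `${aff_detection.img_size}` let the model node reuse those shared keys. Below is a minimal OmegaConf sketch of that interpolation behaviour, with placeholder values rather than the repository's defaults:

```python
# Minimal sketch of the OmegaConf interpolation used throughout these configs;
# the values below are placeholders, not the repository's actual defaults.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "aff_detection": {
        "img_size": 224,
        "streams": {"vision_net": "rn", "lang_enc": "clip"},
        "model_cfg": {
            # the model node simply points back at the shared stream definition
            "streams": "${aff_detection.streams}",
            "batchnorm": False,
        },
    }
})

# Interpolations resolve against the config root, so model_cfg.streams comes
# back as {"vision_net": "rn", "lang_enc": "clip"} when resolved.
print(OmegaConf.to_yaml(cfg.aff_detection.model_cfg, resolve=True))
```

Swapping the entry in the `defaults:` list (e.g. `- streams: rn18_clip_lingunet`) is therefore enough to change both encoders and the transform set without touching the model node itself.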
-------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_clip_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # CLIP as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn18_clip_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | batchnorm: False 18 | unet_cfg: # [256, 128, 64, 32, 16] 19 | decoder_channels: [256, 128, 64, 32] 20 | 21 | defaults: 22 | - pixel_data 23 | - streams: rn18_clip_lingunet 24 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn18_sbert_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn18_bert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | # streams: 18 | # lang_enc: sbert 19 | batchnorm: False 20 | encoder_name: resnet18 21 | unet_cfg: 22 | decoder_channels: [512, 256, 128, 64, 32] 23 | 24 | defaults: 25 | - pixel_data 26 | - streams: rn_sbert_lingunet 27 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn50_bert_pixel.yaml: -------------------------------------------------------------------------------- 1 | # Unet Resnet 18 with respective decoder channels 2 | # BERT as sentence encoder 3 | # Single pixel prediction 4 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 5 | _recursive_: False 6 | name: rn50_bert_pixel 7 | img_size: 224 8 | 9 | model_cfg: 10 | freeze_encoder: 11 | lang: True 12 | aff: True 13 | depth: True 14 | attn_stream_fusion_type: 'add' 15 | lang_fusion_type: 'mult' 16 | streams: ${aff_detection.streams} 17 | streams: 18 | lang_enc: sbert 19 | batchnorm: False 20 | encoder_name: resnet50 21 | unet_cfg: 22 | decoder_channels: [256, 128, 64, 32] 23 | 24 | defaults: 25 | - pixel_data 26 | - streams: rn_bert_lingunet 27 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/rn50_clip_pixel.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.affordance.pixel_aff_lang_detector.PixelAffLangDetector 2 | _recursive_: False 3 | name: rn50_clip_pixel 4 | img_size: 224 5 | 6 | model_cfg: 7 | batchnorm: False # important: False because batch_size=1 8 | attn_stream_fusion_type: 'add' 9 | lang_fusion_type: 'mult' 10 | streams: ${aff_detection.streams} 11 | freeze_encoder: 12 | lang: True 13 | aff: True 14 | depth: True 15 | defaults: 16 | - pixel_data 17 | - streams: clip_lingunet 18 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/clip_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: clip 2 | 
lang_enc: clip 3 | defaults: 4 | - /transforms: clip_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/r3m_rn18_sbert.yaml: -------------------------------------------------------------------------------- 1 | vision_net: r3m_rn18 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: r3m 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn18_clip_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: clip 3 | defaults: 4 | - /transforms: clip_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn50_sbert_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: rgb_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn_bert_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: rgb_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/aff_detection/streams/rn_sbert_lingunet.yaml: -------------------------------------------------------------------------------- 1 | vision_net: rn 2 | lang_enc: sbert 3 | defaults: 4 | - /transforms: rgb_randShift_color 5 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/gripper.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.gripper_camera.GripperCamera 2 | name: gripper 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 84 8 | height: 84 9 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/opposing.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: opposing 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 200 8 | height: 200 9 | look_at: [ 0.4, 0.5, 0.6 ] 10 | look_from: [ 0.4, 1.5, 0.9 ] 11 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/static.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 300 8 | height: 300 9 | look_at: [ -0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 10 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/static_calvin.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 200 8 | height: 200 9 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 
10 | look_from: [2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/affordance/cameras/cameras/tactile.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.tactile_sensor.TactileSensor 2 | name: tactile 3 | width: 120 4 | height: 160 5 | digit_link_ids: [10, 12] # ${robot.digit_link_ids} 6 | visualize_gui: true 7 | config_path: conf/digit_sensor/config_digit.yml 8 | -------------------------------------------------------------------------------- /conf/affordance/cameras/high_res.yaml: -------------------------------------------------------------------------------- 1 | static: 2 | _target_: calvin_env.camera.static_camera.StaticCamera 3 | name: static 4 | fov: 10 5 | aspect: 1 6 | nearval: 0.01 7 | farval: 10 8 | width: 500 9 | height: 500 10 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 11 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 12 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 13 | 14 | gripper: 15 | _target_: calvin_env.camera.gripper_camera.GripperCamera 16 | name: gripper 17 | fov: 75 18 | aspect: 1 19 | nearval: 0.01 20 | farval: 2 21 | width: 300 22 | height: 300 23 | -------------------------------------------------------------------------------- /conf/affordance/cameras/no_cameras.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/affordance/cameras/no_cameras.yaml -------------------------------------------------------------------------------- /conf/affordance/cameras/static_and_gripper.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/affordance/cameras/static_and_gripper_calvin.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static_calvin 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/affordance/cameras/static_and_tactile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@tactile: tactile 4 | -------------------------------------------------------------------------------- /conf/affordance/cfg_datacollection.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - labeling: simulation_lang 3 | - labeling/env@env: env_labeling 4 | - labeling/scene@scene: empty_table 5 | - labeling/robot@robot: panda 6 | - labeling/cameras@cameras: static_and_gripper 7 | - ../paths@paths: general_paths 8 | - override hydra/hydra_logging: colorlog 9 | - override hydra/job_logging: colorlog 10 | 11 | #Environment 12 | output_size: 13 | static: [150, 200] 14 | gripper: 84 15 | mask_on_close: False 16 | save_viz: False 17 | euler_obs: True 18 | frames_before_saving: 5000 19 | viz: false 20 | 21 | language: 22 | folder: lang_paraphrase-MiniLM-L3-v2 23 | file: auto_lang_ann.npy 24 | 25 | # To write all data to a single split 26 | output_cfg: 27 | single_split: null 
# "validation", "training" 28 | multiclass: False 29 | 30 | # For collecting dataset with playdata 31 | dataset_name: real_world/500k_all_tasks_dataset_15hz 32 | play_data_dir: /export/home/meeso/${dataset_name} 33 | 34 | #Output directory where dataset will be stored 35 | output_dir: ${paths.datasets}/${dataset_name} 36 | 37 | # Finding classes in playdata 38 | task_discovery: 39 | dist_thresh: 0.03 # Max distance to consider that object is the same 40 | sample_freq: 20 # track objects every sample_freq ts 41 | frames_after_move: 3 # Find movement diretion after frames_after_move frames 42 | max_n_episodes: 2 # Find clusters in subset of data 43 | 44 | # Prediction 45 | task_detector: 46 | cluster_info_path: null 47 | dataset_dir: ${output_dir} 48 | k_largest: 2 49 | dims: [0, 1, 2] # x, y, z, r_x, r_y, r_z 50 | clustering_method: Kmeans 51 | params: 52 | n_clusters: 2 53 | random_state: 0 54 | # clustering_method: DBSCAN 55 | # params: 56 | # eps: 0.3 57 | # min_samples: 4 58 | # eps: 0.08 59 | # min_samples: 5 60 | 61 | 62 | 63 | hydra: 64 | run: 65 | dir: ./hydra_outputs/datacollection/${now:%Y-%m-%d}_${now:%H-%M-%S} 66 | -------------------------------------------------------------------------------- /conf/affordance/cfg_merge_dataset.yaml: -------------------------------------------------------------------------------- 1 | # Paths to where episodes_split.json is stored for each dataset 2 | # Relative to the main hulc2 directory 3 | data_lst: 4 | - ../../../datasets/calvin_langDepthEndPt/training 5 | - ../../../datasets/calvin_langDepthEndPt/validation 6 | -------------------------------------------------------------------------------- /conf/affordance/labeling/cameras/cameras/gripper.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.gripper_camera.GripperCamera 2 | name: gripper 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 84 8 | height: 84 9 | -------------------------------------------------------------------------------- /conf/affordance/labeling/cameras/cameras/static.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 200 8 | height: 200 9 | look_at: [ -0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 10 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/affordance/labeling/cameras/static_and_gripper.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/affordance/labeling/env/env_labeling.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.calvin_env.envs.play_table_env.PlayTableSimEnv 2 | _recursive_: false 3 | cameras: ${cameras} 4 | seed: 0 5 | bullet_time_step: 240.0 6 | use_vr: false 7 | show_gui: false 8 | robot_cfg: ${robot} 9 | scene_cfg: ${scene} 10 | use_scene_info: false 11 | use_egl: false 12 | control_freq: 30 13 | -------------------------------------------------------------------------------- /conf/affordance/labeling/real_world.yaml: 
-------------------------------------------------------------------------------- 1 | split_by_episodes: True 2 | mode: "real_world_raw" 3 | back_frames: [0, 60] 4 | fixed_pt_del_radius: 0.08 # Meters 5 | remove_blank_mask_instances: True 6 | min_labels: 4 7 | label_size: 8 | static: 17 9 | gripper: 30 10 | -------------------------------------------------------------------------------- /conf/affordance/labeling/real_world_lang.yaml: -------------------------------------------------------------------------------- 1 | split_by_episodes: True 2 | mode: "real_world_processed" 3 | back_frames: [0, 100] 4 | fixed_pt_del_radius: 0.08 # Meters 5 | remove_blank_mask_instances: True 6 | min_labels: 1 7 | label_size: 8 | static: 10 9 | gripper: 30 10 | -------------------------------------------------------------------------------- /conf/affordance/labeling/robot/panda.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.robot.robot.Robot 2 | filename: franka_panda/panda.urdf 3 | base_position: ${scene.robot_base_position} 4 | base_orientation: ${scene.robot_base_orientation} 5 | initial_joint_positions: ${scene.robot_initial_joint_positions} 6 | max_joint_force: 200.0 7 | gripper_force: 200 8 | arm_joint_ids: [0, 1, 2, 3, 4, 5, 6] 9 | gripper_joint_ids: [9, 10] 10 | gripper_joint_limits: [0, 0.04] 11 | tcp_link_id: 13 12 | end_effector_link_id: 7 13 | gripper_cam_link: 12 14 | use_nullspace: false 15 | max_velocity: 2 16 | use_ik_fast: false 17 | magic_scaling_factor_pos: 1 # 1.6 18 | magic_scaling_factor_orn: 1 # 2.2 19 | use_target_pose: true 20 | euler_obs: true 21 | workspace_limits: [[-0.20, 0.35, 0.61], [0.7, 0.85, 1.2]] 22 | max_rel_pos: 0.02 23 | max_rel_orn: 0.05 24 | -------------------------------------------------------------------------------- /conf/affordance/labeling/scene/empty_table.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${paths.vr_data} 4 | global_scaling: 0.8 5 | euler_obs: True 6 | robot_base_position: [0.3, 0.15, 0.45] 7 | robot_base_orientation: [0, 0, 1.5707963] 8 | robot_initial_joint_positions: [-0.3457686708019129, -0.15454379621111053, -0.6607497652179231, -2.431721569843283, -0.12811896258574057, 2.3050911768605884, -0.128854091294185] 9 | surfaces: [] 10 | 11 | objects: 12 | fixed_objects: 13 | table: 14 | file: table/hightable.urdf 15 | initial_pos: [0.3, 0.7, 0.02] 16 | initial_orn: [0, 0, 0] 17 | fixed: true 18 | bin: 19 | file: ais_objects/bin_10_30_50/bin_10_30_50.urdf 20 | initial_pos: [0.7, 0.75, 0.6] 21 | initial_orn: [1.57, 0, 0] 22 | fixed: true 23 | movable_objects: 24 | bowl: 25 | file: 024_bowl/google_16k/textured.urdf 26 | initial_pos: [0.18, 0.58, 0.6230520401985216] 27 | initial_orn: [0, 0, 0] 28 | fixed: false 29 | -------------------------------------------------------------------------------- /conf/affordance/labeling/simulation.yaml: -------------------------------------------------------------------------------- 1 | split_by_episodes: True 2 | mode: "simulation" 3 | back_frames: [5, 50] 4 | fixed_pt_del_radius: 0.09 # Meters 5 | remove_blank_mask_instances: False 6 | min_labels: 4 7 | label_size: 8 | static: 14 9 | gripper: 30 10 | -------------------------------------------------------------------------------- /conf/affordance/labeling/simulation_lang.yaml: -------------------------------------------------------------------------------- 1 | 
split_by_episodes: True 2 | mode: "simulation" 3 | back_frames: [0, 60] 4 | fixed_pt_del_radius: 0.09 # Meters 5 | remove_blank_mask_instances: True 6 | min_labels: 4 7 | label_size: 8 | static: 14 9 | gripper: 13 10 | -------------------------------------------------------------------------------- /conf/affordance/test_affordance.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - paths: general_paths 3 | - override hydra/hydra_logging: colorlog 4 | - override hydra/job_logging: colorlog 5 | 6 | # 7 | save_viz: True 8 | debug: False 9 | 10 | # folders 11 | checkpoint: 12 | train_folder: ~/logs/hulc2/aff_model/2022-07-02/17-01-30_aff_model 13 | model_name: val_err.ckpt 14 | 15 | dataset_name: calvin_lang_MoCEndPt 16 | aff_detection: 17 | dataset: 18 | _recursive_: False 19 | data_dir: ${paths.datasets}/${dataset_name} 20 | cam: static 21 | data_percent: 1.0 22 | episodes_file: episodes_split.json 23 | img_resize: 24 | static: 224 # clip img size 25 | gripper: 96 26 | all: 100 27 | hough_voting: 28 | skip_pixels: 3 29 | inlier_threshold: 0.8 30 | angle_discretization: 100 31 | inlier_distance: 16 32 | percentage_threshold: 0.4 33 | object_center_kernel_radius: 16 34 | 35 | 36 | dataloader: 37 | num_workers: 4 38 | batch_size: 4 39 | pin_memory: true 40 | 41 | #-- Hydra config --# 42 | hydra_outputs: ./hydra_outputs/aff_preds/ 43 | hydra: 44 | run: 45 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 46 | -------------------------------------------------------------------------------- /conf/affordance/train_affordance.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - aff_detection: r3m 3 | - ../paths@paths: general_paths 4 | - override hydra/hydra_logging: colorlog 5 | - override hydra/job_logging: colorlog 6 | 7 | save_viz: True 8 | 9 | # folders 10 | run_name: ${aff_detection.name} 11 | load_from_last_ckpt: True 12 | checkpoint: 13 | path: ./ 14 | model_name: last.ckpt 15 | 16 | trainer: 17 | # script configs 18 | accelerator: gpu 19 | devices: 1 20 | strategy: ddp 21 | max_epochs: 30 22 | check_val_every_n_epoch: 1 23 | num_sanity_val_steps: 1 24 | precision: 16 25 | 26 | dataset_name: calvin_lang_MoCEndPt 27 | aff_detection: 28 | depth_dist: gaussian # logistic / gaussian 29 | normalize_depth: True 30 | optimizer: 31 | lr: 1e-4 32 | loss_weights: 33 | aff: 0.1 34 | depth: 0.9 35 | dataset: 36 | _recursive_: False 37 | data_dir: ${paths.datasets}/${dataset_name} 38 | cam: static 39 | data_percent: 1.0 40 | img_resize: 41 | static: 224 42 | gripper: 96 43 | all: 100 44 | 45 | dataloader: 46 | num_workers: 4 47 | batch_size: 32 48 | pin_memory: true 49 | 50 | wandb: 51 | logger: 52 | name: ${run_name} 53 | entity: affordance 54 | project: aff_lang 55 | offline: False 56 | group: ${aff_detection.dataset.cam}_${aff_detection.dataset.data_percent}p 57 | saver: ${aff_detection.wandb_saver} 58 | 59 | #-- Hydra config --# 60 | hydra_outputs: ./hydra_outputs/affordance_model/ 61 | hydra: 62 | run: 63 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 64 | -------------------------------------------------------------------------------- /conf/affordance/train_depth.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - paths: general_paths 3 | - transforms: clip_real_world 4 | - override hydra/hydra_logging: colorlog 5 | - override hydra/job_logging: colorlog 6 | 7 | save_viz: True 8 | 9 | # folders 10 | 
run_name: depth 11 | load_from_last_ckpt: True 12 | checkpoint: 13 | path: ./ # ${hydra_outputs}/2022-01-16/14-42-24_aff_rl 14 | model_name: last.ckpt # epoch=49-step=34449.ckpt 15 | 16 | trainer: 17 | # script configs 18 | gpus: -1 19 | max_epochs: 15 20 | check_val_every_n_epoch: 1 21 | num_sanity_val_steps: 1 22 | strategy: ddp 23 | precision: 32 24 | 25 | dataset_name: calvin_lang_MoCEndPt 26 | model: 27 | lr: 1e-05 28 | depth_dist: logistic # logistic / gaussian 29 | lang_fusion_type: mult 30 | normalize_depth: False 31 | 32 | aff_detection: 33 | img_size: 224 34 | 35 | dataset: 36 | _recursive_: False 37 | _target_: hulc2.affordance.datasets.pixel_label.PixeLabelDataLang 38 | transforms: ${transforms} 39 | radius: 40 | static: 16 41 | gripper: 10 42 | data_dir: ${paths.datasets}/${dataset_name} 43 | cam: static 44 | img_resize: 45 | static: 224 # clip img size 46 | gripper: 96 47 | all: 100 48 | 49 | dataloader: 50 | num_workers: 4 51 | batch_size: 32 52 | pin_memory: true 53 | 54 | wandb: 55 | logger: 56 | name: ${run_name} 57 | entity: jessibd 58 | project: depth_est 59 | offline: False 60 | saver: 61 | val_loss: 62 | monitor: 'Validation/total_loss' 63 | save_top_k: 2 64 | mode: min 65 | verbose: True 66 | val_err: 67 | monitor: 'Validation/depth_err' 68 | save_top_k: 2 69 | mode: min 70 | verbose: True 71 | save_last: True 72 | 73 | #-- Hydra config --# 74 | hydra_outputs: ./hydra_outputs/affordance_model/ 75 | hydra: 76 | run: 77 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 78 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | # - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | # contrast: 0.05 7 | # brightness: 0.05 8 | # hue: 0.02 9 | # prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: [0.48145466, 0.4578275, 0.40821073] 12 | std: [0.26862954, 0.26130258, 0.27577711] 13 | 14 | validation: 15 | - _target_: torchvision.transforms.Resize 16 | size: ${aff_detection.img_size} 17 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 18 | - _target_: torchvision.transforms.Normalize 19 | mean: [0.48145466, 0.4578275, 0.40821073] 20 | std: [0.26862954, 0.26130258, 0.27577711] 21 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | contrast: 0.05 7 | brightness: 0.05 8 | hue: 0.01 9 | prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: [0.48145466, 0.4578275, 0.40821073] 12 | std: [0.26862954, 0.26130258, 0.27577711] 13 | 14 | validation: 15 | - _target_: torchvision.transforms.Resize 16 | size: ${aff_detection.img_size} 17 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 18 | - _target_: torchvision.transforms.Normalize 19 | mean: [0.48145466, 0.4578275, 0.40821073] 20 | std: [0.26862954, 
0.26130258, 0.27577711] 21 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_randShift.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | # - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | # contrast: 0.05 9 | # brightness: 0.05 10 | # hue: 0.02 11 | # prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.48145466, 0.4578275, 0.40821073] 14 | std: [0.26862954, 0.26130258, 0.27577711] 15 | 16 | validation: 17 | - _target_: torchvision.transforms.Resize 18 | size: ${aff_detection.img_size} 19 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 20 | - _target_: torchvision.transforms.Normalize 21 | mean: [0.48145466, 0.4578275, 0.40821073] 22 | std: [0.26862954, 0.26130258, 0.27577711] 23 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_randShift_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.05 10 | hue: 0.01 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.48145466, 0.4578275, 0.40821073] 14 | std: [0.26862954, 0.26130258, 0.27577711] 15 | 16 | validation: 17 | - _target_: torchvision.transforms.Resize 18 | size: ${aff_detection.img_size} 19 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 20 | - _target_: torchvision.transforms.Normalize 21 | mean: [0.48145466, 0.4578275, 0.40821073] 22 | std: [0.26862954, 0.26130258, 0.27577711] 23 | -------------------------------------------------------------------------------- /conf/affordance/transforms/clip_real_world.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | # - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | # pad: 3 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.1 10 | hue: 0.02 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.48145466, 0.4578275, 0.40821073] 14 | std: [0.26862954, 0.26130258, 0.27577711] 15 | 16 | validation: 17 | - _target_: torchvision.transforms.Resize 18 | size: ${aff_detection.img_size} 19 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 20 | - _target_: torchvision.transforms.Normalize 21 | mean: [0.48145466, 0.4578275, 0.40821073] 22 | std: [0.26862954, 0.26130258, 0.27577711] 23 | -------------------------------------------------------------------------------- /conf/affordance/transforms/gray.yaml: 
-------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 5 | contrast: 0.05 6 | brightness: 0.05 7 | hue: 0.02 8 | prob: 1 9 | - _target_: torchvision.transforms.Grayscale 10 | num_output_channels: 1 11 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.5,] 14 | std: [0.5,] 15 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 16 | mean: [0.0] 17 | std: [0.01] 18 | clip: [-1, 1] 19 | 20 | validation: 21 | - _target_: torchvision.transforms.Resize 22 | size: ${aff_detection.img_size} 23 | - _target_: torchvision.transforms.Grayscale 24 | num_output_channels: 1 25 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 26 | - _target_: torchvision.transforms.Normalize 27 | mean: [0.5,] 28 | std: [0.5,] 29 | -------------------------------------------------------------------------------- /conf/affordance/transforms/r3m.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.05 10 | hue: 0.02 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.485, 0.456, 0.406] 14 | std: [0.229, 0.224, 0.225] 15 | 16 | validation: 17 | # - _target_: torch.nn.Identity 18 | - _target_: torchvision.transforms.Resize 19 | size: ${aff_detection.img_size} 20 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 21 | - _target_: torchvision.transforms.Normalize 22 | mean: [0.485, 0.456, 0.406] 23 | std: [0.229, 0.224, 0.225] 24 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: torchvision.transforms.Normalize 6 | mean: [0.5,] 7 | std: [0.5,] 8 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 9 | mean: [0.0] 10 | std: [0.005] 11 | clip: [-1, 1] 12 | 13 | validation: 14 | - _target_: torchvision.transforms.Resize 15 | size: ${aff_detection.img_size} 16 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 17 | - _target_: torchvision.transforms.Normalize 18 | mean: [0.5,] 19 | std: [0.5,] 20 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | contrast: 0.05 7 | brightness: 0.05 8 | hue: 0.02 9 | prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: 
[0.5,] 12 | std: [0.5,] 13 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 14 | mean: [0.0] 15 | std: [0.005] 16 | clip: [-1, 1] 17 | 18 | validation: 19 | - _target_: torchvision.transforms.Resize 20 | size: ${aff_detection.img_size} 21 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 22 | - _target_: torchvision.transforms.Normalize 23 | mean: [0.5,] 24 | std: [0.5,] 25 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb_randShift.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 5 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 6 | contrast: 0.05 7 | brightness: 0.05 8 | hue: 0.02 9 | prob: 1 10 | - _target_: torchvision.transforms.Normalize 11 | mean: [0.5,] 12 | std: [0.5,] 13 | - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 14 | mean: [0.0] 15 | std: [0.005] 16 | clip: [-1, 1] 17 | 18 | validation: 19 | - _target_: torchvision.transforms.Resize 20 | size: ${aff_detection.img_size} 21 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 22 | - _target_: torchvision.transforms.Normalize 23 | mean: [0.5,] 24 | std: [0.5,] 25 | -------------------------------------------------------------------------------- /conf/affordance/transforms/rgb_randShift_color.yaml: -------------------------------------------------------------------------------- 1 | training: 2 | - _target_: torchvision.transforms.Resize 3 | size: ${aff_detection.img_size} 4 | - _target_: hulc2.affordance.datasets.transforms.RandomShiftsAug 5 | pad: 5 6 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor # Scale 0-255 to 0-1 7 | - _target_: hulc2.affordance.datasets.transforms.ColorTransform 8 | contrast: 0.05 9 | brightness: 0.05 10 | hue: 0.02 11 | prob: 1 12 | - _target_: torchvision.transforms.Normalize 13 | mean: [0.5,] 14 | std: [0.5,] 15 | # - _target_: hulc2.affordance.datasets.transforms.AddGaussianNoise 16 | # mean: [0.0] 17 | # std: [0.005] 18 | # clip: [-1, 1] 19 | 20 | validation: 21 | - _target_: torchvision.transforms.Resize 22 | size: ${aff_detection.img_size} 23 | - _target_: hulc2.affordance.datasets.transforms.ScaleImageTensor 24 | - _target_: torchvision.transforms.Normalize 25 | mean: [0.5,] 26 | std: [0.5,] 27 | -------------------------------------------------------------------------------- /conf/annotations/new_playtable_validation.yaml: -------------------------------------------------------------------------------- 1 | # rotation 2 | rotate_red_block_right: ["take the red block and rotate it to the right"] 3 | rotate_red_block_left: ["take the red block and rotate it to the left"] 4 | rotate_blue_block_right: ["take the blue block and rotate it to the right"] 5 | rotate_blue_block_left: ["take the blue block and rotate it to the left"] 6 | rotate_pink_block_right: ["take the pink block and rotate it to the right"] 7 | rotate_pink_block_left: ["take the pink block and rotate it to the left"] 8 | 9 | # sliding 10 | push_red_block_right: ["go push the red block right"] 11 | push_red_block_left: ["go push the red block left"] 12 | push_blue_block_right: ["go push the blue block right"] 13 | push_blue_block_left: ["go push the blue block left"] 14 | push_pink_block_right: ["go push the pink block right"] 15 | 
push_pink_block_left: ["go push the pink block left"] 16 | 17 | # open/close 18 | move_slider_left: [ "push the sliding door to the left side"] 19 | move_slider_right: [ "push the sliding door to the right side"] 20 | open_drawer: ["pull the handle to open the drawer"] 21 | close_drawer: ["push the handle to close the drawer"] 22 | 23 | # lifting 24 | lift_red_block_table: ["grasp and lift the red block"] 25 | lift_blue_block_table: ["grasp and lift the blue block"] 26 | lift_pink_block_table: ["grasp and lift the pink block"] 27 | 28 | lift_red_block_slider: [ "lift the red block from the sliding cabinet"] 29 | lift_blue_block_slider: [ "lift the blue block from the sliding cabinet"] 30 | lift_pink_block_slider: [ "lift the pink block from the sliding cabinet"] 31 | 32 | lift_red_block_drawer: ["Take the red block from the drawer"] 33 | lift_blue_block_drawer: ["Take the blue block from the drawer"] 34 | lift_pink_block_drawer: ["Take the pink block from the drawer"] 35 | 36 | place_in_slider: [ "store the grasped block in the sliding cabinet"] 37 | place_in_drawer: [ "store the grasped block in the drawer"] 38 | 39 | push_into_drawer: ["slide the block that it falls into the drawer"] 40 | 41 | stack_block: ["stack the grasped block"] 42 | unstack_block: ["remove the stacked block"] 43 | 44 | turn_on_lightbulb: ["use the switch to turn on the light bulb"] 45 | turn_off_lightbulb: ["use the switch to turn off the light bulb"] 46 | turn_on_led: ["press the button to turn on the led light"] 47 | turn_off_led: ["press the button to turn off the led light"] 48 | -------------------------------------------------------------------------------- /conf/callbacks/calvin_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - rollout: default 3 | - rollout_lh: default 4 | - checkpoint: all 5 | - tsne_plot: default 6 | - kl_schedule: constant 7 | - shm_signal: default 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/all.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: -1 3 | verbose: True 4 | dirpath: saved_models 5 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 6 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/clip_loss.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: val/val_pred_clip_loss 5 | mode: min 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/kl.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: train/kl_loss 5 | mode: max 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/lh_sr.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | 
monitor: eval_lh/avg_seq_len 5 | mode: max 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | every_n_epochs: ${callbacks.rollout_lh.rollout_freq} 9 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/state_recon.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: val/state_recon_loss 5 | mode: min 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/task_sr.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: 3 3 | verbose: True 4 | monitor: tasks/average_sr 5 | mode: max 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | every_n_epochs: ${callbacks.rollout.rollout_freq} 9 | -------------------------------------------------------------------------------- /conf/callbacks/checkpoint/val_action.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 2 | save_top_k: -1 3 | verbose: True 4 | monitor: val_act/action_loss_pp 5 | mode: min 6 | dirpath: saved_models 7 | filename: '{epoch}' #put back in when PL fixes this _{val/accuracy:.4f}' 8 | -------------------------------------------------------------------------------- /conf/callbacks/kl_schedule/constant.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.kl_callbacks.KLConstantSchedule 2 | -------------------------------------------------------------------------------- /conf/callbacks/kl_schedule/linear.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.kl_callbacks.KLLinearSchedule 2 | start_epoch: 10 3 | end_epoch: 50 4 | max_kl_beta: ${loss.kl_beta} 5 | -------------------------------------------------------------------------------- /conf/callbacks/kl_schedule/sigmoid.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: hulc2.utils.kl_callbacks.KLSigmoidSchedule 3 | start_epoch: 10 4 | end_epoch: 50 5 | max_kl_beta: ${loss.kl_beta} 6 | -------------------------------------------------------------------------------- /conf/callbacks/real_world_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - checkpoint: all 3 | - kl_schedule: constant 4 | - shm_signal: default 5 | -------------------------------------------------------------------------------- /conf/callbacks/rollout/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - tasks: new_playtable_tasks 3 | _target_: hulc2.rollout.rollout.Rollout 4 | _recursive_: false 5 | env_cfg: 6 | _target_: hulc2.wrappers.hulc2_wrapper.Hulc2Wrapper 7 | skip_epochs: 1 8 | rollout_freq: 5 9 | video: true 10 | num_rollouts_per_task: 10 11 | check_percentage_of_batch: 1 # which percentage of sequences do we want to check for possible tasks 12 | replan_freq: 30 13 | ep_len: 120 14 | empty_cache: false 15 | log_video_to_file: false 16 | 
save_dir: ./videos 17 | start_robot_neutral: false 18 | add_goal_thumbnail: true 19 | min_window_size: ${datamodule.datasets.vision_dataset.min_window_size} 20 | max_window_size: ${datamodule.datasets.vision_dataset.max_window_size} 21 | id_selection_strategy: "select_longest" 22 | lang_folder: ${datamodule.datasets.lang_dataset.lang_folder} 23 | -------------------------------------------------------------------------------- /conf/callbacks/rollout_lh/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /callbacks/rollout/tasks@tasks: new_playtable_tasks 3 | - /annotations@val_annotations: new_playtable_validation 4 | _target_: hulc2.rollout.rollout_long_horizon.RolloutLongHorizon 5 | _recursive_: false 6 | env_cfg: 7 | _target_: calvin_env.envs.play_lmp_wrapper.PlayLMPWrapper 8 | skip_epochs: 1 9 | rollout_freq: 1 10 | num_videos: 16 11 | num_sequences: 128 12 | replan_freq: 30 13 | ep_len: 360 14 | empty_cache: false 15 | log_video_to_file: false 16 | save_dir: ./videos 17 | lang_folder: ${datamodule.datasets.lang_dataset.lang_folder} 18 | debug: false 19 | -------------------------------------------------------------------------------- /conf/callbacks/shm_signal/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.utils.shared_memory_loader.SignalCallback 2 | -------------------------------------------------------------------------------- /conf/callbacks/tsne_plot/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.visualization.tsne_plot.TSNEPlot 2 | perplexity: 40 3 | n_jobs: 8 4 | plot_percentage: 0.2 5 | opacity: 0.3 6 | marker_size: 5 7 | -------------------------------------------------------------------------------- /conf/cfg_high_level.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - paths: general_paths 3 | - simulation/robot@robot: panda_longer_finger 4 | - simulation/scene@scene: calvin_scene_D 5 | - simulation/env@env: env 6 | - simulation/cameras@cameras: high_res 7 | - simulation/agent@agent: play_lmp 8 | - override hydra/hydra_logging: colorlog 9 | - override hydra/job_logging: colorlog 10 | 11 | data_path: ${paths.vr_data} 12 | model_name: full 13 | 14 | max_timesteps: 364 15 | gripper_offset: [0.0, -0.025, 0.05] 16 | policy_checkpoint: 17 | train_folder: ./trained_agents/lfp 18 | model_name: epoch=30.ckpt 19 | 20 | agent: 21 | viz_obs: True 22 | 23 | aff_detection: 24 | checkpoint: 25 | train_folder: ~/logs/hulc2/aff_ablation/2022-06-15/18-23-49_aff_ablation 26 | # train_folder: ./hydra_outputs/affordance_model/2022-03-09/01-38-55_aff_rl 27 | model_name: val_err.ckpt 28 | 29 | hough_voting: 30 | skip_pixels: 4 31 | inlier_threshold: 0.7 32 | angle_discretization: 100 33 | inlier_distance: 15 34 | percentage_threshold: 0.3 35 | object_center_kernel_radius: 10 36 | 37 | save_dir: ./hydra_outputs/calvin 38 | hydra: 39 | run: 40 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} 41 | -------------------------------------------------------------------------------- /conf/cfg_high_level_rw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datamodule: default 3 | - robot: panda_frankx_interface_policy 4 | - env: robot_io_env 5 | - cams: camera_manager 6 | - agent: real_world 7 | - paths: general_paths 8 | - override hydra/hydra_logging: colorlog 9 | - override 
hydra/job_logging: colorlog 10 | - override datamodule/datasets: vision_only 11 | - _self_ 12 | 13 | data_path: ${paths.vr_data} 14 | model_name: real_world 15 | train_folder: ??? 16 | max_timesteps: 100 17 | agent: 18 | _target_: hulc2.agents.real_world_agent.AffHULCAgent 19 | _recursive_: False 20 | save_viz: False 21 | viz_obs: True 22 | offset: [-0.05, -0.05, 0.13] # Relative to end effector 23 | aff_cfg: 24 | train_folder: ./real_world_checkpoints/aff_model_single 25 | model_name: last.ckpt 26 | 27 | model_free: 28 | train_folder: ./real_world_checkpoints/lang_lfp_single 29 | checkpoint: 17 30 | seed: 42 31 | env: 32 | freq: 15 33 | panda_env_wrapper: 34 | max_rel_pos: 0.02 35 | max_rel_orn: 0.05 36 | 37 | save_dir: ./hydra_outputs/real_world_inference 38 | hydra: 39 | run: 40 | dir: ${save_dir}/${now:%Y-%m-%d}/${now:%H-%M-%S} 41 | searchpath: 42 | - pkg://robot_io.conf 43 | - pkg://hulc2.conf 44 | 45 | sweep: 46 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 47 | subdir: ${hydra.job.override_dirname} 48 | -------------------------------------------------------------------------------- /conf/cfg_low_level.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - callbacks: calvin_default 4 | - datamodule: calvin_default 5 | - model: calvin_hulc++ 6 | - loss: default 7 | - training: default_training 8 | - trainer: play_trainer 9 | - logger: wandb 10 | 11 | - override hydra/job_logging: colorlog 12 | - override hydra/hydra_logging: colorlog 13 | 14 | data_percent: 1 15 | seed: 42 16 | log_dir: ../ 17 | slurm: false 18 | 19 | hydra: 20 | run: 21 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 22 | sweep: 23 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 24 | subdir: ${hydra.job.override_dirname} 25 | job: 26 | config: 27 | override_dirname: 28 | exclude_keys: 29 | - log_dir 30 | - datamodule.root_data_dir 31 | - trainer.gpus 32 | - model.tsne_plot 33 | - datamodule.num_workers 34 | - trainer.limit_train_batches 35 | - trainer.limit_val_batches 36 | - model.action_decoder.load_action_bounds 37 | -------------------------------------------------------------------------------- /conf/cfg_low_level_rw.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - callbacks: real_world_default 4 | - datamodule: real_world_default 5 | - model: real_world_hulc++ 6 | - loss: default 7 | - training: default_training 8 | - trainer: play_trainer 9 | - logger: wandb 10 | 11 | - override hydra/job_logging: colorlog 12 | - override hydra/hydra_logging: colorlog 13 | 14 | data_percent: 1 15 | seed: 42 16 | log_dir: ../ 17 | slurm: false 18 | 19 | hydra: 20 | run: 21 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 22 | sweep: 23 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 24 | subdir: ${hydra.job.override_dirname} 25 | job: 26 | config: 27 | override_dirname: 28 | exclude_keys: 29 | - log_dir 30 | - datamodule.root_data_dir 31 | - trainer.gpus 32 | - model.tsne_plot 33 | - datamodule.num_workers 34 | - trainer.limit_train_batches 35 | - trainer.limit_val_batches 36 | - model.action_decoder.load_action_bounds 37 | -------------------------------------------------------------------------------- /conf/datamodule/calvin_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datasets: vision_lang_shm 3 | - transforms: rand_shift 4 | - proprioception_dims: robot_no_joints #robot_full 5 | - 
observation_space: lang_rgb_static_gripper_rel_act 6 | _target_: hulc2.datasets.hulc2_sim_data_module.Hulc2SimdDataModule 7 | _recursive_: false 8 | root_data_dir: ??? 9 | action_space: 7 10 | action_max: [1., 1., 1., 1., 1., 1., 1.,] 11 | action_min: [-1., -1., -1., -1., -1., -1., -1] 12 | shuffle_val: false 13 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/lang_dataset/lang.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.npz_dataset.NpzDataset 2 | key: "lang" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | skip_frames: 1 10 | pad: true 11 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 12 | aux_lang_loss_window: 8 13 | num_workers: 2 14 | data_percent: ${data_percent} 15 | load_lang_embeddings: false 16 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/lang_dataset/lang_shm.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.shm_dataset.ShmDataset 2 | key: "lang" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | skip_frames: 1 10 | pad: true 11 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 12 | aux_lang_loss_window: 8 13 | num_workers: 2 14 | data_percent: ${data_percent} 15 | load_lang_embeddings: false 16 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/lang_only.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - lang_dataset: lang 3 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_dataset/vision.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.npz_dataset.NpzDataset 2 | key: "vis" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | pad: true 10 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 11 | num_workers: 8 12 | data_percent: ${data_percent} 13 | load_lang_embeddings: false 14 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_dataset/vision_shm.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.datasets.shm_dataset.ShmDataset 2 | key: "vis" 3 | save_format: "npz" 4 | batch_size: 32 5 | min_window_size: 20 6 | max_window_size: 32 7 | proprio_state: ${datamodule.proprioception_dims} 8 | obs_space: ${datamodule.observation_space} 9 | pad: true 10 | lang_folder: "lang_paraphrase-MiniLM-L3-v2" 11 | num_workers: 2 12 | data_percent: ${data_percent} 13 | load_lang_embeddings: false 14 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_lang.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision 3 | - lang_dataset: lang 4 | -------------------------------------------------------------------------------- 
/conf/datamodule/datasets/vision_lang_shm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision_shm 3 | - lang_dataset: lang_shm 4 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_only.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision 3 | -------------------------------------------------------------------------------- /conf/datamodule/datasets/vision_only_shm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vision_dataset: vision_shm 3 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/all_mods_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper', 'rgb_tactile'] 2 | depth_obs: ['depth_static', 'depth_gripper', 'depth_tactile'] 3 | state_obs: ['robot_obs', 'scene_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_gripper_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_gripper_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_gripper_rel_gripper_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions_gripper'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgb_static_robot_scene_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs', 'scene_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- 
/conf/datamodule/observation_space/lang_rgb_static_tactile_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_tactile'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_both_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: ['depth_static', 'depth_gripper'] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_both_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: ['depth_static', 'depth_gripper'] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_static_gripper_rel_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: ['depth_gripper'] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/lang_rgbd_static_robot_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: ['depth_static'] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/rgb_static_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/rgb_static_gripper_rel_gripper_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static', 'rgb_gripper'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['rel_actions_gripper'] 5 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/rgb_static_robot_scene_abs_act.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: ['rgb_static'] 2 | depth_obs: [] 3 | state_obs: ['robot_obs', 'scene_obs'] 4 | actions: ['actions'] 5 | -------------------------------------------------------------------------------- /conf/datamodule/observation_space/state_only.yaml: -------------------------------------------------------------------------------- 1 | rgb_obs: [] 2 | depth_obs: [] 3 | state_obs: ['robot_obs'] 4 | actions: ['actions'] 5 | language: ['language'] 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/none.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 0 2 | keep_indices: [[0, 0]] 3 | robot_orientation_idx: 
[3, 6] 4 | normalize: False 5 | normalize_robot_orientation: False 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_full.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 15 2 | keep_indices: [[0, 15]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_no_joints.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 8 2 | keep_indices: [[0, 7], [14,15]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_no_joints_no_gripper_width.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 7 2 | keep_indices: [[0, 6], [14,15]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/proprioception_dims/robot_scene.yaml: -------------------------------------------------------------------------------- 1 | n_state_obs: 54 2 | keep_indices: [[0, 54]] 3 | robot_orientation_idx: [3, 6] 4 | normalize: True 5 | normalize_robot_orientation: True 6 | -------------------------------------------------------------------------------- /conf/datamodule/real_world_default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datasets: vision_lang_shm 3 | - transforms: real_world_r3m 4 | - proprioception_dims: robot_no_joints #robot_full 5 | - observation_space: lang_rgb_static_gripper_rel_gripper_act 6 | _target_: hulc2.datasets.hulc2_real_world_data_module.Hulc2RealWorldDataModule 7 | _recursive_: false 8 | root_data_dir: ??? 
9 | action_space: 7 10 | action_max: [1., 1., 1., 1., 1., 1., 1.,] 11 | action_min: [-1., -1., -1., -1., -1., -1., -1] 12 | shuffle_val: false 13 | -------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | # - _target_: torchvision.transforms.Resize 4 | # size: 200 5 | # - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | # pad: 10 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor 8 | - _target_: hulc2.utils.transforms.ColorTransform 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: torchvision.transforms.Normalize 14 | mean: [0.5,] 15 | std: [0.5,] 16 | rgb_gripper: 17 | - _target_: torchvision.transforms.Resize 18 | size: 84 19 | - _target_: hulc2.utils.transforms.ScaleImageTensor 20 | - _target_: hulc2.utils.transforms.ColorTransform 21 | contrast: 0.05 22 | brightness: 0.05 23 | hue: 0.02 24 | prob: 1 25 | - _target_: hulc2.utils.transforms.RandomShiftsAug 26 | pad: 4 27 | - _target_: torchvision.transforms.Normalize 28 | mean: [0.5,] 29 | std: [0.5,] 30 | depth_static: 31 | # - _target_: torchvision.transforms.Resize 32 | # size: [200, 200] 33 | - _target_: hulc2.utils.transforms.AddDepthNoise 34 | shape: [1000.0] 35 | rate: [1000.0] 36 | depth_gripper: 37 | - _target_: torchvision.transforms.Resize 38 | size: 84 39 | - _target_: hulc2.utils.transforms.AddGaussianNoise 40 | mean: [ 0.0 ] 41 | std: [ 0.01 ] 42 | robot_obs: 43 | - _target_: hulc2.utils.transforms.NormalizeVector 44 | 45 | # language: 46 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 47 | # mean: [ 0.0 ] 48 | # std: [ 0.01 ] 49 | 50 | 51 | val: 52 | rgb_static: 53 | # - _target_: torchvision.transforms.Resize 54 | # size: 200 55 | - _target_: hulc2.utils.transforms.ScaleImageTensor 56 | - _target_: torchvision.transforms.Normalize 57 | mean: [0.5,] 58 | std: [0.5,] 59 | rgb_gripper: 60 | - _target_: torchvision.transforms.Resize 61 | size: 84 62 | - _target_: hulc2.utils.transforms.ScaleImageTensor 63 | - _target_: torchvision.transforms.Normalize 64 | mean: [0.5,] 65 | std: [0.5,] 66 | depth_static: 67 | - _target_: torchvision.transforms.Resize 68 | size: 200 69 | depth_gripper: 70 | - _target_: torchvision.transforms.Resize 71 | size: 84 72 | robot_obs: 73 | - _target_: hulc2.utils.transforms.NormalizeVector 74 | -------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world_no_rand_shift.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | # - _target_: torchvision.transforms.Resize 4 | # size: 200 5 | # - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | # pad: 10 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor # Scale image between 0-1 (float) 8 | - _target_: hulc2.utils.transforms.ColorTransform # Maintains the range and image type 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: torchvision.transforms.Normalize 14 | mean: [0.5,] 15 | std: [0.5,] 16 | rgb_gripper: 17 | - _target_: torchvision.transforms.Resize 18 | size: 84 19 | - _target_: hulc2.utils.transforms.ScaleImageTensor 20 | - _target_: hulc2.utils.transforms.ColorTransform 21 | contrast: 0.05 22 | brightness: 0.05 23 | hue: 0.02 24 | prob: 1 25 | - _target_: torchvision.transforms.Normalize 26 | mean: [0.5,] 27 | std: [0.5,] 28 | depth_static: 29 | # - 
_target_: torchvision.transforms.Resize 30 | # size: [200, 200] 31 | - _target_: hulc2.utils.transforms.AddDepthNoise 32 | shape: [1000.0] 33 | rate: [1000.0] 34 | depth_gripper: 35 | - _target_: torchvision.transforms.Resize 36 | size: 84 37 | - _target_: hulc2.utils.transforms.AddGaussianNoise 38 | mean: [ 0.0 ] 39 | std: [ 0.01 ] 40 | robot_obs: 41 | - _target_: hulc2.utils.transforms.NormalizeVector 42 | 43 | # language: 44 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 45 | # mean: [ 0.0 ] 46 | # std: [ 0.01 ] 47 | 48 | 49 | val: 50 | rgb_static: 51 | # - _target_: torchvision.transforms.Resize 52 | # size: 200 53 | - _target_: hulc2.utils.transforms.ScaleImageTensor 54 | - _target_: torchvision.transforms.Normalize 55 | mean: [0.5,] 56 | std: [0.5,] 57 | rgb_gripper: 58 | - _target_: torchvision.transforms.Resize 59 | size: 84 60 | - _target_: hulc2.utils.transforms.ScaleImageTensor 61 | - _target_: torchvision.transforms.Normalize 62 | mean: [0.5,] 63 | std: [0.5,] 64 | depth_static: 65 | - _target_: torchvision.transforms.Resize 66 | size: 200 67 | depth_gripper: 68 | - _target_: torchvision.transforms.Resize 69 | size: 84 70 | robot_obs: 71 | - _target_: hulc2.utils.transforms.NormalizeVector 72 | -------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world_r3m.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | # - _target_: torchvision.transforms.Resize 4 | # size: 200 5 | # - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | # pad: 10 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor 8 | - _target_: hulc2.utils.transforms.ColorTransform 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: hulc2.utils.transforms.UpScaleImageTensor 14 | rgb_gripper: 15 | - _target_: torchvision.transforms.Resize 16 | size: 84 17 | - _target_: hulc2.utils.transforms.ScaleImageTensor 18 | - _target_: hulc2.utils.transforms.ColorTransform 19 | contrast: 0.05 20 | brightness: 0.05 21 | hue: 0.02 22 | prob: 1 23 | - _target_: hulc2.utils.transforms.RandomShiftsAug 24 | pad: 4 25 | - _target_: torchvision.transforms.Normalize 26 | mean: [0.5,] 27 | std: [0.5,] 28 | depth_static: 29 | # - _target_: torchvision.transforms.Resize 30 | # size: [200, 200] 31 | - _target_: hulc2.utils.transforms.AddDepthNoise 32 | shape: [1000.0] 33 | rate: [1000.0] 34 | depth_gripper: 35 | - _target_: torchvision.transforms.Resize 36 | size: 84 37 | - _target_: hulc2.utils.transforms.AddGaussianNoise 38 | mean: [ 0.0 ] 39 | std: [ 0.01 ] 40 | robot_obs: 41 | - _target_: hulc2.utils.transforms.NormalizeVector 42 | 43 | # language: 44 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 45 | # mean: [ 0.0 ] 46 | # std: [ 0.01 ] 47 | 48 | 49 | val: 50 | # rgb_static: 51 | # - _target_: torchvision.transforms.Resize 52 | # size: 200 53 | # - _target_: hulc2.utils.transforms.ScaleImageTensor 54 | # - _target_: torchvision.transforms.Normalize 55 | # mean: [0.5,] 56 | # std: [0.5,] 57 | rgb_gripper: 58 | - _target_: torchvision.transforms.Resize 59 | size: 84 60 | - _target_: hulc2.utils.transforms.ScaleImageTensor 61 | - _target_: torchvision.transforms.Normalize 62 | mean: [0.5,] 63 | std: [0.5,] 64 | depth_static: 65 | - _target_: torchvision.transforms.Resize 66 | size: 200 67 | depth_gripper: 68 | - _target_: torchvision.transforms.Resize 69 | size: 84 70 | robot_obs: 71 | - _target_: hulc2.utils.transforms.NormalizeVector 72 | 
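Each datamodule transform config above maps an observation key (rgb_static, rgb_gripper, depth_static, depth_gripper, robot_obs) to a list of Hydra _target_ entries, with separate lists for train and val. The following is only a rough sketch of how such a list can become a per-camera torchvision pipeline, assuming the entries are built with hydra.utils.instantiate; the path and helper name are illustrative, not the repository's own datamodule code, and the custom hulc2.utils.transforms classes must be importable for instantiation to succeed.

# Illustrative sketch only; not the repository's datamodule code.
from hydra.utils import instantiate
from omegaconf import OmegaConf
from torchvision import transforms

cfg = OmegaConf.load("conf/datamodule/transforms/real_world.yaml")  # assumed relative path

def build_transforms(split_cfg):
    # One Compose per observation key, instantiating each _target_ entry in order.
    return {key: transforms.Compose([instantiate(t) for t in entries])
            for key, entries in split_cfg.items()}

train_transforms = build_transforms(cfg.train)   # e.g. train_transforms["rgb_gripper"](image_tensor)
val_transforms = build_transforms(cfg.val)

Note that the commented-out entries in these files (for example the language noise) never reach the parsed lists, so toggling an augmentation only requires editing the YAML.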
-------------------------------------------------------------------------------- /conf/datamodule/transforms/real_world_square.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | rgb_static: 3 | - _target_: torchvision.transforms.Resize 4 | size: [150, 150] 5 | - _target_: hulc2.utils.transforms.RandomShiftsAug 6 | pad: 6 7 | - _target_: hulc2.utils.transforms.ScaleImageTensor 8 | - _target_: hulc2.utils.transforms.ColorTransform 9 | contrast: 0.05 10 | brightness: 0.05 11 | hue: 0.02 12 | prob: 1 13 | - _target_: torchvision.transforms.Normalize 14 | mean: [0.5,] 15 | std: [0.5,] 16 | rgb_gripper: 17 | - _target_: torchvision.transforms.Resize 18 | size: 84 19 | - _target_: hulc2.utils.transforms.ScaleImageTensor 20 | - _target_: hulc2.utils.transforms.ColorTransform 21 | contrast: 0.05 22 | brightness: 0.05 23 | hue: 0.02 24 | prob: 1 25 | - _target_: hulc2.utils.transforms.RandomShiftsAug 26 | pad: 4 27 | - _target_: torchvision.transforms.Normalize 28 | mean: [0.5,] 29 | std: [0.5,] 30 | depth_static: 31 | # - _target_: torchvision.transforms.Resize 32 | # size: [200, 200] 33 | - _target_: hulc2.utils.transforms.AddDepthNoise 34 | shape: [1000.0] 35 | rate: [1000.0] 36 | depth_gripper: 37 | - _target_: torchvision.transforms.Resize 38 | size: 84 39 | - _target_: hulc2.utils.transforms.AddGaussianNoise 40 | mean: [ 0.0 ] 41 | std: [ 0.01 ] 42 | robot_obs: 43 | - _target_: hulc2.utils.transforms.NormalizeVector 44 | 45 | # language: 46 | # - _target_: hulc2.utils.transforms.AddGaussianNoise 47 | # mean: [ 0.0 ] 48 | # std: [ 0.01 ] 49 | 50 | 51 | val: 52 | rgb_static: 53 | - _target_: torchvision.transforms.Resize 54 | size: [150, 150] 55 | - _target_: hulc2.utils.transforms.ScaleImageTensor 56 | - _target_: torchvision.transforms.Normalize 57 | mean: [0.5,] 58 | std: [0.5,] 59 | rgb_gripper: 60 | - _target_: torchvision.transforms.Resize 61 | size: 84 62 | - _target_: hulc2.utils.transforms.ScaleImageTensor 63 | - _target_: torchvision.transforms.Normalize 64 | mean: [0.5,] 65 | std: [0.5,] 66 | depth_static: 67 | - _target_: torchvision.transforms.Resize 68 | size: 200 69 | depth_gripper: 70 | - _target_: torchvision.transforms.Resize 71 | size: 84 72 | robot_obs: 73 | - _target_: hulc2.utils.transforms.NormalizeVector 74 | -------------------------------------------------------------------------------- /conf/inference/config_inference.yaml: -------------------------------------------------------------------------------- 1 | train_folder: ??? # config path to the config.yaml of the training folder (in .hydra) 2 | load_checkpoint: ??? 3 | seed: 42 4 | log_dir: /tmp 5 | visualize: True 6 | ep_len: 120 7 | replan_freq: 30 8 | processes: 1 9 | 10 | hydra: 11 | run: 12 | dir: ${log_dir}/inference_runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 13 | 14 | defaults: 15 | - override hydra/job_logging: colorlog 16 | - override hydra/hydra_logging: colorlog 17 | -------------------------------------------------------------------------------- /conf/inference_real.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - datamodule: default 3 | - env: env 4 | - robot: panda_frankx_interface_policy 5 | - cams: camera_manager 6 | - override hydra/job_logging: colorlog 7 | - override hydra/hydra_logging: colorlog 8 | 9 | seed: 42 10 | log_dir: ../ 11 | slurm: false 12 | env: 13 | freq: 15 14 | train_folder: ??? 15 | checkpoint: ??? 
16 | 17 | hydra: 18 | searchpath: 19 | - pkg://robot_io.conf 20 | run: 21 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 22 | sweep: 23 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 24 | subdir: ${hydra.job.override_dirname} 25 | job: 26 | config: 27 | override_dirname: 28 | exclude_keys: 29 | - log_dir 30 | - datamodule.root_data_dir 31 | - trainer.gpus 32 | - model.tsne_plot 33 | - datamodule.num_workers 34 | - trainer.limit_train_batches 35 | - trainer.limit_val_batches 36 | - model.action_decoder.load_action_bounds 37 | -------------------------------------------------------------------------------- /conf/lang_ann.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - callbacks: default 3 | - datamodule: default 4 | - model: sbert 5 | - loss: default 6 | - training: default_training 7 | - trainer: play_trainer 8 | - logger: wandb 9 | - annotations@train_instructions: new_playtable 10 | - annotations@val_instructions: new_playtable_validation 11 | 12 | - override hydra/job_logging: colorlog 13 | - override hydra/hydra_logging: colorlog 14 | #- override datamodule/observation_space: state_only 15 | #- override datamodule/datasets: vision_only 16 | - _self_ 17 | 18 | seed: 42 19 | log_dir: ../ 20 | slurm: false 21 | eps: 0.01 22 | postprocessing: true 23 | lang_folder: "lang_paraphrase-MiniLM-L3-v2_singleTasks" 24 | with_text: false 25 | reannotate: false 26 | prior_steps_window: 16 27 | validation_scene: calvin_scene_D 28 | datamodule: 29 | datasets: 30 | vision_dataset: 31 | min_window_size: 64 32 | max_window_size: 64 33 | 34 | hydra: 35 | run: 36 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.override_dirname} 37 | sweep: 38 | dir: ${log_dir}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 39 | subdir: ${hydra.job.override_dirname} 40 | job: 41 | config: 42 | override_dirname: 43 | exclude_keys: 44 | - log_dir 45 | - datamodule.root_data_dir 46 | - trainer.gpus 47 | - model.tsne_plot 48 | - datamodule.num_workers 49 | - trainer.limit_train_batches 50 | - trainer.limit_val_batches 51 | - model.decoder.load_action_bounds 52 | -------------------------------------------------------------------------------- /conf/logger/tb_logger.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.loggers.TensorBoardLogger 2 | save_dir: . 3 | name: play_lmp 4 | version: "" 5 | -------------------------------------------------------------------------------- /conf/logger/wandb.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.loggers.WandbLogger 2 | save_dir: . 3 | name: play_lmp 4 | group: play_lmp 5 | log_model: false 6 | project: "multi_play" 7 | id: ??? 
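Several configs in this group leave values as ??? (for example the logger id above, train_folder and checkpoint in inference_real.yaml, and datamodule.root_data_dir). OmegaConf treats ??? as a mandatory missing value: composition still succeeds, but any attempt to read the field raises MissingMandatoryValue, so such fields are normally filled via command-line overrides at launch. A minimal illustration, not repository code:

# Illustrative only: behaviour of the ??? markers in OmegaConf/Hydra configs.
from omegaconf import OmegaConf

cfg = OmegaConf.create({"train_folder": "???", "checkpoint": "???", "seed": 42})
print(OmegaConf.is_missing(cfg, "train_folder"))  # True; reading it now would raise MissingMandatoryValue
cfg.checkpoint = "epoch=30.ckpt"                  # in practice passed as an override, e.g. checkpoint=epoch=30.ckpt
print(cfg.checkpoint)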
8 | -------------------------------------------------------------------------------- /conf/loss/default.yaml: -------------------------------------------------------------------------------- 1 | kl_beta: 0.01 2 | state_recon_beta: 0.5 3 | kl_balancing_mix: 0.8 4 | bc_z_auxiliary_loss_beta: 1.0 5 | mia_auxiliary_loss_beta: 1.0 6 | clip_auxiliary_loss_beta: 3.0 7 | -------------------------------------------------------------------------------- /conf/model/action_decoder/deterministic.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.decoders.deterministic_decoder.DeterministicDecoder 2 | hidden_size: 2048 3 | out_features: ${datamodule.action_space} 4 | policy_rnn_dropout_p: 0.0 5 | perceptual_features: ?? 6 | latent_goal_features: ${model.visual_goal.latent_goal_features} 7 | plan_features: ??? 8 | criterion: HuberLoss # MSELoss 9 | num_layers: 2 10 | rnn_model: rnn_decoder 11 | perceptual_emb_slice: [64, 128] 12 | gripper_control: true 13 | -------------------------------------------------------------------------------- /conf/model/action_decoder/logistic_decoder_rnn_calvin.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.decoders.logistic_decoder_rnn.LogisticDecoderRNN 2 | n_mixtures: 10 3 | hidden_size: 2048 4 | out_features: ${datamodule.action_space} 5 | log_scale_min: -7.0 6 | act_max_bound: ${datamodule.action_max} 7 | act_min_bound: ${datamodule.action_min} 8 | dataset_dir: ${datamodule.root_data_dir} 9 | load_action_bounds: false 10 | num_classes: 10 11 | latent_goal_features: ${model.visual_goal.latent_goal_features} 12 | plan_features: ??? 13 | perceptual_features: ??? 14 | gripper_alpha: 1.0 15 | perceptual_emb_slice: [64, 128] 16 | policy_rnn_dropout_p: 0.0 17 | num_layers: 2 18 | rnn_model: rnn_decoder 19 | gripper_control: true 20 | discrete_gripper: true 21 | -------------------------------------------------------------------------------- /conf/model/action_decoder/logistic_decoder_rnn_real_world.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.decoders.logistic_decoder_rnn.LogisticDecoderRNN 2 | n_mixtures: 10 3 | hidden_size: 2048 4 | out_features: ${datamodule.action_space} 5 | log_scale_min: -7.0 6 | act_max_bound: ${datamodule.action_max} 7 | act_min_bound: ${datamodule.action_min} 8 | dataset_dir: ${datamodule.root_data_dir} 9 | load_action_bounds: false 10 | num_classes: 10 11 | latent_goal_features: ${model.visual_goal.latent_goal_features} 12 | plan_features: ??? 13 | perceptual_features: ??? 
14 | gripper_alpha: 1.0 15 | perceptual_emb_slice: [0, 128] 16 | policy_rnn_dropout_p: 0.0 17 | num_layers: 2 18 | rnn_model: rnn_decoder 19 | gripper_control: false 20 | discrete_gripper: true 21 | -------------------------------------------------------------------------------- /conf/model/calvin_hulc++.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - perceptual_encoder: gripper_cam 3 | - plan_proposal: default 4 | - plan_recognition: transformers 5 | - distribution: discrete 6 | - visual_goal: default 7 | - language_encoder: sbert 8 | - language_goal: default 9 | - action_decoder: logistic_decoder_rnn_calvin 10 | - optimizer: adam 11 | - lr_scheduler: constant 12 | - proj_vis_lang: default 13 | 14 | _target_: hulc2.models.hulc2.Hulc2 15 | _recursive_: false 16 | 17 | kl_beta: ${loss.kl_beta} 18 | kl_balancing_mix: ${loss.kl_balancing_mix} 19 | replan_freq: 30 20 | use_clip_auxiliary_loss: true 21 | clip_auxiliary_loss_beta: ${loss.clip_auxiliary_loss_beta} 22 | -------------------------------------------------------------------------------- /conf/model/clip_lang.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.clip_lang_encoder.LangClip 2 | freeze_backbone: true 3 | model_name: "RN50" # "RN101", "RN50x4", "RN50x16", "ViT-B/32", "ViT-B/16" 4 | -------------------------------------------------------------------------------- /conf/model/distribution/continuous.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.distributions.Distribution 2 | dist: "continuous" 3 | plan_features: 256 4 | -------------------------------------------------------------------------------- /conf/model/distribution/discrete.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.utils.distributions.Distribution 2 | dist: "discrete" 3 | category_size: 32 4 | class_size: 32 5 | -------------------------------------------------------------------------------- /conf/model/gcbc.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - calvin_hulc++ 3 | 4 | _target_: hulc2.models.gcbc.GCBC 5 | -------------------------------------------------------------------------------- /conf/model/language_encoder/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.lang_encoder.LanguageEncoder 2 | language_features: 384 3 | hidden_size: 2048 4 | out_features: 256 5 | word_dropout_p: 0.0 6 | activation_function: ReLU #ELU 7 | -------------------------------------------------------------------------------- /conf/model/language_encoder/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/language_encoder/none.yaml -------------------------------------------------------------------------------- /conf/model/language_encoder/sbert.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.affordance.models.language_encoders.sbert_lang_encoder.SBertLang 2 | freeze_backbone: True 3 | nlp_model: "paraphrase-MiniLM-L3-v2" 4 | -------------------------------------------------------------------------------- /conf/model/language_goal/default.yaml: 
-------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.goal_encoders.LanguageGoalEncoder 2 | in_features: 384 3 | hidden_size: 2048 4 | latent_goal_features: 32 5 | l2_normalize_goal_embeddings: False 6 | activation_function: ReLU #ELU 7 | word_dropout_p: 0.0 8 | -------------------------------------------------------------------------------- /conf/model/language_goal/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/language_goal/none.yaml -------------------------------------------------------------------------------- /conf/model/lr_scheduler/constant.yaml: -------------------------------------------------------------------------------- 1 | _target_: transformers.get_constant_schedule 2 | -------------------------------------------------------------------------------- /conf/model/lr_scheduler/cosine_schedule_with_warmup.yaml: -------------------------------------------------------------------------------- 1 | _target_: transformers.get_cosine_schedule_with_warmup 2 | num_training_steps: -1 # -1 specifies to infer number of training steps 3 | num_warmup_steps: 0.1 # float values determines percentage of training steps to use as warmup 4 | num_cycles: 0.5 5 | -------------------------------------------------------------------------------- /conf/model/lr_scheduler/linear_schedule_with_warmup.yaml: -------------------------------------------------------------------------------- 1 | _target_: transformers.get_linear_schedule_with_warmup 2 | num_training_steps: -1 # -1 specifies to infer number of training steps 3 | num_warmup_steps: 0.1 # float values determines percentage of training steps to use as warmup 4 | -------------------------------------------------------------------------------- /conf/model/optimizer/adam.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.Adam 2 | lr: ${training.lr} 3 | #weight_decay: 1e-6 4 | -------------------------------------------------------------------------------- /conf/model/optimizer/adamw.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.AdamW 2 | lr: ${training.lr} 3 | weight_decay: 1e-6 4 | #amsgrad: False 5 | -------------------------------------------------------------------------------- /conf/model/optimizer/sgd.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.SGD 2 | lr: ${training.lr} 3 | momentum: 0.9 4 | #weight_decay: 0.0005 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/RGBD_both.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - rgb_gripper: default 7 | - depth_static: default 8 | - depth_gripper: default 9 | - proprio: none 10 | - tactile: none 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - 
rgb_gripper: none 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | - state_decoder: none 12 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_gripper/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network_gripper.VisionNetwork 2 | input_width: 84 3 | input_height: 84 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | conv_encoder: nature_cnn 9 | num_c: 1 10 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_gripper/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/depth_gripper/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_static/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network.VisionNetwork 2 | input_width: 200 3 | input_height: 200 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | num_c: 1 9 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/depth_static/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/depth_static/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/gripper_cam.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: r3m 6 | - rgb_gripper: default 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/proprio/identity.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.proprio_encoder.IdentityEncoder 2 | proprioception_dims: ${datamodule.proprioception_dims} 3 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/proprio/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/proprio/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/resnet_aff.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: resnet_aff 6 | - rgb_gripper: resnet_aff 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | 
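The perceptual_encoder variants above are plain Hydra defaults lists that plug one sub-encoder config into each modality slot of ConcatEncoders (rgb_static, rgb_gripper, depth_static, depth_gripper, proprio, tactile); the none options, shown here only as raw-file links presumably because those files are empty, leave a slot unused. Swapping encoders is therefore a config-group override rather than a code change. Below is a hedged sketch using Hydra's compose API; the absolute conf path is an assumption, and the same overrides can equally be passed to the training entry point on the command line.

# Illustrative only: compose the low-level training config with a different perceptual encoder.
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

with initialize_config_dir(config_dir="/absolute/path/to/hulc2/conf"):  # assumed location
    cfg = compose(
        config_name="cfg_low_level",
        overrides=[
            "model/perceptual_encoder=static_RGBD",       # RGB-D static cam instead of gripper_cam
            "datamodule.root_data_dir=/tmp/calvin_data",  # fill a mandatory ??? value
        ],
    )
print(OmegaConf.to_yaml(cfg.model.perceptual_encoder, resolve=False))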
-------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network_gripper.VisionNetwork 2 | input_width: 84 3 | input_height: 84 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | conv_encoder: nature_cnn 9 | num_c: 3 10 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/rgb_gripper/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/r3m.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_r3m.VisionR3M 2 | visual_features: 64 3 | freeze_backbone: True 4 | resnet_model: "resnet18" 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/resnet.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_resnet.VisionResnet 2 | visual_features: 64 3 | freeze_backbone: True 4 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_gripper/resnet_aff.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_resnet_aff.VisionResnetAff 2 | visual_features: 64 3 | freeze_backbone: True 4 | input_shape: [84, 84, 3] 5 | depth: 3 6 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/clip.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_clip.VisionClip 2 | visual_features: 64 3 | freeze_backbone: true 4 | model_name: "RN50" # "RN101", "RN50x4", "RN50x16", "ViT-B/32", "ViT-B/16" 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network.VisionNetwork 2 | input_width: 200 3 | input_height: 150 4 | activation_function: ReLU #ELU 5 | dropout_vis_fc: 0.0 6 | l2_normalize_output: false 7 | visual_features: 64 8 | num_c: 3 9 | use_sinusoid: false 10 | spatial_softmax_temp: 1.0 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/r3m.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_r3m.VisionR3M 2 | visual_features: 64 3 | freeze_backbone: True 4 | resnet_model: "resnet18" 5 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/resnet.yaml: -------------------------------------------------------------------------------- 1 | _target_: 
hulc2.models.perceptual_encoders.vision_resnet.VisionResnet 2 | visual_features: 64 3 | freeze_backbone: True 4 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/resnet_aff.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_resnet_aff.VisionResnetAff 2 | visual_features: 64 3 | freeze_backbone: True 4 | input_shape: [200, 200, 3] 5 | depth: 3 6 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/rgb_static/vision_conv.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.vision_network_conv.VisionNetworkConv 2 | activation_function: ReLU #ELU 3 | dropout_vis_fc: 0.0 4 | l2_normalize_output: false 5 | visual_features: 64 6 | num_c: 3 7 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/state_decoder/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.auxiliary_loss_networks.StateDecoder 2 | visual_features: 64 3 | n_state_obs: 8 4 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/state_decoder/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/state_decoder/none.yaml -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/static_RGBD.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - rgb_gripper: none 7 | - depth_static: default 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: none 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/static_RGB_tactile.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.perceptual_encoders.concat_encoders.ConcatEncoders 2 | _recursive_: false 3 | 4 | defaults: 5 | - rgb_static: default 6 | - rgb_gripper: none 7 | - depth_static: none 8 | - depth_gripper: none 9 | - proprio: none 10 | - tactile: default 11 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/tactile/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin.models.perceptual_encoders.tactile_encoder.TactileEncoder 2 | visual_features: 64 3 | -------------------------------------------------------------------------------- /conf/model/perceptual_encoder/tactile/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/perceptual_encoder/tactile/none.yaml -------------------------------------------------------------------------------- /conf/model/plan_proposal/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: 
hulc2.models.plan_encoders.plan_proposal_net.PlanProposalNetwork 2 | perceptual_features: ??? 3 | latent_goal_features: ${model.visual_goal.latent_goal_features} 4 | plan_features: ??? 5 | activation_function: ReLU #ELU 6 | hidden_size: 2048 7 | -------------------------------------------------------------------------------- /conf/model/plan_recognition/bilstm.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.plan_encoders.plan_recognition_net.PlanRecognitionBiLSTMNetwork 2 | in_features: ??? 3 | plan_features: 256 4 | action_space: ${datamodule.action_space} 5 | birnn_dropout_p: 0.0 6 | -------------------------------------------------------------------------------- /conf/model/plan_recognition/birnn.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.plan_encoders.plan_recognition_net.PlanRecognitionBiRNNNetwork 2 | in_features: ??? 3 | plan_features: 256 4 | action_space: ${datamodule.action_space} 5 | birnn_dropout_p: 0.0 6 | -------------------------------------------------------------------------------- /conf/model/plan_recognition/transformers.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.plan_encoders.plan_recognition_net.PlanRecognitionTransformersNetwork 2 | num_heads: 8 3 | num_layers: 2 4 | encoder_hidden_size: 2048 5 | fc_hidden_size: 4096 6 | in_features: ??? 7 | plan_features: ??? 8 | action_space: ${datamodule.action_space} 9 | dropout_p: 0.1 10 | encoder_normalize: false 11 | positional_normalize: false 12 | position_embedding: true 13 | max_position_embeddings: ${datamodule.datasets.lang_dataset.max_window_size} 14 | -------------------------------------------------------------------------------- /conf/model/proj_vis_lang/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.auxiliary_loss_networks.proj_vis_lang.ProjVisLang 2 | im_dim: ${model.plan_recognition.fc_hidden_size} 3 | lang_dim: ${model.language_goal.latent_goal_features} 4 | output_dim: ${model.language_goal.latent_goal_features} 5 | proj_lang: true 6 | -------------------------------------------------------------------------------- /conf/model/proj_vis_lang/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/model/proj_vis_lang/none.yaml -------------------------------------------------------------------------------- /conf/model/real_world_hulc++.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - perceptual_encoder: gripper_cam 3 | - plan_proposal: default 4 | - plan_recognition: transformers 5 | - distribution: discrete 6 | - visual_goal: default 7 | - language_encoder: sbert 8 | - language_goal: default 9 | - action_decoder: logistic_decoder_rnn_real_world 10 | - optimizer: adam 11 | - lr_scheduler: constant 12 | - proj_vis_lang: none 13 | 14 | _target_: hulc2.models.hulc2.Hulc2 15 | _recursive_: false 16 | 17 | kl_beta: ${loss.kl_beta} 18 | kl_balancing_mix: ${loss.kl_balancing_mix} 19 | replan_freq: 30 20 | use_clip_auxiliary_loss: false 21 | clip_auxiliary_loss_beta: ${loss.clip_auxiliary_loss_beta} 22 | -------------------------------------------------------------------------------- /conf/model/sbert.yaml:
-------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.language_network.SBert 2 | freeze_backbone: True 3 | nlp_model: "paraphrase-MiniLM-L3-v2" 4 | -------------------------------------------------------------------------------- /conf/model/visual_goal/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.models.encoders.goal_encoders.VisualGoalEncoder 2 | in_features: ??? 3 | hidden_size: 2048 4 | latent_goal_features: 32 5 | l2_normalize_goal_embeddings: False 6 | activation_function: ReLU #ELU 7 | -------------------------------------------------------------------------------- /conf/paths/general_paths.yaml: -------------------------------------------------------------------------------- 1 | parent_folder: ../ 2 | vr_data: ${paths.parent_folder}/VREnv/data/ 3 | 4 | # Trained affordance models 5 | trained_models: ${paths.vapo_path}/trained_models/ 6 | 7 | # For training affordance model and policy 8 | datasets: /tmp/datasets/ 9 | -------------------------------------------------------------------------------- /conf/simulation/agent/base.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.agents.base_agent.BaseAgent 2 | _recursive_: False 3 | -------------------------------------------------------------------------------- /conf/simulation/agent/baseline.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.agents.lmp_agent.PlayLMPAgent 2 | _recursive_: False 3 | dataset_path: ${paths.datasets}/unprocessed/task_D_D 4 | checkpoint: 5 | train_folder: ./trained_agents/D_D_static_rgb_baseline 6 | model_name: mcil_baseline.ckpt 7 | offset: ${gripper_offset} 8 | -------------------------------------------------------------------------------- /conf/simulation/agent/play_lmp.yaml: -------------------------------------------------------------------------------- 1 | _target_: hulc2.agents.lmp_agent.PlayLMPAgent 2 | _recursive_: False 3 | dataset_path: ${paths.datasets}/unprocessed/task_D_D 4 | move_outside: False 5 | checkpoint: 6 | train_folder: ./trained_agents/lfp 7 | model_name: epoch=30.ckpt 8 | offset: ${gripper_offset} 9 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/gripper.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.gripper_camera.GripperCamera 2 | name: gripper 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 84 8 | height: 84 9 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/opposing.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: opposing 3 | fov: 75 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 2 7 | width: 200 8 | height: 200 9 | look_at: [ 0.4, 0.5, 0.6 ] 10 | look_from: [ 0.4, 1.5, 0.9 ] 11 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/static.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 300 8 | height: 300 9 | look_at: [ -0.026242351159453392, -0.0302329882979393, 
0.3920000493526459] 10 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/static_calvin.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.static_camera.StaticCamera 2 | name: static 3 | fov: 10 4 | aspect: 1 5 | nearval: 0.01 6 | farval: 10 7 | width: 200 8 | height: 200 9 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 10 | look_from: [2.871459009488717, -2.166602199425597, 2.555159848480571] 11 | up_vector: [0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 12 | -------------------------------------------------------------------------------- /conf/simulation/cameras/cameras/tactile.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.camera.tactile_sensor.TactileSensor 2 | name: tactile 3 | width: 120 4 | height: 160 5 | digit_link_ids: [10, 12] # ${robot.digit_link_ids} 6 | visualize_gui: true 7 | config_path: conf/digit_sensor/config_digit.yml 8 | -------------------------------------------------------------------------------- /conf/simulation/cameras/high_res.yaml: -------------------------------------------------------------------------------- 1 | static: 2 | _target_: calvin_env.camera.static_camera.StaticCamera 3 | name: static 4 | fov: 10 5 | aspect: 1 6 | nearval: 0.01 7 | farval: 10 8 | width: 500 9 | height: 500 10 | look_at: [-0.026242351159453392, -0.0302329882979393, 0.3920000493526459] 11 | look_from: [ 2.871459009488717, -2.166602199425597, 2.555159848480571] 12 | up_vector: [ 0.4041403970338857, 0.22629790978217404, 0.8862616969685161] 13 | 14 | gripper: 15 | _target_: calvin_env.camera.gripper_camera.GripperCamera 16 | name: gripper 17 | fov: 75 18 | aspect: 1 19 | nearval: 0.01 20 | farval: 2 21 | width: 300 22 | height: 300 23 | -------------------------------------------------------------------------------- /conf/simulation/cameras/no_cameras.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/conf/simulation/cameras/no_cameras.yaml -------------------------------------------------------------------------------- /conf/simulation/cameras/static_and_gripper.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/simulation/cameras/static_and_gripper_calvin.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static_calvin 3 | - cameras@gripper: gripper 4 | -------------------------------------------------------------------------------- /conf/simulation/cameras/static_and_tactile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - cameras@static: static 3 | - cameras@tactile: tactile 4 | -------------------------------------------------------------------------------- /conf/simulation/env/env.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.calvin_env.envs.play_table_env.PlayTableSimEnv 2 | 
_recursive_: false 3 | cameras: ${cameras} 4 | seed: 0 5 | bullet_time_step: 240.0 6 | use_vr: False 7 | show_gui: False 8 | robot_cfg: ${robot} 9 | scene_cfg: ${scene} 10 | use_scene_info: false 11 | use_egl: true 12 | control_freq: 30 13 | -------------------------------------------------------------------------------- /conf/simulation/robot/panda.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.robot.robot.Robot 2 | filename: franka_panda/panda.urdf 3 | base_position: ${scene.robot_base_position} 4 | base_orientation: ${scene.robot_base_orientation} 5 | initial_joint_positions: ${scene.robot_initial_joint_positions} 6 | max_joint_force: 200.0 7 | gripper_force: 200 8 | arm_joint_ids: [0, 1, 2, 3, 4, 5, 6] 9 | gripper_joint_ids: [9, 10] 10 | gripper_joint_limits: [0, 0.04] 11 | tcp_link_id: 13 12 | end_effector_link_id: 7 13 | gripper_cam_link: 12 14 | use_nullspace: false 15 | max_velocity: 2 16 | use_ik_fast: false 17 | magic_scaling_factor_pos: 1 # 1.6 18 | magic_scaling_factor_orn: 1 # 2.2 19 | use_target_pose: true 20 | euler_obs: true 21 | # workspace_limits: [[-0.25, 0.2, 0.61], [0.9, 1, 1.2]] 22 | max_rel_pos: 0.03 23 | max_rel_orn: 0.1 24 | -------------------------------------------------------------------------------- /conf/simulation/robot/panda_digit.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - panda 3 | 4 | filename: franka_panda/panda_digit.urdf 5 | gripper_joint_ids: [9, 11] 6 | tcp_link_id: 15 7 | -------------------------------------------------------------------------------- /conf/simulation/robot/panda_longer_finger.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - panda 3 | 4 | filename: franka_panda/panda_longer_finger.urdf 5 | gripper_joint_ids: [9, 11] 6 | tcp_link_id: 15 7 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_A.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[-0.2, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_A/urdf/calvin_table_A.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_big.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: 
blocks/block_pink_small.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_A_eval.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_A/urdf/calvin_table_A.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: [0, -0.12, 0.46] 43 | initial_orn: [0, 0, 1.57] 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: [0.2, -0.12, 0.46] 47 | initial_orn: [0, 0, 0] 48 | block_pink: 49 | file: blocks/block_pink_big.urdf 50 | initial_pos: [0.10, 0.08, 0.46] 51 | initial_orn: [0, 0, 1.57] 52 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_B.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[-0.35, -0.15, 0.46], [0.15, -0.03, 0.46]] 11 | slider_left: [[-0.12, 0.05, 0.46], [0.06, 0.12, 0.46]] 12 | slider_right: [[0.15, 0.05, 0.46], [0.33, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_B/urdf/calvin_table_B.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_small.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_big.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: blocks/block_pink_middle.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | 
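
Note: these scene YAMLs are standard Hydra object configs, where `_target_` names the class to build and the remaining keys become constructor arguments, while interpolations such as `${data_path}` and `${robot.euler_obs}` are resolved from the composed top-level config. The following is a minimal sketch (not part of the repository) of loading one of these files with OmegaConf and reading its non-interpolated fields; the actual scene instantiation happens through Hydra and may require additional runtime arguments from calvin_env that are not shown here.

# Minimal sketch, assuming it is run from the repository root.
# Interpolated keys like ${data_path} are left unresolved on purpose.
from omegaconf import OmegaConf

cfg = OmegaConf.load("conf/simulation/scene/calvin_scene_B.yaml")
print(cfg["_target_"])                              # calvin_env.scene.play_table_scene.PlayTableScene
print(cfg.objects.movable_objects.block_red.file)   # blocks/block_red_small.urdf
print(OmegaConf.to_yaml(cfg.surfaces))              # table / slider_left / slider_right extents
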
-------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_C.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.12, 0.05, 0.46], [0.06, 0.12, 0.46]] 12 | slider_right: [[0.15, 0.05, 0.46], [0.3, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_C/urdf/calvin_table_C.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_big.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: blocks/block_pink_middle.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | -------------------------------------------------------------------------------- /conf/simulation/scene/calvin_scene_D.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.2230011780331578, 1.322365213449541, 2.6154021466198802, -2.811095767734293, -0.5087544766657718, 1.531250568385011, 0.969207720370319] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_D/urdf/calvin_table_D.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: any 43 | initial_orn: any 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: any 47 | initial_orn: any 48 | block_pink: 49 | file: blocks/block_pink_big.urdf 50 | initial_pos: any 51 | initial_orn: any 52 | -------------------------------------------------------------------------------- 
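
Note: the `_eval` scene variants (such as `calvin_scene_D_eval.yaml` below) pin each movable block to a fixed `initial_pos`/`initial_orn`, whereas the corresponding training scenes use `any` so block placement is randomized; fixing the poses makes evaluation rollouts reproducible. A quick way to see this difference, sketched with OmegaConf under the assumption that the script runs from the repository root:

# Sketch: compare block initialization between a training scene and its eval twin.
from omegaconf import OmegaConf

train_scene = OmegaConf.load("conf/simulation/scene/calvin_scene_D.yaml")
eval_scene = OmegaConf.load("conf/simulation/scene/calvin_scene_D_eval.yaml")
for name in train_scene.objects.movable_objects:
    print(name,
          train_scene.objects.movable_objects[name].initial_pos,  # 'any' (randomized)
          eval_scene.objects.movable_objects[name].initial_pos)   # fixed [x, y, z]
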
/conf/simulation/scene/calvin_scene_D_eval.yaml: -------------------------------------------------------------------------------- 1 | _target_: calvin_env.scene.play_table_scene.PlayTableScene 2 | _recursive_: false 3 | data_path: ${data_path} 4 | global_scaling: 0.8 5 | euler_obs: ${robot.euler_obs} 6 | robot_base_position: [-0.34, -0.46, 0.24] 7 | robot_base_orientation: [0, 0, 0] 8 | robot_initial_joint_positions: [-1.21779206, 1.03987646, 2.11978261, -2.34205014, -0.87015947, 1.64119353, 0.55344866] 9 | surfaces: 10 | table: [[0.0, -0.15, 0.46], [0.35, -0.03, 0.46]] 11 | slider_left: [[-0.32, 0.05, 0.46], [-0.16, 0.12, 0.46]] 12 | slider_right: [[-0.05, 0.05, 0.46], [0.13, 0.12, 0.46]] 13 | objects: 14 | fixed_objects: 15 | table: 16 | file: calvin_table_D/urdf/calvin_table_D.urdf 17 | initial_pos: [0, 0, 0] 18 | initial_orn: [0, 0, 0] 19 | joints: 20 | base__slide: 21 | initial_state: 0 # Prismatic 22 | base__drawer: 23 | initial_state: 0 # Prismatic 24 | buttons: 25 | base__button: 26 | initial_state: 0 # Prismatic 27 | effect: led 28 | switches: 29 | base__switch: 30 | initial_state: 0 # Revolute 31 | effect: lightbulb 32 | lights: 33 | lightbulb: 34 | link: light_link 35 | color: [1, 1, 0, 1] # yellow 36 | led: 37 | link: led_link 38 | color: [0, 1, 0, 1] # green 39 | movable_objects: 40 | block_red: 41 | file: blocks/block_red_middle.urdf 42 | initial_pos: [0.05, -0.12, 0.46] 43 | initial_orn: [0, 0, 1.57] 44 | block_blue: 45 | file: blocks/block_blue_small.urdf 46 | initial_pos: [0.23, -0.12, 0.46] 47 | initial_orn: [0, 0, 0] 48 | block_pink: 49 | file: blocks/block_pink_big.urdf 50 | initial_pos: [0.10, 0.08, 0.46] 51 | initial_orn: [0, 0, 1.57] 52 | -------------------------------------------------------------------------------- /conf/trainer/play_trainer.yaml: -------------------------------------------------------------------------------- 1 | accelerator: gpu 2 | devices: 1 3 | precision: 16 4 | max_epochs: 100 5 | sync_batchnorm: false 6 | -------------------------------------------------------------------------------- /conf/training/default_training.yaml: -------------------------------------------------------------------------------- 1 | lr: 0.0002 2 | -------------------------------------------------------------------------------- /conf/utils/combine_dataset.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - _self_ 4 | 5 | src_dirs: 6 | - "/work/dlclarge2/roseteb-thesis/dataset/validation" 7 | - "/work/dlclarge2/roseteb-thesis/dataset/erick_data3" 8 | 9 | dest: "/work/dlclarge2/roseteb-thesis/dataset/new_validation" 10 | -------------------------------------------------------------------------------- /dataset/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download, Unzip, and Remove zip 4 | if [ "$1" = "D" ] 5 | then 6 | 7 | echo "Downloading task_D_D ..." 8 | wget http://calvin.cs.uni-freiburg.de/dataset/task_D_D.zip 9 | unzip task_D_D.zip && rm task_D_D.zip 10 | echo "saved folder: task_D_D" 11 | elif [ "$1" = "ABC" ] 12 | then 13 | 14 | echo "Downloading task_ABC_D ..." 15 | wget http://calvin.cs.uni-freiburg.de/dataset/task_ABC_D.zip 16 | unzip task_ABC_D.zip && rm task_ABC_D.zip 17 | echo "saved folder: task_ABC_D" 18 | 19 | elif [ "$1" = "ABCD" ] 20 | then 21 | 22 | echo "Downloading task_ABCD_D ..." 
23 | wget http://calvin.cs.uni-freiburg.de/dataset/task_ABCD_D.zip 24 | unzip task_ABCD_D.zip && rm task_ABCD_D.zip 25 | echo "saved folder: task_ABCD_D" 26 | 27 | elif [ "$1" = "debug" ] 28 | then 29 | 30 | echo "Downloading debug dataset ..." 31 | wget http://calvin.cs.uni-freiburg.de/dataset/calvin_debug_dataset.zip 32 | unzip calvin_debug_dataset.zip && rm calvin_debug_dataset.zip 33 | echo "saved folder: calvin_debug_dataset" 34 | 35 | 36 | else 37 | echo "Failed: Usage download_data.sh D | ABC | ABCD | debug" 38 | exit 1 39 | fi 40 | -------------------------------------------------------------------------------- /hulc2/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | play_data/ 3 | __pycache__/ 4 | relay-policy-learning/ 5 | puppet/ 6 | mjrl/ 7 | results/ 8 | runs/ 9 | analysis/videos/ 10 | analysis/tsne_results/proposal_clusters/ 11 | analysis/tsne_results/unseen_data_collection/ 12 | -------------------------------------------------------------------------------- /hulc2/__init__.py: -------------------------------------------------------------------------------- 1 | """'Learning from Play implementation in pytorch 2 | :copyright: 2020 by Oier Mees 3 | :license: GPLv3, see LICENSE for more details. 4 | """ 5 | 6 | __version__ = "0.0.1" 7 | __project__ = "hulc2" 8 | __author__ = "Oier Mees" 9 | __license__ = "GPLv3" 10 | __email__ = "meeso@informatik.uni-freiburg.de" 11 | -------------------------------------------------------------------------------- /hulc2/affordance/base_detector.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | class BaseDetector: 6 | def __init__(self, cfg, *args, **kwargs): 7 | self.n_classes = 1 8 | cm = plt.get_cmap("jet") 9 | self._colors = cm(np.linspace(0, 1, self.n_classes)) 10 | self.clusters = {} 11 | 12 | @property 13 | def colors(self): 14 | return self._colors 15 | 16 | @colors.setter 17 | def colors(self, value): 18 | self._colors = value 19 | 20 | def predict(self, new_point): 21 | return 0 22 | -------------------------------------------------------------------------------- /hulc2/affordance/dataset_creation/create_percentage_data_splits.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | from hulc2.utils.utils import get_abspath, split_by_percentage 6 | 7 | 8 | def main(args): 9 | root_dir = get_abspath(args.root_dir) 10 | json_file = os.path.join(root_dir, "episodes_split.json") 11 | data_percent = [0.75, 0.50, 0.25] 12 | 13 | with open(json_file) as f: 14 | episodes_split = json.load(f) 15 | 16 | for percentage in data_percent: 17 | episodes_split_percentage = split_by_percentage(root_dir, episodes_split, percentage) 18 | jsons_filename = root_dir + "/episodes_split_%s.json" % str(percentage * 100) 19 | with open(jsons_filename, "w") as outfile: 20 | json.dump(episodes_split_percentage, outfile, indent=2) 21 | 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser(description="create episodes_split.json for different percentage of original data") 25 | parser.add_argument("--root_dir", default=None, type=str, help="path to processed dataset") 26 | args = parser.parse_args() 27 | main(args) 28 | -------------------------------------------------------------------------------- /hulc2/affordance/dataset_creation/merge_datasets.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from pathlib import Path 5 | 6 | import yaml 7 | 8 | 9 | def to_abs(path): 10 | if os.path.isabs(path): 11 | return path 12 | else: 13 | repo_src_dir = Path(__file__).absolute().parents[1] 14 | return os.path.abspath(repo_src_dir / path) 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description="Process some integers.") 19 | parser.add_argument("--output_dir", type=str, default="", help="directory to output merged episodes_split.json") 20 | 21 | args = parser.parse_args() 22 | cfg_path = to_abs("../../conf/affordance/cfg_merge_dataset.yaml") 23 | with open(cfg_path, "r") as stream: 24 | directory_list = yaml.safe_load(stream)["data_lst"] 25 | 26 | if args.output_dir == "": 27 | output_dir = to_abs(os.path.dirname(directory_list[0])) 28 | else: 29 | output_dir = to_abs(args.output_dir) 30 | 31 | print("Writing to %s " % output_dir) 32 | return output_dir, directory_list 33 | 34 | 35 | # Merge datasets using json files 36 | def merge_datasets(): 37 | output_dir, directory_list = parse_args() 38 | 39 | new_data = {"training": {}, "validation": {}} 40 | for dir in directory_list: 41 | abs_dir = os.path.abspath(dir) 42 | json_path = os.path.join(abs_dir, "episodes_split.json") 43 | with open(json_path) as f: 44 | data = json.load(f) 45 | 46 | # Rename episode numbers if repeated 47 | data_keys = list(data.keys()) 48 | split_keys = ["validation", "training"] 49 | other_keys = [k for k in data_keys if k not in split_keys] 50 | episode = 0 51 | for split in split_keys: 52 | dataset_name = os.path.basename(os.path.normpath(dir)) 53 | for key in data[split].keys(): 54 | new_data[split]["/%s/%s" % (dataset_name, key)] = data[split][key] 55 | episode += 1 56 | for key in other_keys: 57 | new_data[key] = data[key] 58 | # Write output 59 | if not os.path.exists(output_dir): 60 | os.makedirs(output_dir) 61 | out_file = os.path.join(output_dir, "episodes_split.json") 62 | with open(out_file, "w") as outfile: 63 | json.dump(new_data, outfile, indent=2) 64 | 65 | 66 | if __name__ == "__main__": 67 | merge_datasets() 68 | -------------------------------------------------------------------------------- /hulc2/affordance/models/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/affordance/models/core/__init__.py -------------------------------------------------------------------------------- /hulc2/affordance/models/core/language_network.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | from sentence_transformers import SentenceTransformer 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class SBert(nn.Module): 10 | def __init__(self, weights): 11 | super().__init__() 12 | self.model = SentenceTransformer(weights) 13 | 14 | def forward(self, x: List, show_progress_bar: bool = False) -> torch.Tensor: 15 | emb = self.model.encode(x, convert_to_tensor=True, show_progress_bar=show_progress_bar) 16 | return torch.unsqueeze(emb, 1) 17 | -------------------------------------------------------------------------------- /hulc2/affordance/models/lang_fusion/one_stream_attention_lang_fusion_mask.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 
import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from hulc2.affordance.models.lang_fusion.one_stream_attention_lang_fusion_pixel import AttentionLangFusionPixel 7 | 8 | 9 | class AttentionLangFusionMask(AttentionLangFusionPixel): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | # self.output_dim = out_channels = n_classes 13 | if self.output_dim > 1: 14 | # Softmax over channels 15 | self.act_fnc = torch.nn.Softmax(1) 16 | else: 17 | self.act_fnc = torch.nn.Sigmoid() 18 | 19 | def forward(self, inp_img, lang_goal, softmax=True): 20 | """Forward pass.""" 21 | in_data = F.pad(inp_img, self.padding, mode="constant") 22 | in_tens = in_data.to(dtype=torch.float) # [B 3 H W] 23 | 24 | # Forward pass. 25 | aff_out, info = self.attend(in_tens, lang_goal) 26 | if softmax: 27 | aff_out = self.act_fnc(aff_out) 28 | 29 | c0 = np.array([self.padding[2], self.padding[0]]) # top(H), left(W) 30 | c1 = c0 + inp_img.shape[2:] 31 | aff_out = aff_out[:, :, c0[0] : c1[0], c0[1] : c1[1]] 32 | 33 | info["affordance"] = aff_out 34 | return info 35 | -------------------------------------------------------------------------------- /hulc2/affordance/models/lang_fusion/one_stream_attention_lang_fusion_pixel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | import hulc2.models as models 7 | 8 | 9 | class AttentionLangFusionPixel(nn.Module): 10 | def __init__(self, stream_fcn, in_shape, cfg, device, output_dim=1): 11 | super().__init__() 12 | self.fusion_type = cfg.attn_stream_fusion_type 13 | self.stream_fcn = stream_fcn 14 | self.cfg = cfg 15 | self.batchnorm = self.cfg.batchnorm 16 | 17 | self.padding = np.zeros((3, 2), dtype=int) # H, W, C 18 | max_dim = np.max(in_shape[:2]) 19 | pad = (max_dim - np.array(in_shape[:2])) / 2 20 | self.padding[:2] = pad.reshape(2, 1) # H, W, C 21 | 22 | in_shape = np.array(in_shape) 23 | in_shape += np.sum(self.padding, axis=1) 24 | in_shape = list(in_shape) 25 | 26 | # for torch: left, right,(W) top, bottom,(H) front, back(C) 27 | self.padding = self.padding[[1, 0, 2]] # C, H, W 28 | self.padding = tuple(self.padding.flatten()) 29 | self.in_shape = in_shape 30 | self.output_dim = output_dim 31 | self._build_nets() 32 | 33 | @property 34 | def decoder_layers(self): 35 | return self.attn_stream.decoder_layers 36 | 37 | def _build_nets(self): 38 | stream_one_fcn = self.stream_fcn 39 | stream_one_model = models.lang_img_nets[stream_one_fcn] 40 | 41 | self.stream_one = stream_one_model(self.in_shape, self.output_dim, self.cfg) 42 | print(f"Attn FCN: {stream_one_fcn}") 43 | 44 | def attend(self, x, l): 45 | x = self.stream_one(x, l) 46 | return x 47 | 48 | def forward(self, inp_img, lang_goal, softmax=True): 49 | """Forward pass.""" 50 | in_data = F.pad(inp_img, self.padding, mode="constant") 51 | in_tens = in_data.to(dtype=torch.float, device=self.stream_one.device) # [B 3 H W] 52 | 53 | # Forward pass. 
54 | logits, _info = self.attend(in_tens, lang_goal) 55 | 56 | c0 = np.array([self.padding[2], self.padding[0]]) # top(H), left(W) 57 | c1 = c0 + inp_img.shape[2:] 58 | logits = logits[:, :, c0[0] : c1[0], c0[1] : c1[1]] 59 | 60 | logits = logits.permute(0, 2, 3, 1) # [B H W 1] 61 | output = logits.reshape(logits.shape[0], np.prod(logits.shape[1:])) 62 | if softmax: 63 | output = F.softmax(output, dim=-1) 64 | output = output.reshape(logits.shape) 65 | return output, _info 66 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/base_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import DistilBertModel, DistilBertTokenizer 4 | 5 | 6 | class LangEncoder(nn.Module): 7 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 8 | super(LangEncoder, self).__init__() 9 | self.freeze_backbone = freeze_backbone 10 | self.pretrained = pretrained 11 | self._load_model() 12 | 13 | def _load_model(self): 14 | raise NotImplementedError() 15 | 16 | def encode_text(self, x): 17 | """ 18 | Returns: 19 | - text_encodings 20 | - text_embeddings 21 | - text_mask 22 | """ 23 | raise NotImplementedError() 24 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/bert_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import BertConfig, BertModel, BertTokenizer 4 | 5 | from hulc2.affordance.models.language_encoders.base_lang_encoder import LangEncoder 6 | 7 | 8 | class BERTLang(LangEncoder): 9 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 10 | super(BERTLang, self).__init__(freeze_backbone, pretrained) 11 | 12 | def _load_model(self): 13 | self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") 14 | if self.pretrained: 15 | self.text_encoder = BertModel.from_pretrained("bert-base-uncased") 16 | else: 17 | distilbert_config = BertConfig() 18 | self.text_encoder = BertModel(distilbert_config) 19 | _embd_dim = 768 20 | self.text_fc = nn.Linear(_embd_dim, 1024) 21 | 22 | def encode_text(self, x): 23 | with torch.set_grad_enabled(not self.freeze_backbone): 24 | inputs = self.tokenizer(x, return_tensors="pt", padding=True, truncation=True) 25 | input_ids, attention_mask = inputs["input_ids"], inputs["attention_mask"] 26 | input_ids = input_ids.to(self.text_encoder.device) 27 | attention_mask = attention_mask.to(self.text_encoder.device) 28 | text_embeddings = self.text_encoder(input_ids, attention_mask) 29 | text_encodings = text_embeddings.last_hidden_state.mean(1) 30 | 31 | text_feat = self.text_fc(text_encodings) 32 | text_mask = torch.ones_like(input_ids) # [1, max_token_len] 33 | return text_feat, text_embeddings.last_hidden_state, text_mask 34 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/clip_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from hulc2.affordance.models.core.clip import build_model, load_clip, tokenize 5 | from hulc2.affordance.models.language_encoders.base_lang_encoder import LangEncoder 6 | 7 | 8 | class CLIPLang(LangEncoder): 9 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 10 | 
super(CLIPLang, self).__init__(freeze_backbone, pretrained) 11 | 12 | def _load_model(self): 13 | model, _ = load_clip("RN50", jit=False) 14 | _clip_rn50 = build_model(model.state_dict()) 15 | del model 16 | if self.freeze_backbone: 17 | for param in _clip_rn50.parameters(): 18 | param.requires_grad = False 19 | # for param in _clip_rn50.layer4.parameters(): 20 | # param.requires_grad = True 21 | else: 22 | _clip_rn50 = _clip_rn50.float() 23 | # modules = list(net.children())[:-1] 24 | self.model = _clip_rn50 25 | 26 | def encode_text(self, x): 27 | with torch.set_grad_enabled(not self.freeze_backbone): 28 | tokens = tokenize(x) 29 | tokens = tokens.to(self.model.positional_embedding.device) 30 | text_feat, text_emb = self.model.encode_text_with_embeddings(tokens) 31 | 32 | text_mask = torch.where(tokens == 0, tokens, 1) 33 | return text_feat, text_emb, text_mask 34 | -------------------------------------------------------------------------------- /hulc2/affordance/models/language_encoders/distilbert_lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import DistilBertConfig, DistilBertModel, DistilBertTokenizer 4 | 5 | from hulc2.affordance.models.language_encoders.base_lang_encoder import LangEncoder 6 | 7 | 8 | class DistilBERTLang(LangEncoder): 9 | def __init__(self, freeze_backbone=True, pretrained=True) -> None: 10 | super(DistilBERTLang, self).__init__(freeze_backbone, pretrained) 11 | 12 | def _load_model(self): 13 | self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") 14 | if self.pretrained: 15 | self.text_encoder = DistilBertModel.from_pretrained("distilbert-base-uncased") 16 | else: 17 | distilbert_config = DistilBertConfig() 18 | self.text_encoder = DistilBertModel(distilbert_config) 19 | _embd_dim = 768 20 | self.text_fc = nn.Linear(_embd_dim, 1024) 21 | 22 | def encode_text(self, x): 23 | with torch.set_grad_enabled(not self.freeze_backbone): 24 | inputs = self.tokenizer(x, return_tensors="pt", padding=True, truncation=True) 25 | input_ids, attention_mask = inputs["input_ids"], inputs["attention_mask"] 26 | input_ids = input_ids.to(self.text_encoder.device) 27 | attention_mask = attention_mask.to(self.text_encoder.device) 28 | text_embeddings = self.text_encoder(input_ids, attention_mask) 29 | text_encodings = text_embeddings.last_hidden_state.mean(1) 30 | 31 | text_feat = self.text_fc(text_encodings) 32 | text_mask = torch.ones_like(input_ids) # [1, max_token_len] 33 | return text_feat, text_embeddings.last_hidden_state, text_mask 34 | -------------------------------------------------------------------------------- /hulc2/affordance/models/visual_lang_encoders/base_lingunet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class BaseLingunet(nn.Module): 6 | """BaseClass with U-Net skip connections and [] language encoder""" 7 | 8 | def __init__(self, input_shape, output_dim, cfg, *args, **kwargs): 9 | super().__init__() 10 | self.input_shape = input_shape 11 | self.cfg = cfg 12 | self.lang_fusion_type = self.cfg["lang_fusion_type"] 13 | -------------------------------------------------------------------------------- /hulc2/affordance/run_on_cluster/sbatch_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo 
"Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | srun python ${@:2} 20 | 21 | # Print some Information about the end-time to STDOUT 22 | echo "DONE"; 23 | echo "Finished at $(date)"; 24 | -------------------------------------------------------------------------------- /hulc2/affordance/run_on_cluster/sbatch_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo "Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | srun python $2 hydra.run.dir=$3 ${@:5} 20 | 21 | # Print some Information about the end-time to STDOUT 22 | echo "DONE"; 23 | echo "Finished at $(date)"; 24 | -------------------------------------------------------------------------------- /hulc2/affordance/scripts/get_best_eval_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | import hydra 6 | 7 | from hulc2.utils.utils import get_abspath 8 | 9 | 10 | def main(json_file): 11 | with open(json_file) as f: 12 | data = json.load(f) 13 | best_model = max(data, key=lambda v: data[v]["avg_seq_len"]) 14 | print(best_model) 15 | print(data[best_model]["avg_seq_len"]) 16 | print(data[best_model]["chain_sr"]) 17 | 18 | 19 | if __name__ == "__main__": 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("-f", "--file", type=str) 22 | 23 | args = parser.parse_args() 24 | 25 | json_file = get_abspath(args.file) 26 | main(json_file) 27 | -------------------------------------------------------------------------------- /hulc2/affordance/scripts/transform_old_episodes_split.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import json 3 | import os 4 | 5 | 6 | def read_json(json_file): 7 | with open(json_file) as f: 8 | data = json.load(f) 9 | return data 10 | 11 | 12 | def main(root_dir): 13 | data_old_format = read_json(os.path.join(root_dir, "episodes_split.json")) 14 | data_new_format = {"training": {}, "validation": {}} 15 | 16 | for split in ["training", "validation"]: 17 | for ep in data_old_format[split]: 18 | data_new_format[split][ep] = {"gripper_cam": [], "static_cam": []} 19 | _gripper_data, _static_data = [], [] 20 | for frame in data_old_format[split][ep]: 21 | cam_type, _fram_name = frame.split("/") 22 | data_new_format[split][ep][cam_type].append(_fram_name) 23 | 24 | new_file = os.path.join(root_dir, "episodes_split_new.json") 25 | with open(new_file, "w") as outfile: 26 | json.dump(data_new_format, outfile, indent=2) 27 | 28 | 29 | if __name__ == "__main__": 30 | root_dir = 
"/mnt/ssd_shared/Users/Jessica/Documents/hulc2_ssd/datasets/real_world/500k_all_tasks_dataset_15hz" 31 | main(root_dir) 32 | -------------------------------------------------------------------------------- /hulc2/affordance/test_move_to_pt.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | import torch 3 | 4 | from hulc2.env_wrappers.play_aff_lmp_wrapper import PlayLMPWrapper 5 | 6 | 7 | @hydra.main(config_path="../../conf", config_name="cfg_high_level") 8 | def main(cfg): 9 | # Load env 10 | env = hydra.utils.instantiate(cfg.env) 11 | env = PlayLMPWrapper(env, torch.device("cuda:0")) 12 | agent = hydra.utils.instantiate(cfg.agent, env=env, aff_cfg=cfg.aff_detection) 13 | obs = env.reset() 14 | 15 | captions = ["Lift the red block", "Stored the grasped block in the cabinet", "turn on the yellow light"] 16 | for caption in captions: # n instructions 17 | # caption = "use the switch to turn on the light bulb" # input("Type an instruction \n") 18 | # caption = "open the drawer" 19 | # obs = env.reset() 20 | agent.reset(caption) 21 | if agent.model_free.lang_encoder is not None: 22 | goal = {"lang": [caption]} 23 | else: 24 | goal = agent.encode(caption) 25 | for j in range(cfg.max_timesteps): 26 | action = agent.step(obs, goal) 27 | obs, _, _, info = env.step(action) 28 | agent.save_dir["rollout_counter"] += 1 29 | agent.save_sequence_txt("sequence", captions) 30 | agent.save_sequence() 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /hulc2/affordance/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import os 3 | 4 | import hydra 5 | import numpy as np 6 | from scipy.spatial.transform.rotation import Rotation as R 7 | 8 | 9 | def split_by_percentage(root_dir, episodes_split, data_percent): 10 | new_episodes_split = deepcopy(episodes_split) 11 | 12 | # Change training split 13 | split = "training" 14 | # Get original data start end ids 15 | start_end_ids = os.path.join(root_dir, "%s/ep_start_end_ids.npy" % split) 16 | orig_start_end_ids = np.load(start_end_ids) 17 | 18 | # Split the dataset the same as it is split in learning_fom_play_repo 19 | new_start_end_ids = get_split_data(orig_start_end_ids, data_percent) 20 | for episode_dir, cam_frames in episodes_split[split].items(): 21 | for cam, frames in cam_frames.items(): 22 | valid_frames = [] 23 | cam_frame_ids = np.array([int(f.split("_")[-1]) for f in frames]) 24 | 25 | # Check valid frames 26 | if len(cam_frame_ids) > 0: 27 | for start, end in new_start_end_ids: 28 | cond = np.logical_and(cam_frame_ids >= start, cam_frame_ids <= end) 29 | inside_ep = np.where(cond)[0] 30 | valid_frames.extend([i for i in inside_ep]) 31 | 32 | # Replace 33 | new_episodes_split[split][episode_dir][cam] = list(np.array(frames)[valid_frames]) 34 | return new_episodes_split 35 | 36 | 37 | def get_split_data(play_start_end_ids, data_percent): 38 | start_end_ids = np.array(play_start_end_ids) 39 | cumsum = np.cumsum([e - s for s, e in play_start_end_ids]) 40 | 41 | n_samples = int(cumsum[-1] * data_percent) 42 | max_idx = min(n_samples, cumsum[-1]) if n_samples > 0 else cumsum[-1] 43 | indices = [0] 44 | for i in range(len(cumsum) - 1): 45 | if cumsum[i] <= max_idx: 46 | indices.append(i + 1) 47 | 48 | # Valid play-data start_end_ids episodes 49 | start_end_ids = [start_end_ids[i] for i in indices] 50 | diff = cumsum[indices[-1]] - 
n_samples 51 | start_end_ids[-1][-1] = start_end_ids[-1][-1] - diff 52 | return np.array(start_end_ids) 53 | 54 | 55 | def depth_img_from_uint16(depth_img, max_depth=4): 56 | depth_img[np.isnan(depth_img)] = 0 57 | return (depth_img.astype("float") / (2**16 - 1)) * max_depth 58 | 59 | 60 | def euler_to_quat(euler_angles): 61 | """xyz euler angles to xyzw quat""" 62 | return R.from_euler("xyz", euler_angles).as_quat() 63 | 64 | 65 | def quat_to_euler(quat): 66 | """xyz euler angles to xyzw quat""" 67 | return R.from_quat(quat).as_euler("xyz") 68 | -------------------------------------------------------------------------------- /hulc2/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/datasets/__init__.py -------------------------------------------------------------------------------- /hulc2/datasets/random.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import pytorch_lightning as pl 4 | from pytorch_lightning.trainer.supporters import CombinedLoader 5 | import torch 6 | from torch.utils.data import DataLoader 7 | import torchvision 8 | 9 | 10 | class RandomDataset(torch.utils.data.Dataset): 11 | def __init__(self, n_examples: int = 64, window_size: int = 32, split: str = "train", transforms: List = []): 12 | self.n_examples = n_examples 13 | self.split = split 14 | self.data = [ 15 | dict( 16 | images=torch.rand(window_size, 3, 200, 200), 17 | observations=torch.rand(window_size, 8), 18 | actions=torch.rand(window_size, 7), 19 | ) 20 | for x in range(n_examples) 21 | ] 22 | self.transform = torchvision.transforms.Compose(transforms) 23 | 24 | def __getitem__(self, idx): 25 | x = self.data[idx] 26 | seq_acts = x["actions"] 27 | seq_rgb_obs = (x["images"],) 28 | seq_depth_obs = (x["images"],) 29 | seq_state_obs = x["observations"] 30 | seq_lang = torch.empty(0) 31 | info = {} 32 | return seq_state_obs, seq_rgb_obs, tuple([]), seq_acts, seq_lang, info, idx 33 | 34 | def __len__(self): 35 | return self.n_examples 36 | 37 | 38 | class RandomDataModule(pl.LightningDataModule): 39 | def __init__(self, batch_size: int = 16, train_transforms: List = [], val_transforms: List = [], **kwargs: Dict): 40 | super().__init__() 41 | self.batch_size = batch_size 42 | self.train_dataset = RandomDataset(n_examples=32, window_size=16, split="train", transforms=train_transforms) 43 | self.val_dataset = RandomDataset(n_examples=32, window_size=16, split="val", transforms=val_transforms) 44 | self.modalities = ["vis"] 45 | 46 | def train_dataloader(self): 47 | return {"vis": DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=0)} 48 | 49 | def val_dataloader(self): 50 | val_dataloader = {"vis": DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=0)} 51 | return CombinedLoader(val_dataloader, "max_size_cycle") 52 | 53 | @property 54 | def len_train(self): 55 | return len(self.train_dataset) 56 | 57 | @property 58 | def len_valid(self): 59 | return len(self.val_dataset) 60 | -------------------------------------------------------------------------------- /hulc2/datasets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/datasets/utils/__init__.py 
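
Note: `RandomDataModule` above produces synthetic batches with the same tuple layout as the real datasets (state obs, RGB obs, depth obs, actions, language, info, index), which makes it useful for smoke-testing the training loop without downloading CALVIN data. A minimal usage sketch, assuming the `hulc2` package and its pinned pytorch_lightning version are importable; the printed shapes follow the defaults hard-coded above (window size 16, 8 proprioceptive dims, 7-dim actions):

# Sketch: pull one synthetic batch from the "vis" dataloader and unpack it.
from hulc2.datasets.random import RandomDataModule

dm = RandomDataModule(batch_size=4)
batch = next(iter(dm.train_dataloader()["vis"]))
state_obs, rgb_obs, depth_obs, actions, lang, info, idx = batch
print(state_obs.shape)   # torch.Size([4, 16, 8])
print(rgb_obs[0].shape)  # torch.Size([4, 16, 3, 200, 200])
print(actions.shape)     # torch.Size([4, 16, 7])
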
-------------------------------------------------------------------------------- /hulc2/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/evaluation/__init__.py -------------------------------------------------------------------------------- /hulc2/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Vision networks 2 | from hulc2.affordance.models.language_encoders.bert_lang_encoder import BERTLang 3 | 4 | # Language encoders 5 | from hulc2.affordance.models.language_encoders.clip_lang_encoder import CLIPLang 6 | from hulc2.affordance.models.language_encoders.distilbert_lang_encoder import DistilBERTLang 7 | from hulc2.affordance.models.language_encoders.sbert_lang_encoder import SBertLang 8 | from hulc2.affordance.models.visual_lang_encoders.r3m_rn18 import R3M 9 | from hulc2.affordance.models.visual_lang_encoders.rn50_clip_lingunet import CLIPLingUNet 10 | from hulc2.affordance.models.visual_lang_encoders.rn50_unet import RN50LingUNet 11 | from hulc2.affordance.models.visual_lang_encoders.rn_lingunet import RNLingunet 12 | 13 | lang_encoders = {"clip": CLIPLang, "bert": BERTLang, "distilbert": DistilBERTLang, "sbert": SBertLang} 14 | 15 | vision_encoders = { 16 | # Lang Nets 17 | "clip": CLIPLingUNet, 18 | "rn": RNLingunet, # RN50LingUNet, 19 | "rn18": RNLingunet, 20 | "r3m_rn18": R3M, 21 | } 22 | 23 | # Depth estimatiom models 24 | from hulc2.affordance.models.depth.depth_gaussian import DepthEstimationGaussian 25 | from hulc2.affordance.models.depth.depth_logistics import DepthEstimationLogistics 26 | 27 | deth_est_nets = { 28 | # Depth Nets 29 | "gaussian": DepthEstimationGaussian, 30 | "logistic": DepthEstimationLogistics, 31 | } 32 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/auxiliary_loss_networks/__init__.py -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/bc_z_lang_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class BCZLangDecoder(nn.Module): 6 | def __init__(self, in_features: int, lang_dim: int): 7 | super().__init__() 8 | # include proprio info??? 
9 | self.mlp = nn.Sequential( 10 | nn.Linear(in_features=in_features, out_features=512), 11 | nn.ReLU(), 12 | nn.Linear(in_features=512, out_features=lang_dim), 13 | ) 14 | 15 | def forward(self, x: torch.Tensor) -> torch.Tensor: 16 | x = self.mlp(x) 17 | return x 18 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/mia_lang_discriminator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class MIALangDiscriminator(nn.Module): 6 | def __init__(self, in_features: int, lang_dim: int, dropout_p: float): 7 | super().__init__() 8 | self.mlp = nn.Sequential( 9 | nn.Linear(in_features=in_features + lang_dim, out_features=512), 10 | nn.ReLU(), 11 | nn.Dropout(dropout_p), 12 | nn.Linear(in_features=512, out_features=1), 13 | ) 14 | 15 | def forward(self, vis_emb: torch.Tensor, lang_emb: torch.Tensor) -> torch.Tensor: 16 | x = torch.cat([vis_emb, lang_emb], dim=-1) 17 | x = self.mlp(x) 18 | return x 19 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/proj_vis_lang.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class ProjVisLang(nn.Module): 8 | def __init__(self, im_dim: int, lang_dim: int, output_dim: int, proj_lang: bool = True): 9 | super().__init__() 10 | self.mlp_im = nn.Sequential( 11 | nn.Linear(in_features=im_dim, out_features=128), 12 | nn.ReLU(), 13 | nn.Linear(in_features=128, out_features=output_dim), 14 | ) 15 | self.mlp_lang = None 16 | if proj_lang: 17 | self.mlp_lang = nn.Sequential( 18 | nn.Linear(in_features=lang_dim, out_features=128), 19 | nn.ReLU(), 20 | nn.Linear(in_features=128, out_features=output_dim), 21 | ) 22 | 23 | def forward(self, vis_emb: torch.Tensor, lang_emb: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 24 | vis_emb = self.mlp_im(vis_emb) 25 | if self.mlp_lang is not None: 26 | lang_emb = self.mlp_lang(lang_emb) 27 | return vis_emb, lang_emb 28 | -------------------------------------------------------------------------------- /hulc2/models/auxiliary_loss_networks/state_decoder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class StateDecoder(nn.Module): 8 | def __init__(self, visual_features: int, n_state_obs: int): 9 | super().__init__() 10 | self.mlp = nn.Sequential( 11 | nn.Linear(in_features=visual_features, out_features=40), 12 | nn.ReLU(), 13 | nn.Linear(in_features=40, out_features=40), 14 | nn.ReLU(), 15 | nn.Linear(in_features=40, out_features=n_state_obs), 16 | ) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | x = self.mlp(x) 20 | return x 21 | -------------------------------------------------------------------------------- /hulc2/models/decoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/decoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/decoders/action_decoder.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class 
ActionDecoder(nn.Module): 8 | def act( 9 | self, 10 | latent_plan: torch.Tensor, 11 | perceptual_emb: torch.Tensor, 12 | latent_goal: torch.Tensor, 13 | robot_obs: Optional[torch.Tensor] = None, 14 | ) -> torch.Tensor: 15 | raise NotImplementedError 16 | 17 | def loss( 18 | self, 19 | latent_plan: torch.Tensor, 20 | perceptual_emb: torch.Tensor, 21 | latent_goal: torch.Tensor, 22 | actions: torch.Tensor, 23 | robot_obs: Optional[torch.Tensor] = None, 24 | ) -> torch.Tensor: 25 | raise NotImplementedError 26 | 27 | def loss_and_act( 28 | self, 29 | latent_plan: torch.Tensor, 30 | perceptual_emb: torch.Tensor, 31 | latent_goal: torch.Tensor, 32 | actions: torch.Tensor, 33 | robot_obs: Optional[torch.Tensor] = None, 34 | ) -> Tuple[torch.Tensor, torch.Tensor]: 35 | raise NotImplementedError 36 | 37 | def _sample(self, *args, **kwargs): 38 | raise NotImplementedError 39 | 40 | def forward( 41 | self, latent_plan: torch.Tensor, perceptual_emb: torch.Tensor, latent_goal: torch.Tensor 42 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 43 | raise NotImplementedError 44 | 45 | def clear_hidden_state(self) -> None: 46 | pass 47 | -------------------------------------------------------------------------------- /hulc2/models/decoders/clip_proj.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class ClipProj(nn.Module): 8 | def __init__(self, im_dim: int, lang_dim: int, output_dim: int, proj_lang: bool = True): 9 | super().__init__() 10 | self.mlp_im = nn.Sequential( 11 | nn.Linear(in_features=im_dim, out_features=128), 12 | nn.ReLU(), 13 | nn.Linear(in_features=128, out_features=output_dim), 14 | ) 15 | self.mlp_lang = None 16 | if proj_lang: 17 | self.mlp_lang = nn.Sequential( 18 | nn.Linear(in_features=lang_dim, out_features=128), 19 | nn.ReLU(), 20 | nn.Linear(in_features=128, out_features=output_dim), 21 | ) 22 | 23 | def forward(self, vis_emb: torch.Tensor, lang_emb: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 24 | vis_emb = self.mlp_im(vis_emb) 25 | if self.mlp_lang is not None: 26 | lang_emb = self.mlp_lang(lang_emb) 27 | return vis_emb, lang_emb 28 | -------------------------------------------------------------------------------- /hulc2/models/decoders/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/decoders/utils/__init__.py -------------------------------------------------------------------------------- /hulc2/models/decoders/utils/rnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def rnn_decoder(in_features: int, hidden_size: int, num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 6 | return nn.RNN( 7 | input_size=in_features, 8 | hidden_size=hidden_size, 9 | num_layers=num_layers, 10 | nonlinearity="relu", 11 | bidirectional=False, 12 | batch_first=True, 13 | dropout=policy_rnn_dropout_p, 14 | ) 15 | 16 | 17 | def lstm_decoder(in_features: int, hidden_size: int, num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 18 | return nn.LSTM( 19 | input_size=in_features, 20 | hidden_size=hidden_size, 21 | num_layers=num_layers, 22 | bidirectional=False, 23 | batch_first=True, 24 | dropout=policy_rnn_dropout_p, 25 | ) 26 | 27 | 28 | def gru_decoder(in_features: int, hidden_size: int, 
num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 29 | return nn.GRU( 30 | input_size=in_features, 31 | hidden_size=hidden_size, 32 | num_layers=num_layers, 33 | bidirectional=False, 34 | batch_first=True, 35 | dropout=policy_rnn_dropout_p, 36 | ) 37 | 38 | 39 | def mlp_decoder(in_features: int, hidden_size: int, num_layers: int, policy_rnn_dropout_p: float) -> torch.nn.Module: 40 | return nn.Sequential( 41 | nn.Linear(in_features=in_features, out_features=hidden_size), 42 | nn.ReLU(), 43 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 44 | nn.ReLU(), 45 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 46 | ) 47 | -------------------------------------------------------------------------------- /hulc2/models/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/encoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/encoders/clip_lang_encoder.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from hulc2.models.perceptual_encoders.clip import build_model, load_clip, tokenize 7 | 8 | 9 | class LangClip(nn.Module): 10 | def __init__(self, freeze_backbone: bool = True, model_name: str = "RN50"): 11 | super(LangClip, self).__init__() 12 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 13 | # Load CLIP model 14 | print(f"loading language CLIP model with backbone: {model_name}") 15 | self._load_clip(model_name) 16 | if freeze_backbone: 17 | for param in self.clip_rn50.parameters(): 18 | param.requires_grad = False 19 | 20 | def _load_clip(self, model_name: str) -> None: 21 | model, _ = load_clip(model_name, device=self.device) 22 | self.clip_rn50 = build_model(model.state_dict()).to(self.device) 23 | del model 24 | 25 | def forward(self, x: List) -> torch.Tensor: 26 | with torch.no_grad(): 27 | tokens = tokenize(x).to(self.device) 28 | emb = self.clip_rn50.encode_text(tokens) 29 | return torch.unsqueeze(emb, 1) 30 | -------------------------------------------------------------------------------- /hulc2/models/encoders/goal_encoders.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class VisualGoalEncoder(nn.Module): 9 | def __init__( 10 | self, 11 | hidden_size: int, 12 | latent_goal_features: int, 13 | in_features: int, 14 | l2_normalize_goal_embeddings: bool, 15 | activation_function: str, 16 | ): 17 | super().__init__() 18 | self.l2_normalize_output = l2_normalize_goal_embeddings 19 | self.act_fn = getattr(nn, activation_function)() 20 | self.mlp = nn.Sequential( 21 | nn.Linear(in_features=in_features, out_features=hidden_size), 22 | self.act_fn, 23 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 24 | self.act_fn, 25 | nn.Linear(in_features=hidden_size, out_features=latent_goal_features), 26 | ) 27 | self.ln = nn.LayerNorm(latent_goal_features) 28 | 29 | def forward(self, x: torch.Tensor) -> torch.Tensor: 30 | x = self.mlp(x) 31 | if self.l2_normalize_output: 32 | x = F.normalize(x, p=2, dim=1) 33 | x = self.ln(x) 34 | return x 35 | 36 | 37 | class LanguageGoalEncoder(nn.Module): 38 | def __init__( 39 | self, 40 | lang_net, 41 | in_features: 
int, 42 | hidden_size: int, 43 | latent_goal_features: int, 44 | l2_normalize_goal_embeddings: bool, 45 | word_dropout_p: float, 46 | activation_function: str, 47 | ): 48 | super().__init__() 49 | self.lang_net = lang_net 50 | self.l2_normalize_output = l2_normalize_goal_embeddings 51 | self.act_fn = getattr(nn, activation_function)() 52 | self.mlp = nn.Sequential( 53 | nn.Dropout(word_dropout_p), 54 | nn.Linear(in_features=in_features, out_features=hidden_size), 55 | self.act_fn, 56 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 57 | self.act_fn, 58 | nn.Linear(in_features=hidden_size, out_features=latent_goal_features), 59 | ) 60 | self.ln = nn.LayerNorm(latent_goal_features) 61 | 62 | def forward(self, x: list) -> torch.Tensor: 63 | # Takes a list of strings and returns the embeddings 64 | if self.lang_net is not None: 65 | x = self.lang_net(x) 66 | 67 | x = self.mlp(x) 68 | if self.l2_normalize_output: 69 | x = F.normalize(x, p=2, dim=1) 70 | x = self.ln(x) 71 | return x 72 | -------------------------------------------------------------------------------- /hulc2/models/encoders/lang_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LanguageEncoder(nn.Module): 6 | def __init__( 7 | self, 8 | language_features: int, 9 | hidden_size: int, 10 | out_features: int, 11 | word_dropout_p: float, 12 | activation_function: str, 13 | ): 14 | super().__init__() 15 | self.act_fn = getattr(nn, activation_function)() 16 | self.mlp = nn.Sequential( 17 | nn.Dropout(word_dropout_p), 18 | nn.Linear(in_features=language_features, out_features=hidden_size), 19 | self.act_fn, 20 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 21 | self.act_fn, 22 | nn.Linear(in_features=hidden_size, out_features=out_features), 23 | ) 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | x = self.mlp(x) 27 | return x 28 | -------------------------------------------------------------------------------- /hulc2/models/encoders/language_network.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import numpy as np 5 | from sentence_transformers import SentenceTransformer 6 | import torch 7 | from torch import nn, Tensor 8 | from tqdm.autonotebook import trange 9 | 10 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 11 | 12 | 13 | class SBert(nn.Module): 14 | def __init__(self, nlp_model: str, freeze_backbone=True) -> None: 15 | super(SBert, self).__init__() 16 | self.freeze_backbone = freeze_backbone 17 | self.model = SentenceTransformer(nlp_model) 18 | _embd_dim = 384 19 | # self.text_fc = nn.Linear(_embd_dim, 1024) 20 | 21 | def forward(self, x: List) -> torch.Tensor: 22 | enc = self.encode(x) 23 | # enc = self.text_fc(enc) 24 | return enc # torch.unsqueeze(enc, 1) 25 | 26 | def encode(self, sentences: List[str], normalize_embeddings: bool = False) -> Tensor: 27 | """ 28 | Computes sentence embeddings 29 | 30 | :param sentences: the sentences to embed 31 | :param normalize_embeddings: If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) instead of cosine similarity can be used. 
32 | 33 | :return: 34 | A stacked tensor is returned 35 | """ 36 | if self.freeze_backbone: 37 | self.model.eval() 38 | 39 | all_embeddings = [] 40 | length_sorted_idx = np.argsort([-self.model._text_length(sen) for sen in sentences]) 41 | sentences_sorted = [sentences[idx] for idx in length_sorted_idx] 42 | 43 | features = self.model.tokenize(sentences_sorted) 44 | features = self.batch_to_device(features, self.model._target_device) 45 | 46 | with torch.set_grad_enabled(not self.freeze_backbone): 47 | out_features = self.model.forward(features) 48 | embeddings = out_features["sentence_embedding"] 49 | embeddings = embeddings.detach() 50 | if normalize_embeddings: 51 | embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1) 52 | all_embeddings.extend(embeddings) 53 | 54 | # undo sort and convert to tensor 55 | all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)] 56 | all_embeddings = torch.stack(all_embeddings) 57 | return all_embeddings 58 | 59 | def batch_to_device(self, batch, target_device): 60 | """ 61 | send a pytorch batch to a device (CPU/GPU) 62 | """ 63 | for key in batch: 64 | if isinstance(batch[key], Tensor): 65 | batch[key] = batch[key].to(target_device) 66 | return batch 67 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/perceptual_encoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/proprio_encoder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | 4 | 5 | class IdentityEncoder(nn.Module): 6 | def __init__(self, proprioception_dims): 7 | super(IdentityEncoder, self).__init__() 8 | # remove a dimension if we convert robot orientation quaternion to euler angles 9 | self.n_state_obs = int(np.sum(np.diff([list(x) for x in [list(y) for y in proprioception_dims.keep_indices]]))) 10 | self.identity = nn.Identity() 11 | 12 | @property 13 | def out_features(self): 14 | return self.n_state_obs 15 | 16 | def forward(self, x): 17 | return self.identity(x) 18 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/tactile_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | 7 | class TactileEncoder(nn.Module): 8 | def __init__(self, visual_features: int, freeze_tactile_backbone: bool = True): 9 | super(TactileEncoder, self).__init__() 10 | # Load pre-trained resnet-18 11 | net = models.resnet18(pretrained=True) 12 | # Remove the last fc layer, and rebuild 13 | modules = list(net.children())[:-1] 14 | self.net = nn.Sequential(*modules) 15 | if freeze_tactile_backbone: 16 | for param in self.net.parameters(): 17 | param.requires_grad = False 18 | self.fc1 = nn.Linear(1024, 512) 19 | self.fc2 = nn.Linear(512, visual_features) 20 | 21 | def forward(self, x: torch.Tensor) -> torch.Tensor: 22 | x_l = self.net(x[:, :3, :, :]).squeeze() 23 | x_r = self.net(x[:, 3:, :, :]).squeeze() 24 | x = torch.cat((x_l, x_r), dim=-1) 25 | # Add fc layer for final prediction 26 | output = F.relu(self.fc1(x)) # batch, 512 27 | 
output = self.fc2(output) # batch, 64 28 | return output 29 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_clip.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | import torchvision 6 | 7 | from hulc2.models.perceptual_encoders.clip import build_model, load_clip, tokenize 8 | 9 | 10 | class VisionClip(nn.Module): 11 | def __init__( 12 | self, device: torch.device, visual_features: int, freeze_backbone: bool = True, model_name: str = "RN50" 13 | ): 14 | super(VisionClip, self).__init__() 15 | # Load CLIP model 16 | print(f"loading vision CLIP model with backbone: {model_name}") 17 | self.clip_model, _ = load_clip(model_name, device=device) 18 | if freeze_backbone: 19 | for param in self.clip_model.parameters(): 20 | param.requires_grad = False 21 | if "RN50" in model_name: 22 | self.fc1 = nn.Linear(1024, 512) 23 | self.fc2 = nn.Linear(512, visual_features) 24 | elif "ViT-B/32" in model_name: 25 | self.fc1 = nn.Linear(512, 256) 26 | self.fc2 = nn.Linear(256, visual_features) 27 | 28 | def forward(self, x: torch.Tensor) -> torch.Tensor: 29 | x = self.clip_model.encode_image(x) # type:ignore 30 | output = F.relu(self.fc1(x)) # batch, 512 31 | output = self.fc2(output) # batch, 64 32 | return output 33 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_network_conv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from typing import Dict, Optional, Tuple 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn.parameter import Parameter 9 | 10 | 11 | class VisionNetworkConv(nn.Module): 12 | # reference: https://arxiv.org/pdf/2005.07648.pdf 13 | def __init__( 14 | self, 15 | activation_function: str, 16 | dropout_vis_fc: float, 17 | l2_normalize_output: bool, 18 | visual_features: int, 19 | num_c: int, 20 | ): 21 | super(VisionNetworkConv, self).__init__() 22 | self.l2_normalize_output = l2_normalize_output 23 | self.act_fn = getattr(nn, activation_function)() 24 | # model 25 | self.conv_model = nn.Sequential( 26 | # input shape: [N, 3, 200, 200] 27 | nn.Conv2d(in_channels=num_c, out_channels=32, kernel_size=8, stride=4), # shape: [N, 32, 49, 49] 28 | nn.BatchNorm2d(32), 29 | self.act_fn, 30 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=2), # shape: [N, 64, 23, 23] 31 | nn.BatchNorm2d(64), 32 | self.act_fn, 33 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2), # shape: [N, 64, 10, 10] 34 | nn.BatchNorm2d(64), 35 | self.act_fn, 36 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2), # shape: [N, 128, 4, 4] 37 | nn.BatchNorm2d(128), 38 | self.act_fn, 39 | nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=1), # shape: [N, 256, 1, 1] 40 | nn.BatchNorm2d(256), 41 | self.act_fn, 42 | ) 43 | self.fc1 = nn.Sequential( 44 | nn.Linear(in_features=256, out_features=512), 45 | self.act_fn, 46 | nn.Dropout(dropout_vis_fc), 47 | ) # shape: [N, 512] 48 | self.fc2 = nn.Linear(in_features=512, out_features=visual_features) # shape: [N, 64] 49 | 50 | def forward(self, x: torch.Tensor) -> torch.Tensor: 51 | x = self.conv_model(x) 52 | x = torch.flatten(x, start_dim=1) 53 | x = self.fc1(x) 54 | x = self.fc2(x) 
55 | if self.l2_normalize_output: 56 | x = F.normalize(x, p=2, dim=1) 57 | return x # shape: [N, 64] 58 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_r3m.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from r3m import load_r3m 6 | 7 | 8 | class VisionR3M(nn.Module): 9 | def __init__( 10 | self, device: torch.device, visual_features: int, resnet_model: str = "resnet18", freeze_backbone: bool = True 11 | ): 12 | super(VisionR3M, self).__init__() 13 | # Load pre-trained R3M resnet-18 14 | self.r3m = load_r3m(resnet_model, device).module 15 | # set all grads to false 16 | for param in self.r3m.parameters(): 17 | param.requires_grad = False 18 | if not freeze_backbone: 19 | # finetune last layer 20 | for param in self.r3m.convnet.layer4.parameters(): 21 | param.requires_grad = True 22 | self.fc1 = nn.Linear(512, 256) 23 | self.fc2 = nn.Linear(256, visual_features) 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | with torch.no_grad(): 27 | x = self.r3m(x) # batch, 512, 1, 1 28 | # Add fc layer for final prediction 29 | x = torch.flatten(x, start_dim=1) # batch, 512 30 | output = F.relu(self.fc1(x)) # batch, 256 31 | output = self.fc2(output) # batch, 64 32 | return output 33 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | 7 | class VisionResnet(nn.Module): 8 | def __init__(self, visual_features: int, freeze_backbone: bool = True): 9 | super(VisionResnet, self).__init__() 10 | # Load pre-trained resnet-18 11 | net = models.resnet18(pretrained=True) 12 | # Remove the last fc layer, and rebuild 13 | modules = list(net.children())[:-1] 14 | for param in net.parameters(): 15 | param.requires_grad = False 16 | 17 | # Only finetune last layer 18 | if not freeze_backbone: 19 | for param in net.layer4.parameters(): 20 | param.requires_grad = True 21 | self.net = nn.Sequential(*modules) 22 | self.fc1 = nn.Linear(512, 256) 23 | self.fc2 = nn.Linear(256, visual_features) 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | x = self.net(x) # batch, 512, 1, 1 27 | # Add fc layer for final prediction 28 | x = torch.flatten(x, start_dim=1) # batch, 512 29 | output = F.relu(self.fc1(x)) # batch, 256 30 | output = self.fc2(output) # batch, 64 31 | return output 32 | -------------------------------------------------------------------------------- /hulc2/models/perceptual_encoders/vision_resnet_aff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from segmentation_models_pytorch.encoders import get_encoder 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | import torchvision.models as models 7 | 8 | 9 | class VisionResnetAff(nn.Module): 10 | def __init__(self, visual_features: int, input_shape: list, depth: int = 3, freeze_backbone: bool = True): 11 | super(VisionResnetAff, self).__init__() 12 | # Load pre-trained resnet-18 13 | self.net = get_encoder("resnet18", in_channels=input_shape[-1], depth=depth, weights="imagenet") 14 | # Remove the last fc layer, and rebuild 15 | for param in 
self.net.parameters(): 16 | param.requires_grad = False 17 | if freeze_backbone: 18 | for param in self.net.layer4.parameters(): 19 | param.requires_grad = True 20 | 21 | out_shape = self.calc_img_enc_size(list(input_shape)) 22 | self.fc1 = nn.Linear(np.prod(out_shape), 512) 23 | self.fc2 = nn.Linear(512, 256) 24 | self.fc3 = nn.Linear(256, visual_features) 25 | 26 | def calc_img_enc_size(self, input_shape): 27 | test_tensor = torch.zeros(input_shape).permute(2, 0, 1) 28 | test_tensor = test_tensor.unsqueeze(0) 29 | shape = self.net(test_tensor)[-1].shape[1:] 30 | return shape 31 | 32 | def forward(self, x: torch.Tensor) -> torch.Tensor: 33 | x = self.net(x)[-1] # batch, 128, n, n 34 | # Add fc layer for final prediction 35 | x = torch.flatten(x, start_dim=1) # batch, n*n*128 36 | output = F.relu(self.fc1(x)) # batch, 512 37 | output = F.relu(self.fc2(output)) # batch, 256 38 | output = self.fc3(output) # batch, 64 39 | return output 40 | -------------------------------------------------------------------------------- /hulc2/models/plan_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/models/plan_encoders/__init__.py -------------------------------------------------------------------------------- /hulc2/models/plan_encoders/plan_proposal_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | import torch.nn as nn 4 | 5 | from hulc2.utils.distributions import Distribution, State 6 | 7 | 8 | class PlanProposalNetwork(nn.Module): 9 | def __init__( 10 | self, 11 | perceptual_features: int, 12 | latent_goal_features: int, 13 | plan_features: int, 14 | activation_function: str, 15 | hidden_size: int, 16 | dist: Distribution, 17 | ): 18 | super(PlanProposalNetwork, self).__init__() 19 | self.perceptual_features = perceptual_features 20 | self.latent_goal_features = latent_goal_features 21 | self.plan_features = plan_features 22 | self.hidden_size = hidden_size 23 | self.in_features = self.perceptual_features + self.latent_goal_features 24 | self.act_fn = getattr(nn, activation_function)() 25 | self.dist = dist 26 | self.fc_model = nn.Sequential( 27 | nn.Linear(in_features=self.in_features, out_features=hidden_size), # shape: [N, 136] 28 | # nn.BatchNorm1d(hidden_size), 29 | self.act_fn, 30 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 31 | # nn.BatchNorm1d(hidden_size), 32 | self.act_fn, 33 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 34 | # nn.BatchNorm1d(hidden_size), 35 | self.act_fn, 36 | nn.Linear(in_features=hidden_size, out_features=hidden_size), 37 | # nn.BatchNorm1d(hidden_size), 38 | self.act_fn, 39 | ) 40 | self.fc_state = self.dist.build_state(self.hidden_size, self.plan_features) 41 | 42 | def forward(self, initial_percep_emb: torch.Tensor, latent_goal: torch.Tensor) -> State: 43 | x = torch.cat([initial_percep_emb, latent_goal], dim=-1) 44 | x = self.fc_model(x) 45 | my_state = self.fc_state(x) 46 | state = self.dist.forward_dist(my_state) 47 | return state 48 | -------------------------------------------------------------------------------- /hulc2/rollout/gpt3_planning.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openai 4 | 5 | openai_key = os.environ["OPENAI_KEY"] 6 | openai.api_key = openai_key 7 | 8 | gpt3_prompt = """ 9 | state = {'drawer_open': 
False, 'blocks_on_table': ['red'], 'buttons_on': ['green']} 10 | # put away the red block. 11 | open_drawer() 12 | pick_and_place('red', 'drawer') 13 | close_drawer() 14 | ### 15 | state = {'drawer_open': False, 'blocks_on_table': [], 'buttons_on': ['yellow']} 16 | # turn off the lights. 17 | push_button('yellow') 18 | ### 19 | state = {'drawer_open': False, 'blocks_on_table': ['red', 'green', 'blue'], 'buttons_on': ['green', 'yellow']} 20 | """ 21 | 22 | gpt_version = "text-davinci-002" 23 | 24 | 25 | def LM(prompt, max_tokens=128, temperature=0, stop=None): 26 | response = openai.Completion.create( 27 | engine=gpt_version, prompt=prompt, max_tokens=max_tokens, temperature=temperature, stop=stop 28 | ) 29 | return response["choices"][0]["text"].strip() 30 | 31 | 32 | user_input = "tidy up the workspace and turn off all the lights" 33 | if user_input[-1] != ".": 34 | user_input += "." 35 | 36 | context = gpt3_prompt 37 | context += "# " + user_input + "\n" 38 | response = LM(context, stop=["###"]) 39 | context += response + "\n" 40 | 41 | step_text = "" 42 | 43 | 44 | def pick_and_place(obj1, obj2): 45 | global step_text 46 | step_text = f"Pick the {obj1} block and place it on the {obj2}." 47 | 48 | 49 | def open_drawer(): 50 | global step_text 51 | step_text = "pull the handle to open the drawer" 52 | 53 | 54 | def close_drawer(): 55 | global step_text 56 | step_text = "pull the handle to close the drawer" 57 | 58 | 59 | def push_button(obj1): 60 | global step_text 61 | if "green" in obj1: 62 | step_text = "press the button to turn on the led light" 63 | if "yellow" in obj1: 64 | step_text = "use the switch to turn on the light bulb" 65 | 66 | 67 | # Execute commands given by LM. 68 | step_cmds = response.split("\n") 69 | print("LM generated plan:") 70 | for step_cmd in step_cmds: 71 | step_cmd = step_cmd.replace("robot.", "") 72 | # print(step_cmd) 73 | exec(step_cmd) 74 | print("Step:", step_text) 75 | # obs = run_hucl(obs, step_text) 76 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/colors.yaml: -------------------------------------------------------------------------------- 1 | red: 2 | - "red" 3 | 4 | blue: 5 | - "blue" 6 | 7 | green: 8 | - "green" 9 | 10 | yellow: 11 | - "yellow" 12 | 13 | purple: 14 | - "purple" 15 | 16 | orange: 17 | - "orange" 18 | 19 | pink: 20 | - "pink" 21 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/config/lang_model/bert.yaml: -------------------------------------------------------------------------------- 1 | _target_: webapp.language_encoders.sbert.SBert 2 | _recursive_: False 3 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/config/lang_model/clip.yaml: -------------------------------------------------------------------------------- 1 | _target_: webapp.language_encoders.clip.CLIPLang 2 | freeze_backbone: True 3 | model_name: RN50 4 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/config/retrieve_data.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - lang_model: bert 3 | 4 | ignore_empty_tasks: True 5 | 6 | lang_model: 7 | nlp_model: paraphrase-MiniLM-L3-v2 8 | 9 | save_path: ./annotations 10 | database_path: webapp/database.db 11 | # Original dataset to be reduced... 
12 | # Only needs to load ep_start_end_ids.npy 13 | dataset_dir: ../datasets/unprocessed/real_world/real_world_play_processed 14 | 15 | 16 | #-- Hydra config --# 17 | hydra_outputs: ./hydra_outputs/tmp/ 18 | hydra: 19 | run: 20 | dir: ${hydra_outputs}/${now:%Y-%m-%d}/${now:%H-%M-%S} # Output 21 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/tasks.yaml: -------------------------------------------------------------------------------- 1 | open_drawer: 2 | - "Opening the drawer" 3 | 4 | close_drawer: 5 | - "Closing the drawer" 6 | 7 | move_slide_left: 8 | - "Moving sliding door to the left" 9 | 10 | move_slide_right: 11 | - "Moving sliding door to the right" 12 | 13 | turn_on_[x]_led: 14 | - "turn on the [x] light" 15 | 16 | turn_off_[x]_led: 17 | - "turn off the [x] light" 18 | 19 | rotate_[x]_block_right: 20 | - "grasp the [x] block, then rotate it right" 21 | 22 | rotate_[x]_block_left: 23 | - "grasp the [x] block, then rotate it left" 24 | 25 | push_[x]_block_left: 26 | - "push the [x] block towards the left" 27 | 28 | push_[x]_block_right: 29 | - "push the [x] block towards the right" 30 | 31 | place_[x]_box: 32 | - "Place the [x] block inside the box" 33 | 34 | place_[x]_drawer: 35 | - "Place the [x] block inside the drawer" 36 | 37 | place_[x]_left_cabinet: 38 | - "Place the [x] block inside the left cabinet" 39 | 40 | place_[x]_right_cabinet: 41 | - "Place the [x] block inside the right cabinet" 42 | 43 | place_[x]_table: 44 | - "Place the [x] block on the table" 45 | 46 | place_[x]_drawer_top: 47 | - "Place the [x] block on top of the drawer" 48 | 49 | unstack_[x]_block: 50 | - "Unstack the [x] block" 51 | 52 | lift_[x]_block: 53 | - "Lift the [x] block" 54 | 55 | push_[x]_block_in_drawer: 56 | - "push the [x] block inside the drawer" 57 | 58 | stack_[x]_on_[y]: 59 | - "Stack the [x] block on top of the [y] block" 60 | -------------------------------------------------------------------------------- /hulc2/scripts/utils/utils.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import yaml 4 | 5 | 6 | def read_tasks(): 7 | file_dir = pathlib.Path(__file__).parent.resolve() 8 | file = file_dir / "tasks.yaml" 9 | with open(file.as_posix(), "r") as stream: 10 | try: 11 | tasks = yaml.safe_load(stream) 12 | except yaml.YAMLError as exc: 13 | print(exc) 14 | return tasks 15 | 16 | 17 | def read_colors(): 18 | file_dir = pathlib.Path(__file__).parent.resolve() 19 | file = file_dir / "colors.yaml" 20 | with open(file.as_posix(), "r") as stream: 21 | try: 22 | colors = yaml.safe_load(stream) 23 | except yaml.YAMLError as exc: 24 | print(exc) 25 | return colors 26 | -------------------------------------------------------------------------------- /hulc2/scripts/viz_annotations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | from pathlib import Path 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | def add_img_text(img, text_label): 11 | font_scale = 0.6 12 | thickness = 2 13 | color = (0, 0, 0) 14 | im_w, im_h = img.shape[:2] 15 | x1, y1 = 10, 20 16 | (w, h), _ = cv2.getTextSize(text_label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness) 17 | out_img = cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1 + h), color, -1) 18 | out_img = cv2.putText( 19 | out_img, 20 | text_label, 21 | org=(x1, y1), 22 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 23 | fontScale=font_scale, 24 | color=(255, 255, 
255), 25 | thickness=thickness, 26 | ) 27 | return out_img 28 | 29 | 30 | def main(): 31 | # Please first run get_annotations to generate auto_lang_ann.npy 32 | lang_ann_path = ( 33 | Path(__file__).resolve().parents[1] / "annotations" / "lang_paraphrase-MiniLM-L3-v2" / "auto_lang_ann.npy" 34 | ) 35 | 36 | # Path where dataset is 37 | dataset_path = ( 38 | "/mnt/ssd_shared/Users/Jessica/Documents/Thesis_ssd/datasets/unprocessed/real_world/500k_all_tasks_dataset_15hz" 39 | ) 40 | 41 | annotations = np.load(lang_ann_path.resolve(), allow_pickle=True).item() 42 | indices = [317, 723, 22] 43 | for index in indices: 44 | idx = index - 1 45 | caption = annotations["language"]["ann"][idx] 46 | start_fr, end_fr = annotations["info"]["indx"][idx] 47 | for fr in range(start_fr, end_fr): 48 | frame_file = os.path.join(dataset_path, "episode_%07d.npz" % fr) 49 | step_file = np.load(frame_file) 50 | img = step_file["rgb_static"] 51 | w, h = img.shape[:2] 52 | img = cv2.resize(img, (h * 3, w * 3)) 53 | img = add_img_text(img, caption) 54 | cv2.imshow("img", img[:, :, ::-1]) 55 | cv2.waitKey(0) 56 | cv2.waitKey(1) 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /hulc2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/utils/__init__.py -------------------------------------------------------------------------------- /hulc2/utils/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/hulc2/utils/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /hulc2/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from typing import DefaultDict 2 | 3 | import numpy as np 4 | 5 | 6 | def get_split_data(play_start_end_ids, data_percent, lang_data=None): 7 | start_end_ids = np.array(play_start_end_ids) 8 | cumsum = np.cumsum([e - s for s, e in play_start_end_ids]) 9 | 10 | n_samples = int(cumsum[-1] * data_percent) 11 | max_idx = min(n_samples, cumsum[-1]) if n_samples > 0 else cumsum[-1] 12 | indices = [0] 13 | for i in range(len(cumsum) - 1): 14 | if cumsum[i] <= max_idx: 15 | indices.append(i + 1) 16 | 17 | # Valid play-data start_end_ids episodes 18 | start_end_ids = [start_end_ids[i] for i in indices] 19 | diff = cumsum[indices[-1]] - n_samples 20 | start_end_ids[-1][-1] = start_end_ids[-1][-1] - diff 21 | 22 | # Only add frames w/lang that are inside selected non-lang frames 23 | if lang_data is not None: 24 | lang_data = get_split_lang_sequences(start_end_ids, lang_data) 25 | return np.array(start_end_ids), lang_data 26 | 27 | 28 | def get_split_lang_sequences(start_end_ids, lang_data, asarray=True): 29 | split_lang_data = { 30 | "language": {"ann": [], "task": [], "emb": []}, 31 | "info": {"episodes": [], "indx": []}, 32 | } 33 | # Language annotated episodes(64 frames) 34 | # keys = [(start_i, end_i), ...] 35 | keys = np.array([idx for idx in lang_data["info"]["indx"]]) 36 | for start, end in start_end_ids: 37 | # Check if language annotated episode frames(64) are part of frames selected for non-language annotated frames(play data episodes). 38 | # i.e. 
Check that both language annotated and non-language come frome the same data 39 | cond = np.logical_and(keys[:, 0] >= start, keys[:, 1] <= end) 40 | inside_ep = np.where(cond)[0] 41 | 42 | # If lang-annotated ep is inside selected play-data ep copy selected ep 43 | for i in inside_ep: 44 | split_lang_data["language"]["ann"].append(lang_data["language"]["ann"][i]) 45 | split_lang_data["language"]["task"].append(lang_data["language"]["task"][i]) 46 | split_lang_data["language"]["emb"].append(lang_data["language"]["emb"][i]) 47 | split_lang_data["info"]["indx"].append(lang_data["info"]["indx"][i]) 48 | 49 | split_lang_data["language"]["emb"] = np.array(split_lang_data["language"]["emb"]) 50 | return split_lang_data 51 | -------------------------------------------------------------------------------- /hulc2/utils/data_visualization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import sys 4 | 5 | from calvin.utils.utils import get_git_commit_hash, get_last_checkpoint, print_system_env_info 6 | import hydra 7 | import numpy 8 | from omegaconf import DictConfig, ListConfig, OmegaConf 9 | from pytorch_lightning import seed_everything, Trainer 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | from matplotlib.animation import ArtistAnimation 14 | import matplotlib.pyplot as plt 15 | import numpy as np 16 | 17 | 18 | def visualize(data): 19 | seq_img = data[1][0][0].numpy() 20 | title = data[4][0] 21 | s, c, h, w = seq_img.shape 22 | seq_img = np.transpose(seq_img, (0, 2, 3, 1)) 23 | imgs = [] 24 | fig = plt.figure() 25 | for j in range(s): 26 | # imgRGB = seq_img[j].astype(int) 27 | imgRGB = seq_img[j] 28 | imgRGB = (imgRGB - imgRGB.min()) / (imgRGB.max() - imgRGB.min()) 29 | img = plt.imshow(imgRGB, animated=True) 30 | imgs.append([img]) 31 | anim = ArtistAnimation(fig, imgs, interval=50) 32 | plt.title(title) 33 | plt.show() 34 | 35 | 36 | @hydra.main(config_path="../../conf", config_name="default.yaml") 37 | def train(cfg: DictConfig) -> None: 38 | # sets seeds for numpy, torch, python.random and PYTHONHASHSEED. 
39 | seed_everything(cfg.seed) 40 | data_module = hydra.utils.instantiate(cfg.dataset, num_workers=0) 41 | data_module.setup() 42 | train = data_module.train_dataloader() 43 | dataset = train["lang"] 44 | logger.info(f"Dataset Size: {len(dataset)}") 45 | for i, lang in enumerate(dataset): 46 | logger.info(f"Element : {i}") 47 | visualize(lang) 48 | 49 | 50 | if __name__ == "__main__": 51 | train() 52 | -------------------------------------------------------------------------------- /hulc2/utils/dataset_pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Running from src: $1" 3 | echo "Output to: $2" 4 | 5 | # Job to perform 6 | # source ~/.bashrc 7 | # conda activate $1 8 | # srun python ${@:2} 9 | 10 | python preprocess_real_data.py --dataset_root $1 --output_dir $2_processed 11 | python render_low_freq.py --dataset_root $2_processed --output_dir $2_15hz 12 | python split_dataset.py --dataset_root $2_15hz 13 | -------------------------------------------------------------------------------- /hulc2/utils/dataset_task_statistics.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import logging 3 | 4 | import hydra 5 | from omegaconf import DictConfig 6 | from pytorch_lightning import seed_everything 7 | from tqdm import tqdm 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def count_tasks(batch, env, tasks, task_counter): 13 | state_obs, rgb_obs, depth_obs, actions, _, reset_info, idx = batch 14 | batch_size = state_obs.shape[0] 15 | for i in range(batch_size): 16 | # reset env to state of last step in the episode (goal state) 17 | env.reset(reset_info, i, -1) 18 | goal_info = env.get_info() 19 | # reset env to state of first step in the episode 20 | env.reset(reset_info, i, 0) 21 | start_info = env.get_info() 22 | # check if task was achieved in sequence 23 | task_info = tasks.get_task_info(start_info, goal_info) 24 | task_counter += Counter(task_info) 25 | 26 | 27 | @hydra.main(config_path="../../conf", config_name="config") 28 | def compute_dataset_statistics(cfg: DictConfig) -> None: 29 | """""" 30 | seed_everything(cfg.seed) 31 | 32 | # since we don't use the trainer during inference, manually set up datamodule 33 | data_module = hydra.utils.instantiate(cfg.dataset, batch_size=32, num_workers=4) 34 | data_module.prepare_data() 35 | data_module.setup() 36 | train_dataloader = data_module.train_dataloader() 37 | val_dataloader = data_module.val_dataloader() 38 | 39 | env = hydra.utils.instantiate(cfg.rollout.env_cfg, train_dataloader.dataset.dataset_loader, "cpu") 40 | tasks = hydra.utils.instantiate(cfg.rollout.task_cfg) 41 | 42 | task_counter = Counter() # type: ignore 43 | logger.info( 44 | f"training dataset with {len(train_dataloader.dataset.dataset_loader.max_batched_length_per_demo)} " 45 | f"episodes and {len(train_dataloader.dataset.dataset_loader.episode_lookup)} frames" 46 | ) 47 | 48 | for batch in tqdm(train_dataloader): 49 | count_tasks(batch, env, tasks, task_counter) 50 | logger.info(f"training tasks: {task_counter}") 51 | 52 | task_counter = Counter() 53 | logger.info( 54 | f"training dataset with {len(val_dataloader.dataset.dataset_loader.max_batched_length_per_demo)} " 55 | f"episodes and {len(val_dataloader.dataset.dataset_loader.episode_lookup)} frames" 56 | ) 57 | for batch in tqdm(val_dataloader): 58 | count_tasks(batch, env, tasks, task_counter) 59 | logger.info(f"validation tasks: {task_counter}") 60 | 61 | 62 | 
if __name__ == "__main__": 63 | compute_dataset_statistics() 64 | -------------------------------------------------------------------------------- /hulc2/utils/distributions.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Union 3 | 4 | import torch 5 | from torch.distributions import Independent, Normal, OneHotCategoricalStraightThrough # type: ignore 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | DiscState = namedtuple("DiscState", ["logit"]) 10 | ContState = namedtuple("ContState", ["mean", "std"]) 11 | 12 | State = Union[DiscState, ContState] 13 | 14 | 15 | class Distribution: 16 | def __init__(self, **kwargs): 17 | self.dist = kwargs.get("dist") 18 | assert self.dist == "discrete" or self.dist == "continuous" 19 | if self.dist == "discrete": 20 | self.category_size = kwargs.get("category_size") 21 | self.class_size = kwargs.get("class_size") 22 | 23 | def get_dist(self, state): 24 | if self.dist == "discrete": 25 | shape = state.logit.shape 26 | logits = torch.reshape(state.logit, shape=(*shape[:-1], self.category_size, self.class_size)) 27 | return Independent(OneHotCategoricalStraightThrough(logits=logits), 1) 28 | elif self.dist == "continuous": 29 | return Independent(Normal(state.mean, state.std), 1) 30 | 31 | def detach_state(self, state): 32 | if self.dist == "discrete": 33 | return DiscState(state.logit.detach()) 34 | elif self.dist == "continuous": 35 | return ContState(state.mean.detach(), state.std.detach()) 36 | 37 | def sample_latent_plan(self, distribution): 38 | sampled_plan = distribution.sample() 39 | if self.dist == "discrete": 40 | sampled_plan = torch.flatten(sampled_plan, start_dim=-2, end_dim=-1) 41 | return sampled_plan 42 | 43 | def build_state(self, hidden_size, plan_features): 44 | fc_state = [] 45 | if self.dist == "discrete": 46 | fc_state += [nn.Linear(hidden_size, plan_features)] 47 | elif self.dist == "continuous": 48 | fc_state += [nn.Linear(hidden_size, 2 * plan_features)] 49 | return nn.Sequential(*fc_state) 50 | 51 | def forward_dist(self, x): 52 | if self.dist == "discrete": 53 | prior_logit = x 54 | state = DiscState(prior_logit) # type: State 55 | elif self.dist == "continuous": 56 | mean, var = torch.chunk(x, 2, dim=-1) 57 | min_std = 0.0001 58 | std = F.softplus(var) + min_std 59 | state = ContState(mean, std) 60 | return state 61 | -------------------------------------------------------------------------------- /hulc2/utils/kl_callbacks.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning import Callback, LightningModule, Trainer 2 | import torch 3 | 4 | 5 | def sigmoid(scale: float, shift: float, x: int) -> float: 6 | return torch.sigmoid(torch.Tensor([(x - shift) / (scale / 12)])).item() 7 | 8 | 9 | class KLSchedule(Callback): 10 | """ 11 | Base class for KL Annealing 12 | """ 13 | 14 | def __init__(self, start_epoch: int, end_epoch: int, max_kl_beta: float): 15 | self.start_epoch = start_epoch 16 | self.end_epoch = end_epoch 17 | self.max_kl_beta = max_kl_beta 18 | 19 | def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None: 20 | epoch = pl_module.current_epoch 21 | kl_beta = self._anneal_fn(epoch) 22 | pl_module.set_kl_beta(kl_beta) # type: ignore 23 | 24 | def _anneal_fn(self, epoch): 25 | raise NotImplementedError 26 | 27 | 28 | class KLConstantSchedule(KLSchedule): 29 | def __init__(self): 30 | pass 31 | 32 | def 
on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None: 33 | pass 34 | 35 | def _anneal_fn(self, epoch: int) -> None: 36 | pass 37 | 38 | 39 | class KLSigmoidSchedule(KLSchedule): 40 | def _anneal_fn(self, epoch: int) -> float: 41 | if epoch < self.start_epoch: 42 | kl_beta = 0.0 43 | elif epoch > self.end_epoch: 44 | kl_beta = self.max_kl_beta 45 | else: 46 | scale = self.end_epoch - self.start_epoch 47 | shift = (self.end_epoch + self.start_epoch) / 2 48 | kl_beta = sigmoid(scale=scale, shift=shift, x=epoch) * self.max_kl_beta 49 | return kl_beta 50 | 51 | 52 | class KLLinearSchedule(KLSchedule): 53 | def _anneal_fn(self, epoch: int) -> float: 54 | if epoch < self.start_epoch: 55 | kl_beta = 0.0 56 | elif epoch > self.end_epoch: 57 | kl_beta = self.max_kl_beta 58 | else: 59 | kl_beta = self.max_kl_beta * (epoch - self.start_epoch) / (self.end_epoch - self.start_epoch) 60 | return kl_beta 61 | 62 | 63 | if __name__ == "__main__": 64 | import matplotlib 65 | import matplotlib.pyplot as plt 66 | 67 | matplotlib.use("TkAgg") 68 | import numpy as np 69 | 70 | kl = KLLinearSchedule(10, 50, 0.1) 71 | x = np.arange(200) 72 | y = [kl._anneal_fn(i) for i in x] 73 | plt.plot(x, y) 74 | 75 | kl2 = KLSigmoidSchedule(10, 50, 0.1) 76 | x = np.arange(200) 77 | y = [kl2._anneal_fn(i) for i in x] 78 | plt.plot(x, y) 79 | 80 | plt.show() 81 | -------------------------------------------------------------------------------- /hulc2/utils/real_world_dataset_pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Running from src: $1" 3 | echo "Output to: $2" 4 | 5 | # Job to perform 6 | # source ~/.bashrc 7 | # conda activate $1 8 | # srun python ${@:2} 9 | 10 | python preprocess_real_data.py --dataset_root $1 --output_dir $2_processed 11 | python render_low_freq.py --dataset_root $2_processed --output_dir $2_15hz 12 | python split_dataset.py --dataset_root $2_15hz 13 | -------------------------------------------------------------------------------- /hulc2/utils/relabel_with_new_lang_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from typing import Dict 4 | 5 | import hydra 6 | import numpy as np 7 | from omegaconf import DictConfig, OmegaConf 8 | import torch 9 | 10 | import hulc2 11 | 12 | """This script allows for re-annotating video sequences of PlayData. 
13 | Parameters: 14 | · +path=/path/to/current/auto_lang_ann.npy 15 | · +name_folder=name_to_new_annotations 16 | New annotations sampling from 'annotations=expert' defined in expert.yaml 17 | NLP model selection: 18 | · model.nlp_model=mini -> 'paraphrase-MiniLM-L6-v2' 19 | · model.nlp_model=multi -> 'paraphrase-multilingual-mpnet-base-v2' 20 | · model.nlp_model=mpnet -> 'paraphrase-mpnet-base-v2' 21 | """ 22 | 23 | 24 | @hydra.main(config_path="../../conf", config_name="lang_ann.yaml") 25 | def main(cfg: DictConfig) -> None: 26 | print("Loading data") 27 | path = Path(cfg.path) 28 | data = np.load(path, allow_pickle=True).reshape(-1)[0] 29 | if "training" in cfg.path: 30 | print("using training instructions...") 31 | task_ann = cfg.train_instructions 32 | else: 33 | print("using validation instructions...") 34 | task_ann = cfg.val_instructions 35 | if cfg.reannotate: 36 | print("Re-annotating sequences...") 37 | data["language"]["ann"] = [ 38 | task_ann[task][np.random.randint(len(task_ann[task]))] for task in data["language"]["task"] 39 | ] 40 | print("Loading Language Model") 41 | model = hydra.utils.instantiate(cfg.model) 42 | print(f"Computing Embeddings with Model --> {cfg.model}") 43 | data["language"]["emb"] = model(data["language"]["ann"]).cpu().numpy() 44 | print("Saving data") 45 | save_path = path.parent / ".." / cfg.name_folder 46 | save_path.mkdir(exist_ok=True) 47 | np.save(save_path / "auto_lang_ann.npy", data) 48 | 49 | if "validation" in cfg.path: 50 | embeddings: Dict = {} 51 | for task, ann in cfg.val_instructions.items(): 52 | embeddings[task] = {} 53 | language_embedding = model(list(ann)) 54 | embeddings[task]["emb"] = language_embedding.cpu().numpy() 55 | embeddings[task]["ann"] = ann 56 | np.save(save_path / "embeddings", embeddings) 57 | print("Done saving val language embeddings for Rollouts !") 58 | 59 | 60 | if __name__ == "__main__": 61 | main() 62 | -------------------------------------------------------------------------------- /hulc2/utils/tensor_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from scipy.spatial.transform.rotation import Rotation as R 5 | import torch 6 | from torch.autograd import Variable 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def unravel_idx(indices, shape): 12 | coord = [] 13 | for dim in reversed(shape): 14 | coord.append(indices % dim) 15 | indices = indices // dim 16 | 17 | coord = np.stack(coord[::-1], axis=-1) 18 | return coord 19 | 20 | 21 | def calc_cnn_out_size(in_size, k, p=0, s=1): 22 | out_size = ((in_size + 2 * p - k) / s) + 1 23 | return int(out_size) 24 | 25 | 26 | def np_quat_to_scipy_quat(quat): 27 | """wxyz to xyzw""" 28 | return np.array([quat.x, quat.y, quat.z, quat.w]) 29 | 30 | 31 | def pos_orn_to_matrix(pos, orn): 32 | """ 33 | :param pos: np.array of shape (3,) 34 | :param orn: np.array of shape (4,) -> quaternion xyzw 35 | np.quaternion -> quaternion wxyz 36 | np.array of shape (3,) -> euler angles xyz 37 | :return: 4x4 homogeneous transformation 38 | """ 39 | mat = np.eye(4) 40 | if isinstance(orn, np.quaternion): 41 | orn = np_quat_to_scipy_quat(orn) 42 | mat[:3, :3] = R.from_quat(orn).as_matrix() 43 | elif len(orn) == 4: 44 | mat[:3, :3] = R.from_quat(orn).as_matrix() 45 | elif len(orn) == 3: 46 | mat[:3, :3] = R.from_euler("xyz", orn).as_matrix() 47 | mat[:3, 3] = pos 48 | return mat 49 | 50 | 51 | def tt(x, device): 52 | if isinstance(x, dict): 53 | dict_of_list = {} 54 | for key, val in 
x.items(): 55 | dict_of_list[key] = Variable(torch.from_numpy(val).float().to(device), requires_grad=False) 56 | return dict_of_list 57 | else: 58 | return Variable(torch.from_numpy(x).float().to(device), requires_grad=False) 59 | 60 | 61 | def torch_to_numpy(x): 62 | return x.detach().cpu().numpy() 63 | -------------------------------------------------------------------------------- /hulc2/utils/visualizations.py: -------------------------------------------------------------------------------- 1 | # Force matplotlib to not use any Xwindows backend. 2 | import matplotlib 3 | import numpy as np 4 | from pytorch_lightning.loggers import WandbLogger 5 | import torch 6 | import wandb 7 | 8 | matplotlib.use("Agg") 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def visualize_temporal_consistency(max_batched_length_per_demo, gpus, sampled_plans, all_idx, step, logger, prefix=""): 13 | """compute t-SNE plot of embeddings os a task to visualize temporal consistency""" 14 | labels = [] 15 | for demo in max_batched_length_per_demo: 16 | labels = np.concatenate((labels, np.arange(demo) / float(demo)), axis=0) 17 | # because with ddp, data doesn't come ordered anymore 18 | labels = labels[torch.flatten(all_idx).cpu()] 19 | colors = [plt.cm.Spectral(y_i) for y_i in labels] 20 | assert sampled_plans.shape[0] == len(labels), "plt X shape {}, label len {}".format( 21 | sampled_plans.shape[0], len(labels) 22 | ) 23 | 24 | from MulticoreTSNE import MulticoreTSNE as TSNE 25 | 26 | x_tsne = TSNE(perplexity=40, n_jobs=8).fit_transform(sampled_plans.cpu()) 27 | 28 | plt.close("all") 29 | fig, ax = plt.subplots() 30 | _ = ax.scatter(x_tsne[:, 0], x_tsne[:, 1], c=colors, cmap=plt.cm.Spectral) 31 | fig.suptitle("Temporal Consistency of Latent space") 32 | ax.axis("off") 33 | if isinstance(logger, WandbLogger): 34 | logger.experiment.log({prefix + "latent_embedding": wandb.Image(fig)}) 35 | else: 36 | logger.experiment.add_figure(prefix + "latent_embedding", fig, global_step=step) 37 | -------------------------------------------------------------------------------- /hulc2/utils/visualize_calvin_dataset.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from pathlib import Path 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | if __name__ == "__main__": 8 | parser = ArgumentParser(description="Interactive visualization of CALVIN dataset") 9 | parser.add_argument("path", type=str, help="Path to dir containing scene_info.npy") 10 | parser.add_argument("-d", "--data", nargs="*", default=["rgb_static", "rgb_gripper"], help="Data to visualize") 11 | args = parser.parse_args() 12 | 13 | if not Path(args.path).is_dir(): 14 | print(f"Path {args.path} is either not a directory, or does not exist.") 15 | exit() 16 | 17 | indices = next(iter(np.load(f"{args.path}/scene_info.npy", allow_pickle=True).item().values())) 18 | indices = list(range(indices[0], indices[1] + 1)) 19 | 20 | annotations = np.load(f"{args.path}/lang_annotations/auto_lang_ann.npy", allow_pickle=True).item() 21 | annotations = list(zip(annotations["info"]["indx"], annotations["language"]["ann"])) 22 | 23 | idx = 0 24 | ann_idx = -1 25 | 26 | while True: 27 | t = np.load(f"{args.path}/episode_{indices[idx]:07d}.npz", allow_pickle=True) 28 | 29 | for d in args.data: 30 | if d not in t: 31 | print(f"Data {d} cannot be found in transition") 32 | continue 33 | 34 | cv2.imshow(d, t[d][:, :, ::-1]) 35 | 36 | for n, ((low, high), ann) in enumerate(annotations): 37 | if indices[idx] >= 
low and indices[idx] <= high: 38 | if n != ann_idx: 39 | print(f"{ann}") 40 | ann_idx = n 41 | 42 | key = cv2.waitKey(0) 43 | if key == ord("q"): 44 | break 45 | elif key == 83: # Right arrow 46 | idx = (idx + 1) % len(indices) 47 | elif key == 81: # Left arrow 48 | idx = (len(indices) + idx - 1) % len(indices) 49 | else: 50 | print(f'Unrecognized keycode "{key}"') 51 | -------------------------------------------------------------------------------- /hulc2/utils/visualize_real_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import cv2 5 | import numpy as np 6 | import tqdm 7 | 8 | 9 | def normalize_depth(img): 10 | img_mask = img == 0 11 | istats = (np.min(img[img > 0]), np.max(img)) 12 | imrange = (img.astype("float32") - istats[0]) / (istats[1] - istats[0]) 13 | imrange[img_mask] = 0 14 | imrange = 255.0 * imrange 15 | imsz = imrange.shape 16 | nchan = 1 17 | if len(imsz) == 3: 18 | nchan = imsz[2] 19 | imgcanvas = np.zeros((imsz[0], imsz[1], nchan), dtype="uint8") 20 | imgcanvas[0 : imsz[0], 0 : imsz[1]] = imrange.reshape((imsz[0], imsz[1], nchan)) 21 | return imgcanvas 22 | 23 | 24 | # Ger valid numpy files with raw data 25 | def get_files(path, extension, recursive=False): 26 | if not os.path.isdir(path): 27 | print("path does not exist: %s" % path) 28 | search_str = "/*.%s" % extension if not recursive else "**/*.%s" % extension 29 | files = glob.glob(path + search_str) 30 | if not files: 31 | print("No *.%s files found in %s" % (extension, path)) 32 | files.sort() 33 | return files 34 | 35 | 36 | def viz_data(data_dir): 37 | """Visualize teleop data recorded with Panda robot and check actions are valid""" 38 | files = get_files(data_dir, "npz", recursive=True) # Sorted files 39 | # Remove camera calibration npz from iterable files 40 | files = [f for f in files if "camera_info.npz" not in f] 41 | 42 | for idx, filename in enumerate(tqdm.tqdm(files)): 43 | data = np.load(filename, allow_pickle=True) 44 | if data is None: 45 | continue # Skip file 46 | 47 | new_size = (400, 400) 48 | for key in ["rgb_static", "depth_static", "rgb_gripper", "depth_gripper"]: 49 | img = cv2.resize(data[key], new_size) 50 | if "rgb" in key: 51 | cv2.imshow(key, img[:, :, ::-1]) 52 | else: 53 | img2 = normalize_depth(img) 54 | img2 = cv2.applyColorMap(img2, cv2.COLORMAP_JET) 55 | cv2.imshow(key, img2) 56 | cv2.waitKey(1) 57 | 58 | 59 | if __name__ == "__main__": 60 | data_dir = "/tmp/test_dataset" 61 | viz_data(data_dir) 62 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd calvin_env/tacto 4 | pip install -e . 5 | cd .. 6 | pip install -e . 7 | cd .. 8 | pip install -e . 9 | cd r3m 10 | pip install -e . 
11 | -------------------------------------------------------------------------------- /media/hulc2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mees/hulc2/56e51106a84080a93a12bdf232ca6fbb4303f01a/media/hulc2.gif -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | # https://github.com/psf/black 3 | line-length = 120 4 | target-version = ["py38"] 5 | exclude = "(.eggs|.git|.hg|.mypy_cache|.nox|.tox|.venv|.svn|_build|buck-out|build|dist)" 6 | 7 | [tool.isort] 8 | profile = "black" 9 | line_length = 120 10 | force_sort_within_sections = "True" 11 | order_by_type = "False" 12 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | isort 4 | pre-commit 5 | mypy 6 | pytest 7 | pytest-cov 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cmake 2 | wheel 3 | numpy>1.2 4 | hydra-core==1.1.1 5 | hydra-colorlog 6 | matplotlib 7 | opencv-python 8 | omegaconf 9 | kaggle 10 | plotly 11 | ftfy 12 | pyhash 13 | pytorch-lightning==1.8.3 14 | torch==1.12.1 15 | torchvision 16 | MulticoreTSNE 17 | gitpython 18 | scipy 19 | sentence-transformers 20 | pytorch3d 21 | gym 22 | moviepy 23 | tqdm 24 | termcolor 25 | wandb 26 | pypng 27 | segmentation-models-pytorch 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Setup hulc2 installation.""" 4 | 5 | from os import path as op 6 | import re 7 | 8 | from setuptools import find_packages, setup 9 | 10 | 11 | def _read(f): 12 | return open(op.join(op.dirname(__file__), f)).read() if op.exists(f) else "" 13 | 14 | 15 | _meta = _read("hulc2/__init__.py") 16 | 17 | 18 | def find_meta(_meta, string): 19 | l_match = re.search(r"^" + string + r'\s*=\s*"(.*)"', _meta, re.M) 20 | if l_match: 21 | return l_match.group(1) 22 | raise RuntimeError(f"Unable to find {string} string.") 23 | 24 | 25 | install_requires = [ 26 | l for l in _read("requirements.txt").split("\n") if l and not l.startswith("#") and not l.startswith("-") 27 | ] 28 | 29 | meta = dict( 30 | name=find_meta(_meta, "__project__"), 31 | version=find_meta(_meta, "__version__"), 32 | license=find_meta(_meta, "__license__"), 33 | description="Grounding Language with Visual Affordances over Unstructured Data", 34 | platforms=("Any"), 35 | zip_safe=False, 36 | keywords="pytorch hulc2".split(), 37 | author=find_meta(_meta, "__author__"), 38 | author_email=find_meta(_meta, "__email__"), 39 | url=" https://github.com/mees/hulc2", 40 | packages=find_packages(exclude=["tests"]), 41 | install_requires=install_requires, 42 | ) 43 | 44 | if __name__ == "__main__": 45 | print("find_package", find_packages(exclude=["tests"])) 46 | setup(**meta) 47 | -------------------------------------------------------------------------------- /setup_local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Setup hulc2 installation.""" 4 | 5 | from os import path as op 6 | import re 7 | 8 | from setuptools 
import find_packages, setup 9 | 10 | 11 | def _read(f): 12 | return open(op.join(op.dirname(__file__), f)).read() if op.exists(f) else "" 13 | 14 | 15 | _meta = _read("hulc2/__init__.py") 16 | 17 | 18 | def find_meta(_meta, string): 19 | l_match = re.search(r"^" + string + r'\s*=\s*"(.*)"', _meta, re.M) 20 | if l_match: 21 | return l_match.group(1) 22 | raise RuntimeError(f"Unable to find {string} string.") 23 | 24 | 25 | meta = dict( 26 | name=find_meta(_meta, "__project__"), 27 | version=find_meta(_meta, "__version__"), 28 | license=find_meta(_meta, "__license__"), 29 | description="Grounding Language with Visual Affordances over Unstructured Data", 30 | platforms=("Any"), 31 | zip_safe=False, 32 | keywords="pytorch hulc2".split(), 33 | author=find_meta(_meta, "__author__"), 34 | author_email=find_meta(_meta, "__email__"), 35 | url=" https://github.com/mees/hulc2", 36 | packages=find_packages(exclude=["tests"]), 37 | ) 38 | 39 | if __name__ == "__main__": 40 | print("find_package", find_packages(exclude=["tests"])) 41 | setup(**meta) 42 | -------------------------------------------------------------------------------- /slurm_scripts/sbatch_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo "Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | srun python ${@:2} 20 | 21 | # Print some Information about the end-time to STDOUT 22 | echo "DONE"; 23 | echo "Finished at $(date)"; 24 | -------------------------------------------------------------------------------- /slurm_scripts/sbatch_lfp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Print some information about the job to STDOUT 3 | echo "Workingdir: $PWD"; 4 | echo "Started at $(date)"; 5 | echo "Running job $SLURM_JOB_NAME"; 6 | echo "cpus per node: $SLURM_JOB_CPUS_PER_NODE"; 7 | echo "gres: $SLURM_GRES"; 8 | echo "mem: $SLURM_MEM_PER_NODE"; 9 | echo "ntasks: $SLURM_NTASKS"; 10 | echo "JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; 11 | 12 | export NCCL_DEBUG=INFO 13 | export PYTHONFAULTHANDLER=1 14 | export HYDRA_FULL_ERROR=1 15 | 16 | # Job to perform 17 | source ~/.bashrc 18 | conda activate $1 19 | timeout 23.8h srun python $2 slurm=true hydra.run.dir=$3 trainer.gpus=$4 ${@:5} 20 | 21 | if [[ $? -eq 124 ]]; then 22 | echo "Time limit exceeded. Resubmit job."; 23 | ssh ${USER}@kis2bat3 < 2 else np.inf 17 | 18 | checkpoints = get_all_checkpoints(training_dir) 19 | epochs = [str(e) for chk in checkpoints if (e := int(chk.stem.split("=")[1])) <= max_epoch] 20 | split_epochs = np.array_split(epochs, 8) 21 | epoch_args = [",".join(arr) for arr in split_epochs if len(arr)] 22 | for epoch_arg in epoch_args: 23 | cmd = [(training_dir / "evaluate.sh").as_posix(), "--checkpoints", epoch_arg] 24 | output = subprocess.check_output(cmd) 25 | print(output.decode("utf-8")) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | --------------------------------------------------------------------------------
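The two sketches below are not files from the repository; they are short usage illustrations of modules dumped above, with all sizes and hyperparameters chosen only for demonstration. This first one wires together SBert from hulc2/models/encoders/language_network.py and LanguageGoalEncoder from hulc2/models/encoders/goal_encoders.py to turn raw instructions into latent goal vectors. The sentence-transformer name matches the paraphrase-MiniLM-L3-v2 model referenced in the configs and annotation paths; the hidden and latent sizes are assumptions, not the training configuration.

    import torch

    from hulc2.models.encoders.goal_encoders import LanguageGoalEncoder
    from hulc2.models.encoders.language_network import SBert

    # SBert wraps a (frozen by default) sentence-transformer that returns 384-d embeddings.
    lang_net = SBert("paraphrase-MiniLM-L3-v2")

    # Hidden/latent sizes below are illustrative only.
    lang_goal_encoder = LanguageGoalEncoder(
        lang_net=lang_net,
        in_features=384,
        hidden_size=2048,
        latent_goal_features=32,
        l2_normalize_goal_embeddings=False,
        word_dropout_p=0.0,
        activation_function="ReLU",
    )

    with torch.no_grad():
        # The encoder accepts a list of strings and returns one latent goal per instruction.
        latent_goals = lang_goal_encoder(["open the drawer", "turn on the yellow light"])
    print(latent_goals.shape)  # torch.Size([2, 32])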
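Similarly, a minimal sketch (again not from the repository, sizes are assumptions) of the latent-plan distribution utilities in hulc2/utils/distributions.py, mirroring the continuous path used by PlanProposalNetwork: build the output head with build_state, convert its activations into a ContState, and sample a latent plan.

    import torch

    from hulc2.utils.distributions import Distribution

    hidden_size, plan_features, batch_size = 256, 32, 4  # illustrative sizes

    dist = Distribution(dist="continuous")
    # For the continuous case this is a Linear(hidden_size, 2 * plan_features) head (mean and pre-softplus std).
    fc_state = dist.build_state(hidden_size, plan_features)

    x = torch.randn(batch_size, hidden_size)            # stand-in for fused perception + goal features
    state = dist.forward_dist(fc_state(x))              # ContState(mean, std), std via softplus + min_std
    plan_dist = dist.get_dist(state)                    # Independent Normal over the plan dimensions
    sampled_plan = dist.sample_latent_plan(plan_dist)
    print(sampled_plan.shape)  # torch.Size([4, 32])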