├── README.md ├── RoboMD_paper.pdf ├── configs └── action_dicts.py ├── env ├── can_env.py ├── latent_action_env.py ├── lift_env.py ├── square_env.py ├── stack_env.py └── thread_env.py ├── file.png ├── images ├── fig2.png ├── fig3.png ├── fig4.png ├── github.png └── logo.png ├── index.html ├── requirements.txt ├── scripts └── convert_pairwise.py ├── train_continuous.py ├── train_discrete.py ├── train_embedding.py ├── utils ├── losses.py ├── robot_dataset.py └── vit_clip_model.py └── videos ├── RSS.mp4 ├── real_world ├── 1.mp4 ├── 10.mp4 ├── 11.mp4 ├── 12.mp4 ├── 13.mp4 ├── 2.mp4 ├── 3.mp4 ├── 4.mp4 ├── 5.mp4 ├── 6.mp4 ├── 7.mp4 ├── 8.mp4 └── 9.mp4 └── sim_videos ├── Can ├── BC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ └── 4.mp4 ├── BCQ │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ └── 4.mp4 ├── BCT │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ └── 4.mp4 ├── Diff │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ └── 4.mp4 └── HBC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ └── 4.mp4 ├── Lift ├── BC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── BCQ │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── BCT │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── Diff │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 └── HBC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── Square ├── BC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ └── 5.mp4 ├── BCQ │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ └── 5.mp4 ├── BCT │ ├── 3.mp4 │ └── 5.mp4 ├── Diff │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ └── 5.mp4 └── HBC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ └── 5.mp4 ├── Stack ├── BC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── BCQ │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── BCT │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 ├── Diff │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 └── HBC │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ ├── 5.mp4 │ └── 6.mp4 └── Thread ├── BC ├── 1.mp4 ├── 2.mp4 ├── 3.mp4 ├── 4.mp4 └── 5.mp4 ├── BCQ ├── 1.mp4 ├── 2.mp4 ├── 3.mp4 ├── 4.mp4 └── 5.mp4 ├── BCT ├── 1.avi └── 5.avi ├── Diff ├── 1.mp4 ├── 2.mp4 ├── 3.mp4 ├── 4.mp4 └── 5.mp4 └── HBC ├── 1.mp4 ├── 2.mp4 ├── 3.mp4 ├── 4.mp4 └── 5.mp4 /README.md: -------------------------------------------------------------------------------- 1 | # From Mystery to Mastery: Failure Diagnosis for Improving Manipulation Policies 2 | 3 | We introduce RoboMD, a deep reinforcement learning-based framework designed to identify failure modes in robotic manipulation policies. By simulating diverse conditions and quantifying failure probabilities, RoboMD provides insights into model robustness and adaptability. 4 | 5 | ## Installation 6 | 7 | ### Prerequisites 8 | Ensure you have the following dependencies installed: 9 | - Python 3.8+ 10 | - CUDA (if using a GPU) 11 | - Conda (recommended for managing environments) 12 | 13 | 14 | ### Setting Up the Environment 15 | 1. Clone the repository: 16 | ```bash 17 | git clone https://github.com/Robo-MD/Robo-MD-RSS.github.io.git 18 | cd Robo-MD-RSS.github.io 19 | ``` 20 | 21 | 2. Create a Conda environment: 22 | ```bash 23 | conda create --name robomd python=3.8 -y 24 | conda activate robomd 25 | ``` 26 | 27 | ### Installing Dependencies 28 | #### 1. **Install robosuite** 29 | ```bash 30 | pip install robosuite 31 | ``` 32 | 33 | 34 | #### 2. **Install robomimic** 35 | ```bash 36 | git clone https://github.com/ARISE-Initiative/robomimic.git 37 | cd robomimic 38 | pip install -e . 39 | ``` 40 | 41 | #### 3. **Additional Dependencies** 42 | Install the required Python packages: 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 |
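To verify that the core dependencies are importable, you can run an optional sanity check (the version numbers printed will depend on your setup):
```bash
python -c "import robosuite, robomimic; print(robosuite.__version__, robomimic.__version__)"
```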
47 | ## Project Structure 48 | 49 | ``` 50 | ├── configs/ # Configuration files for actions and training 51 | ├── env/ # Environment implementations 52 | ├── scripts/ # Data-processing scripts (e.g., convert_pairwise.py) 53 | ├── utils/ # Utility functions (e.g., loss computations) 54 | ├── train_continuous.py # Training script for continuous latent actions 55 | ├── train_discrete.py # Training script for discrete latent actions 56 | ├── train_embedding.py # Training script for embedding learning 57 | ├── README.md # Project documentation 58 | ├── requirements.txt # Required dependencies 59 | ``` 60 | 61 | --- 62 |
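The perturbation vocabulary for each task lives in `configs/action_dicts.py`, which maps discrete action IDs to human-readable environment changes. A minimal sketch of how these mappings can be inspected (assuming the repository root is on your `PYTHONPATH`):
```python
from configs.action_dicts import ACTION_DICTS

# Each task name ("can", "square", "lift", "stack", "thread") maps
# integer action IDs to environment-perturbation descriptions.
lift_actions = ACTION_DICTS["lift"]
print(len(lift_actions))  # 21 discrete perturbations for the lift task
print(lift_actions[0])    # "Change the cube color to red"
```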
"Resize the cylinder to dimensions 0.02 0.13 (Height, Radius).", 31 | 7: "Resize the cylinder to dimensions 0.03 0.08 (Height, Radius).", 32 | 8: "Resize the box to dimensions 0.8 0.2 0.025 (Length, Breadth, Height).", 33 | 9: "Resize the box to dimensions 0.2 0.8 0.025 (Length, Breadth, Height).", 34 | 10: "Change the lighting color to red.", 35 | 11: "Change the lighting color to green.", 36 | 12: "Change the lighting color to blue.", 37 | 13: "Change the lighting color to gray.", 38 | }, 39 | "lift": { 40 | 0: "Change the cube color to red", 41 | 1: "Change the cube color to green", 42 | 2: "Change the cube color to blue", 43 | 3: "Change the cubr color to gray", 44 | 4: "Change the table color to green", 45 | 5: "Change the table color to blue", 46 | 6: "Change the table color to red", 47 | 7: "Change the table color to gray", 48 | 8: "Resize the table to dimensions 0.8 0.2 0.025 (Length, Breadth, Height).", 49 | 9: "Resize the table to dimensions 0.2 0.8 0.025 (Length, Breadth, Height).", 50 | 10: "Resize the cube to dimensions 0.04 0.04 0.04 (Length, Breadth, Height).", 51 | 11: "Resize the cube to dimensions 0.01 0.01 0.01 (Length, Breadth, Height).", 52 | 12: "Resize the cube to dimensions 0.04 0.01 0.01 (Length, Breadth, Height).", 53 | 13: "Change the robot color to red.", 54 | 14: "Change the robot color to green.", 55 | 15: "Change the robot color to cyan.", 56 | 16: "Change the robot color to gray.", 57 | 17: "Change the lighting color to red.", 58 | 18: "Change the lighting color to green.", 59 | 19: "Change the lighting color to blue.", 60 | 20: "Change the lighting color to gray.", 61 | }, 62 | "stack": { 63 | # same as lift, adjusting if needed 64 | 0: "Change the cube color to red", 65 | 1: "Change the cube color to green", 66 | 2: "Change the cube color to blue", 67 | 3: "Change the cubr color to gray", 68 | 4: "Change the table color to green", 69 | 5: "Change the table color to blue", 70 | 6: "Change the table color to red", 71 | 7: "Change the table color to gray", 72 | 8: "Resize the table to dimensions 0.8 0.2 0.025 (Length, Breadth, Height).", 73 | 9: "Resize the table to dimensions 0.2 0.8 0.025 (Length, Breadth, Height).", 74 | 10: "Resize the cube to dimensions 0.04 0.04 0.04 (Length, Breadth, Height).", 75 | 11: "Resize the cube to dimensions 0.01 0.01 0.01 (Length, Breadth, Height).", 76 | 12: "Resize the cube to dimensions 0.04 0.01 0.01 (Length, Breadth, Height).", 77 | 13: "Change the robot color to red.", 78 | 14: "Change the robot color to green.", 79 | 15: "Change the robot color to cyan.", 80 | 16: "Change the robot color to gray.", 81 | 17: "Change the lighting color to red.", 82 | 18: "Change the lighting color to green.", 83 | 19: "Change the lighting color to blue.", 84 | 20: "Change the lighting color to gray.", 85 | }, 86 | "thread": { 87 | 0: "Resize the base of the needle to dimensions 0.025 0.02 0.02 (Length, Breadth, Height).", 88 | 1: "Resize the base of the needle to dimensions 0.025 0.02 0.03 (Length, Breadth, Height).", 89 | 2: "Resize the base of the needle to dimensions 0.02 0.025 0.02 (Length, Breadth, Height).", 90 | 3: "Resize the base of the needle to dimensions 0.02 0.02 0.025 (Length, Breadth, Height).", 91 | 4: "Change the needle color to red.", 92 | 5: "Change the needle color to green.", 93 | 6: "Change the needle color to blue.", 94 | 7: "Change the needle color to gray.", 95 | 8: "Resize the table to dimensions 0.3 0.3 0.02 (Length, Breadth, Height).", 96 | 9: "Resize the table to dimensions 0.2 0.2 0.02 (Length, Breadth, 
Height).", 97 | 10: "Resize the table to dimensions 0.5 0.5 0.02 (Length, Breadth, Height).", 98 | 11: "Change the robot gripper color to red.", 99 | 12: "Change the robot gripper color to green.", 100 | 13: "Change the robot gripper color to cyan.", 101 | 14: "Change the robot gripper color to gray.", 102 | 15: "Change the lighting color to red.", 103 | 16: "Change the lighting color to green.", 104 | 17: "Change the lighting color to blue.", 105 | 18: "Change the lighting color to gray.", 106 | } 107 | } -------------------------------------------------------------------------------- /env/can_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import cv2 4 | import xml.etree.ElementTree as ET 5 | from copy import deepcopy 6 | from PIL import Image 7 | import torch 8 | 9 | # Robomimic / stable-baselines3 / other imports as needed 10 | # e.g., from stable_baselines3 import PPO 11 | 12 | class CanEnv(gym.Env): 13 | """ 14 | A custom Gym environment for controlling a robot environment 15 | and training with stable-baselines3. 16 | """ 17 | def __init__(self, env, policy, rollout_horizon, video_record=False, collect_data=False, save_path="", device="cpu"): 18 | super(CanEnv, self).__init__() 19 | 20 | self.env = env 21 | self.policy = policy 22 | self.rollout_horizon = rollout_horizon 23 | self.video_record = video_record 24 | self.collect_data = collect_data 25 | self.save_path = save_path 26 | self.device = device 27 | 28 | # Action space: discrete with 19 possible actions 29 | self.action_space = gym.spaces.Discrete(19) 30 | 31 | # Observation space: simple 3 x 84 x 84 image 32 | # (You can adjust this to match your actual environment) 33 | self.observation_space = gym.spaces.Box( 34 | low=0, high=255, shape=(3, 84, 84), dtype=np.uint8 35 | ) 36 | 37 | # Internal trackers 38 | self.steps = 0 39 | self.obs = None 40 | self.video_writer = None 41 | self.is_sequence = False # If obs has extra dimension 42 | 43 | # Initial environment reset 44 | self.obs = self.env.reset() 45 | if len(self.obs["agentview_image"].shape) == 4: 46 | # If there's a batch dimension 47 | self.is_sequence = True 48 | 49 | def reset(self): 50 | print("Resetting the environment...") 51 | st = self.env.get_state() 52 | 53 | # Parse and modify the XML as needed 54 | xml_str = st["model"] 55 | root = ET.fromstring(xml_str) 56 | 57 | # Example: restore some default colors or remove them 58 | for geom in root.findall(".//geom"): 59 | if "robot" in geom.attrib.get("name", ""): 60 | if "rgba" in geom.attrib: 61 | del geom.attrib["rgba"] 62 | 63 | for light in root.findall(".//light"): 64 | # Example: reset lighting to some default 65 | light.set("diffuse", "1 1 1") 66 | light.set("specular", "0.1 0.1 0.1") 67 | 68 | new_xml_str = ET.tostring(root, encoding="unicode") 69 | st["model"] = new_xml_str 70 | 71 | # Actually reset 72 | self.obs = self.env.reset_to(st) 73 | self.env.reset() 74 | 75 | # If recording video, initialize the VideoWriter 76 | if self.video_record: 77 | self.steps += 1 78 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 79 | video_filename = f"episode_{self.steps}.avi" 80 | fourcc = cv2.VideoWriter_fourcc(*"MJPG") 81 | (h, w) = rendered_img.shape[:2] 82 | self.video_writer = cv2.VideoWriter(video_filename, fourcc, 30, (w, h)) 83 | self._save_rendered_frame(rendered_img) 84 | 85 | if self.is_sequence: 86 | return self.obs["agentview_image"][0] 87 | 88 | return self.obs["agentview_image"] 89 | 90 | def 
step(self, action): 91 | """ 92 | Apply one discrete action to the environment, 93 | change the XML accordingly, then run a short rollout. 94 | """ 95 | self.steps += 1 96 | robot_state = self.env.get_state() 97 | 98 | # Parse XML 99 | xml_str = robot_state["model"] 100 | root = ET.fromstring(xml_str) 101 | 102 | for geom in root.findall(".//geom"): 103 | # Can color 104 | if action == 0: 105 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": 106 | geom.attrib["rgba"] = "1 0 0 1" 107 | elif action == 1: 108 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": # Can green color 109 | geom.attrib['rgba'] = "0 1 0 1" 110 | elif action == 2: 111 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": # Can blue color 112 | geom.attrib['rgba'] = "0 0 1 1" 113 | elif action == 3: 114 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": 115 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 116 | 117 | # Table Color 118 | elif action == 4: 119 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 120 | geom.attrib['rgba'] = "0 1 0 1" 121 | elif action == 5: 122 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 123 | geom.attrib['rgba'] = "0 0 1 1" 124 | elif action == 6: 125 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 126 | geom.attrib['rgba'] = "1 0 0 1" 127 | elif action == 7: 128 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 129 | geom.attrib['rgba'] = "0.7 0.7 0.7 1" 130 | 131 | # Table Size 132 | elif action == 8: 133 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 134 | geom.attrib["size"] = "0.3 0.3 0.02" # Modify size 135 | elif action == 9: 136 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 137 | geom.attrib["size"] = "0.2 0.2 0.02" # Modify size 138 | elif action == 10: 139 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 140 | geom.attrib["size"] = "0.1 0.1 0.02" 141 | 142 | # Robot Color 143 | elif action == 11: 144 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 145 | geom.attrib["rgba"] = "0 0 1 1" 146 | 147 | 148 | elif action == 12: 149 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 150 | geom.attrib['rgba'] = "0 1 0 1" # Make them green 151 | 152 | elif action == 13: 153 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 154 | geom.attrib['rgba'] = "0 1 1 1" # Make them cyan 155 | elif action == 14: 156 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 157 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" # Make them gray 158 | 159 | # Lighting 160 | lights = root.findall(".//light") 161 | 162 | if action == 15: 163 | for light in lights: 164 | r, g, b = 1, 0, 0 165 | light.set("diffuse", f"{r} {g} {b}") 166 | 167 | elif action == 16: 168 | for light in lights: 169 | r, g, b = 0, 1, 0 170 | light.set("diffuse", f"{r} {g} {b}") 171 | 172 | elif action == 17: 173 | for light in lights: 174 | r, g, b = 0, 0, 1 175 | light.set("diffuse", f"{r} {g} {b}") 176 | 177 | elif action == 18: 178 | for light in lights: 179 | r, g, b = 0.5, 0.5, 0.5 180 | light.set("diffuse", f"{r} {g} {b}") 181 | 182 | # Update model 183 | new_xml_str = ET.tostring(root, encoding="unicode") 184 | robot_state["model"]
= new_xml_str 185 | 186 | # Reset the environment to the new XML 187 | self.obs = self.env.reset_to(robot_state) 188 | 189 | # If we're recording video, save the frame 190 | if self.video_record: 191 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 192 | self._save_rendered_frame(rendered_img) 193 | 194 | # Now do a short rollout for `rollout_horizon` steps 195 | total_reward = 0.0 196 | success = False 197 | 198 | for step_i in range(self.rollout_horizon): 199 | # Query policy 200 | with torch.no_grad(): 201 | act = self.policy(ob=self.obs) 202 | 203 | # Step the environment 204 | next_obs, r, done, _ = self.env.step(act) 205 | total_reward += r 206 | success = self.env.is_success()["task"] 207 | 208 | # Collect data if requested 209 | if self.collect_data: 210 | # Save frames to a designated folder 211 | self._save_demo_frames(step_i, action) 212 | 213 | # If also recording video, save each new frame 214 | if self.video_record: 215 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 216 | self._save_rendered_frame(rendered_img) 217 | 218 | # Break if done or success 219 | if done or success: 220 | break 221 | 222 | self.obs = deepcopy(next_obs) 223 | 224 | # Write stats 225 | stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success)) 226 | print(stats) 227 | self._log_episode_stats(stats, action) 228 | 229 | # Terminal or not 230 | done = True 231 | if success: 232 | # If success, we might do some custom logic 233 | reward = -1 234 | done = False 235 | else: 236 | reward = 1000 237 | print("Episode Completed") 238 | 239 | # If done, stop video recording 240 | if done and self.video_record and self.video_writer is not None: 241 | self.video_writer.release() 242 | print(f"Episode {self.steps} video saved.") 243 | 244 | if self.is_sequence: 245 | return self.obs["agentview_image"][0], reward, done, {} 246 | return self.obs["agentview_image"], reward, done, {} 247 | 248 | 249 | def _save_rendered_frame(self, img_array): 250 | """ 251 | Converts an RGB array to BGR for OpenCV and writes it to the video. 252 | """ 253 | if img_array.dtype != np.uint8: 254 | img_array = (img_array * 255).astype(np.uint8) 255 | img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) 256 | if self.video_writer is not None: 257 | self.video_writer.write(img_bgr) 258 | 259 | def _save_demo_frames(self, step_i, action): 260 | """ 261 | Save frames to disk for data collection. 262 | """ 263 | from PIL import Image 264 | import os 265 | 266 | img_save_dir = os.path.join("can_rl_data", self.save_path, "demo") 267 | step_save_dir = os.path.join(img_save_dir, f"demo{self.steps}_action_{action}") 268 | os.makedirs(step_save_dir, exist_ok=True) 269 | 270 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 271 | image_path = os.path.join(step_save_dir, f"frame_{step_i:04d}.png") 272 | Image.fromarray(rendered_img).save(image_path) 273 | 274 | def _log_episode_stats(self, stats, action): 275 | """ 276 | Write stats and actions to log files. 
277 | """ 278 | import os 279 | 280 | # Always log stats 281 | with open(f"can_logs/{self.save_path}/episode_stats.txt", "a") as file: 282 | file.write(str(stats) + "\n") 283 | 284 | # If collecting data, also log success/action 285 | if self.collect_data: 286 | with open(f"can_rl_data/{self.save_path}/success_rate.txt", "a") as file: 287 | file.write(str(stats["Success_Rate"]) + "\n") 288 | with open(f"can_rl_data/{self.save_path}/actions.txt", "a") as file: 289 | file.write(str(action) + "\n") 290 | 291 | -------------------------------------------------------------------------------- /env/latent_action_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gym 3 | import xml.etree.ElementTree as ET 4 | from copy import deepcopy 5 | import numpy as np 6 | import cv2 7 | import torch 8 | 9 | from scipy.spatial.distance import cdist 10 | from utils.losses import find_closest_value 11 | 12 | class LatentActionEnv(gym.Env): 13 | """ 14 | An environment that: 15 | 1) Receives a 512-d float action (latent vector). 16 | 2) Finds the nearest known embedding from 'embeddings_array'. 17 | 3) Applies that action's XML modifications. 18 | 4) Rolls out the robomimic policy for 'horizon' steps. 19 | 5) Rewards are based on success/failure / distance / repeated actions, etc. 20 | 21 | Observations: 3x84x84 images from 'agentview_image'. 22 | """ 23 | 24 | def __init__(self, env, policy, horizon, embeddings_array, values_array, 25 | log_dir, task_name, video_record=False): 26 | super().__init__() 27 | 28 | # RL action is 512-d latent 29 | self.action_space = gym.spaces.Box(low=-2, high=2, shape=(512,), dtype=np.float32) 30 | # Observation is the image 31 | self.observation_space = gym.spaces.Box(low=0, high=255, shape=(3, 84, 84), dtype=np.uint8) 32 | 33 | self.env = env 34 | self.policy = policy 35 | self.horizon = horizon 36 | self.embeddings_array = embeddings_array 37 | self.values_array = values_array 38 | self.log_dir = log_dir 39 | self.task_name = task_name 40 | 41 | self.video_record = video_record 42 | self.video_writer = None 43 | self.steps = 0 44 | self.prev_action = -1 45 | 46 | # initial reset 47 | self.obs = self.env.reset() 48 | self.is_sequence = (len(self.obs["agentview_image"].shape) == 4) 49 | 50 | def reset(self): 51 | print('Resetting env') 52 | 53 | st = self.env.get_state() 54 | 55 | # Parse the XML model 56 | xml_str = st['model'] 57 | root = ET.fromstring(xml_str) 58 | 59 | if self.task_name == 'lift' or self.task_name == 'stack': 60 | 61 | ##table visual size 62 | for geom in root.findall(".//geom"): 63 | if geom.attrib['name'] == 'table_visual': 64 | geom.attrib['size'] = "0.4 0.4 0.025" #0.4 0.4 0.025 65 | 66 | #cube visual size 67 | for geom in root.findall(".//geom"): 68 | if geom.attrib['name'] == 'cube_g0_vis': 69 | geom.attrib['size'] = "0.0213203 0.0206657 0.020327" #0.4 0.4 0.025 70 | 71 | #cube color 72 | for geom in root.findall(".//geom"): 73 | if geom.attrib['name'] == 'cube_g0_vis': 74 | geom.attrib['rgba'] = "1 0 0 1" 75 | 76 | for geom in root.findall(".//geom"): 77 | if "robot0_g" in geom.attrib['name']: 78 | if 'rgba' in geom.attrib: 79 | del geom.attrib['rgba'] 80 | 81 | #table color 82 | for geom in root.findall(".//geom"): 83 | if geom.attrib['name'] == 'table_visual': 84 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 85 | 86 | for light in root.findall(".//light"): 87 | # Example of restoring some typical defaults: 88 | light.set("diffuse", "1 1 1") 89 | light.set("specular", "0.1 0.1 0.1") 90 | 
light.set("pos", "1 1 1.5") 91 | light.set("dir", "-0.19245 -0.19245 -0.96225") 92 | 93 | elif self.task_name == 'square': 94 | ##table visual size 95 | for geom in root.findall(".//geom"): 96 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 97 | geom.attrib['size'] = "0.4 0.4 0.025" #0.4 0.4 0.025 98 | 99 | 100 | #cube color 101 | for body in root.findall(".//body"): 102 | if body.attrib.get("name") == "peg2": 103 | for geom in body.findall(".//geom"): 104 | # Maybe match both type=cylinder AND a certain size 105 | if geom.attrib.get("type") == "cylinder" and geom.attrib.get("size") == "0.02 0.1": 106 | 107 | if "material" in geom.attrib: 108 | del geom.attrib["material"] 109 | #geom.set("rgba", "0.5 0.5 0 1") 110 | 111 | 112 | 113 | for light in root.findall(".//light"): 114 | # Example of restoring some typical defaults: 115 | light.set("diffuse", "1 1 1") 116 | light.set("specular", "0.1 0.1 0.1") 117 | light.set("pos", "1 1 1.5") 118 | light.set("dir", "-0.19245 -0.19245 -0.96225") 119 | 120 | elif self.task_name == 'can': 121 | for geom in root.findall(".//geom"): 122 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 123 | if 'rgba' in geom.attrib: 124 | del geom.attrib['rgba'] 125 | 126 | 127 | for light in root.findall(".//light"): 128 | # Example of restoring some typical defaults: 129 | light.set("diffuse", "1 1 1") 130 | light.set("specular", "0.1 0.1 0.1") 131 | light.set("pos", "1 1 1.5") 132 | light.set("dir", "-0.19245 -0.19245 -0.96225") 133 | 134 | 135 | new_xml_str = ET.tostring(root, encoding='unicode') 136 | 137 | # Update the model in the state 138 | st['model'] = new_xml_str 139 | 140 | self.obs = self.env.reset_to(st) 141 | 142 | self.env.reset() 143 | 144 | if self.video_record: 145 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 146 | self.save_rendered_frame(rendered_img) 147 | 148 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) # Set width and height 149 | original_height, original_width = rendered_img.shape[:2] 150 | 151 | #Initialize the VideoWriter with the rendered frame dimensions 152 | video_filename = f'episode_{self.steps}.avi' 153 | self.video_writer = cv2.VideoWriter( 154 | video_filename, cv2.VideoWriter_fourcc(*'M', 'J', 'P', 'G'), 30, (original_width, original_height) 155 | ) 156 | 157 | self.policy.start_episode() 158 | if self.is_sequence: 159 | return self.obs["agentview_image"][0] 160 | 161 | return self.obs["agentview_image"] 162 | 163 | def step(self, action): 164 | self.steps+=1 165 | 166 | # Find the index of the closest action 167 | action, penalty = find_closest_value(action, self.embeddings_array, self.values_array) 168 | 169 | print(f'Closest known action : {action}') 170 | with open(f"{self.log_dir}/actions.txt", "a") as file: 171 | file.write(str(action) + '\n') 172 | 173 | same_action_penalty = 0 174 | if self.prev_action == action: 175 | same_action_penalty = 100 176 | self.prev_action = action 177 | 178 | if self.steps == 1: 179 | self.prev_action = action 180 | 181 | robot_state = self.env.get_state() 182 | 183 | # Parse the XML model 184 | xml_str = robot_state['model'] 185 | root = ET.fromstring(xml_str) 186 | 187 | 188 | if self.task_name == 'lift' or self.task_name == 'stack': 189 | 190 | for geom in root.findall(".//geom"): 191 | # Cube Color 192 | if action == 0: 193 | if geom.attrib['name'] == 'cube_g0_vis': # Cube red color 194 | geom.attrib['rgba'] = "1 0 0 1" 195 | elif action == 1: 196 | if geom.attrib['name'] 
== 'cube_g0_vis': # Cube green color 197 | geom.attrib['rgba'] = "0 1 0 1" 198 | elif action == 2: 199 | if geom.attrib['name'] == 'cube_g0_vis': # Cube blue color 200 | geom.attrib['rgba'] = "0 0 1 1" 201 | elif action == 3: 202 | if geom.attrib['name'] == 'cube_g0_vis': 203 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 204 | 205 | # Table Color 206 | elif action == 4: 207 | if geom.attrib['name'] == 'table_visual': # Table green color 208 | geom.attrib['rgba'] = "0 1 0 1" 209 | elif action == 5: 210 | if geom.attrib['name'] == 'table_visual': # Table blue color 211 | geom.attrib['rgba'] = "0 0 1 1" 212 | elif action == 6: 213 | if geom.attrib['name'] == 'table_visual': # Table red color 214 | geom.attrib['rgba'] = "1 0 0 1" 215 | elif action == 7: 216 | if geom.attrib['name'] == 'table_visual': # Table default color 217 | geom.attrib['rgba'] = "0.7 0.7 0.7 1" 218 | 219 | # Table Size 220 | elif action == 8: 221 | if geom.attrib['name'] == 'table_visual': 222 | geom.attrib['size'] = "0.8 0.2 0.025" 223 | elif action == 9: 224 | if geom.attrib['name'] == 'table_visual': 225 | geom.attrib['size'] = "0.2 0.8 0.025" 226 | 227 | # Cube Size 228 | elif action == 10: 229 | if geom.attrib['name'] == 'cube_g0_vis': 230 | geom.attrib['size'] = "0.04 0.04 0.04" # enlarge the cube 231 | elif action == 11: 232 | if geom.attrib['name'] == 'cube_g0_vis': 233 | geom.attrib['size'] = "0.01 0.01 0.01" # shrink the cube 234 | elif action == 12: 235 | if geom.attrib['name'] == 'cube_g0_vis': 236 | geom.attrib['size'] = "0.04 0.01 0.01" 237 | 238 | # Robot Color 239 | elif action == 13: 240 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 241 | geom.attrib['rgba'] = "1 0 0 1" # Make them yellow 242 | 243 | elif action == 14: 244 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 245 | geom.attrib['rgba'] = "0 1 0 1" # Make them yellow 246 | 247 | elif action == 15: 248 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 249 | geom.attrib['rgba'] = "0 1 1 1" # Make them yellow 250 | elif action == 16: 251 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 252 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" # Make them yellow 253 | 254 | # Lighting 255 | lights = root.findall(".//light") 256 | 257 | if action == 17: 258 | for light in lights: 259 | r, g, b = 1, 0, 0 260 | light.set("diffuse", f"{r} {g} {b}") 261 | 262 | elif action == 18: 263 | for light in lights: 264 | r, g, b = 0, 1, 0 265 | light.set("diffuse", f"{r} {g} {b}") 266 | 267 | elif action == 19: 268 | for light in lights: 269 | r, g, b = 0, 0, 1 270 | light.set("diffuse", f"{r} {g} {b}") 271 | 272 | elif action == 20: 273 | for light in lights: 274 | r, g, b = 0.5, 0.5, 0.5 275 | light.set("diffuse", f"{r} {g} {b}") 276 | 277 | elif self.task_name == 'square': 278 | 279 | for body in root.findall(".//body"): 280 | if body.attrib.get("name") == "peg2": 281 | for geom in body.findall(".//geom"): 282 | # Maybe match both type=cylinder AND a certain size 283 | if geom.attrib.get("type") == "cylinder" and geom.attrib.get("size") == "0.02 0.1": 284 | 285 | if "material" in geom.attrib: 286 | del geom.attrib["material"] 287 | if action == 0: 288 | geom.set("rgba", "1 0 0 1") 289 | elif action == 1: 290 | geom.set("rgba", "0 1 0 1") 291 | elif action == 2: 292 | geom.set("rgba", "0 0 1 1") 293 | elif action == 3: 294 | geom.set("rgba", "0.5 0.5 0.5 1") 295 | 296 | 297 | for body in root.findall(".//body"): 298 | if body.attrib.get("name") == "peg2": 299 | for geom in 
body.findall(".//geom"): 300 | # Maybe match both type=cylinder AND a certain size 301 | if geom.attrib.get("type") == "cylinder" and geom.attrib.get("size") == "0.02 0.1": 302 | 303 | if "material" in geom.attrib: 304 | del geom.attrib["material"] 305 | if action == 4: 306 | geom.set("size", "0.03 0.1") 307 | elif action == 5: 308 | geom.set("size", "0.02 0.15") 309 | elif action == 6: 310 | geom.set("size", "0.02 0.13") 311 | elif action == 7: 312 | geom.set("size", "0.03 0.08") 313 | 314 | 315 | for geom in root.findall(".//geom"): 316 | 317 | # Table Size 318 | if action == 8: 319 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 320 | geom.attrib['size'] = "0.8 0.2 0.025" 321 | elif action == 9: 322 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 323 | geom.attrib['size'] = "0.2 0.8 0.025" 324 | 325 | # Lighting 326 | lights = root.findall(".//light") 327 | 328 | if action == 10: 329 | for light in lights: 330 | r, g, b = 1, 0, 0 331 | light.set("diffuse", f"{r} {g} {b}") 332 | 333 | elif action == 11: 334 | for light in lights: 335 | r, g, b = 0, 1, 0 336 | light.set("diffuse", f"{r} {g} {b}") 337 | 338 | elif action == 12: 339 | for light in lights: 340 | r, g, b = 0, 0, 1 341 | light.set("diffuse", f"{r} {g} {b}") 342 | 343 | elif action == 13: 344 | for light in lights: 345 | r, g, b = 0.5, 0.5, 0.5 346 | light.set("diffuse", f"{r} {g} {b}") 347 | 348 | elif self.task_name == 'can': 349 | for geom in root.findall(".//geom"): 350 | # cylinder Color 351 | if action == 0: 352 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": 353 | geom.attrib["rgba"] = "1 0 0 1" 354 | elif action == 1: 355 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": # Cube green color 356 | geom.attrib['rgba'] = "0 1 0 1" 357 | elif action == 2: 358 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": # Cube blue color 359 | geom.attrib['rgba'] = "0 0 1 1" 360 | elif action == 3: 361 | if "name" in geom.attrib and geom.attrib["name"] == "Can_g0_visual": 362 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 363 | 364 | # Table Color 365 | elif action == 4: 366 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 367 | geom.attrib['rgba'] = "0 1 0 1" 368 | elif action == 5: 369 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 370 | geom.attrib['rgba'] = "0 0 1 1" 371 | elif action == 6: 372 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 373 | geom.attrib['rgba'] = "1 0 0 1" 374 | elif action == 7: 375 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 376 | geom.attrib['rgba'] = "0.7 0.7 0.7 1" 377 | 378 | # Table Size 379 | elif action == 8: 380 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 381 | geom.attrib["size"] = "0.3 0.3 0.02" # Modify size 382 | elif action == 9: 383 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 384 | geom.attrib["size"] = "0.2 0.2 0.02" # Modify size 385 | elif action == 10: 386 | if geom.attrib.get("type") == "box" and geom.attrib.get("material") == "light-wood": 387 | geom.attrib["size"] = "0.1 0.1 0.02" 388 | 389 | # Robot Color 390 | elif action == 11: 391 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 392 | geom.attrib["rgba"] = "0 0 1 1" 393 | 394 | 395 | elif action == 12: 396 | if "robot" in geom.attrib.get("name", "") 
or "robot" in geom.attrib.get("material", ""): 397 | geom.attrib['rgba'] = "0 1 0 1" # Make them yellow 398 | 399 | elif action == 13: 400 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 401 | geom.attrib['rgba'] = "0 1 1 1" # Make them yellow 402 | elif action == 14: 403 | if "robot" in geom.attrib.get("name", "") or "robot" in geom.attrib.get("material", ""): 404 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" # Make them yellow 405 | 406 | # Lighting 407 | lights = root.findall(".//light") 408 | 409 | if action == 15: 410 | for light in lights: 411 | r, g, b = 1, 0, 0 412 | light.set("diffuse", f"{r} {g} {b}") 413 | 414 | elif action == 16: 415 | for light in lights: 416 | r, g, b = 0, 1, 0 417 | light.set("diffuse", f"{r} {g} {b}") 418 | 419 | elif action == 17: 420 | for light in lights: 421 | r, g, b = 0, 0, 1 422 | light.set("diffuse", f"{r} {g} {b}") 423 | 424 | elif action == 18: 425 | for light in lights: 426 | r, g, b = 0.5, 0.5, 0.5 427 | light.set("diffuse", f"{r} {g} {b}") 428 | 429 | 430 | new_xml_str = ET.tostring(root, encoding='unicode') 431 | 432 | # Update the model in the state 433 | robot_state['model'] = new_xml_str 434 | 435 | self.obs = self.env.reset_to(robot_state) 436 | # self.env.reset() 437 | 438 | 439 | if self.video_record: 440 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 441 | self.save_rendered_frame(rendered_img) 442 | 443 | total_reward = 0. 444 | traj = dict(actions=[], rewards=[], dones=[], states=[], initial_state_dict=robot_state) 445 | 446 | for step_i in range(self.horizon): 447 | 448 | # get action from policy 449 | act = self.policy(ob=self.obs) 450 | 451 | # play action 452 | next_obs, r, done, _ = self.env.step(act) 453 | 454 | # compute reward 455 | total_reward += r 456 | success = self.env.is_success()["task"] 457 | 458 | # visualization 459 | #if render: 460 | # self.env.render(mode="human", camera_name=camera_names[0]) 461 | 462 | # collect transition 463 | traj["actions"].append(act) 464 | traj["rewards"].append(r) 465 | traj["dones"].append(done) 466 | traj["states"].append(robot_state["states"]) 467 | 468 | 469 | # Record each frame 470 | if self.video_record: 471 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 472 | self.save_rendered_frame(rendered_img) 473 | 474 | 475 | # break if done or if success 476 | if done or success: 477 | break 478 | 479 | # update for next iter 480 | self.obs = deepcopy(next_obs) 481 | # st = self.env.get_state() 482 | 483 | stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success)) 484 | 485 | 486 | print(stats) 487 | with open(f"{self.log_dir}/stats.txt", "a") as file: 488 | file.write(str(stats) + '\n') 489 | 490 | 491 | done = True 492 | 493 | if stats['Success_Rate'] == 1.0: 494 | reward = -1 * (500/stats['Horizon']) * (1000/penalty + 1) - same_action_penalty 495 | done = False 496 | self.env.reset_to(robot_state) 497 | else: 498 | reward = 10000 / (penalty + 1) - same_action_penalty 499 | print('Failure Found, episode Completed\n') 500 | 501 | if done and self.video_record: 502 | self.video_writer.release() 503 | print(f"Episode {self.steps - 1} video saved successfully.") 504 | 505 | print(f"Steps : {self.steps}") 506 | 507 | if self.is_sequence: 508 | return self.obs["agentview_image"][0], reward, done, {} 509 | 510 | return self.obs["agentview_image"], reward, done, {} 511 | 512 | def save_rendered_frame(self, img_array): 513 | # Convert to uint8 if necessary and ensure BGR format for 
OpenCV 514 | if img_array.dtype != np.uint8: 515 | img_array = (img_array * 255).astype(np.uint8) 516 | 517 | # Convert RGB to BGR (OpenCV expects BGR format) 518 | img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) 519 | 520 | # Write the rendered frame to the video 521 | self.video_writer.write(img_bgr) 522 | -------------------------------------------------------------------------------- /env/lift_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import cv2 4 | import xml.etree.ElementTree as ET 5 | from copy import deepcopy 6 | from PIL import Image 7 | import torch 8 | 9 | # Robomimic / stable-baselines3 / other imports as needed 10 | # e.g., from stable_baselines3 import PPO 11 | 12 | class LiftEnv(gym.Env): 13 | """ 14 | A custom Gym environment for controlling a robot environment 15 | and training with stable-baselines3. 16 | """ 17 | def __init__(self, env, policy, rollout_horizon, video_record=False, collect_data=False, save_path="", device="cpu"): 18 | super(LiftEnv, self).__init__() 19 | 20 | self.env = env 21 | self.policy = policy 22 | self.rollout_horizon = rollout_horizon 23 | self.video_record = video_record 24 | self.collect_data = collect_data 25 | self.save_path = save_path 26 | self.device = device 27 | 28 | # Action space: discrete with 21 possible actions (matches ACTION_DICTS["lift"]) 29 | self.action_space = gym.spaces.Discrete(21) 30 | 31 | # Observation space: simple 3 x 84 x 84 image 32 | # (You can adjust this to match your actual environment) 33 | self.observation_space = gym.spaces.Box( 34 | low=0, high=255, shape=(3, 84, 84), dtype=np.uint8 35 | ) 36 | 37 | # Internal trackers 38 | self.steps = 0 39 | self.obs = None 40 | self.video_writer = None 41 | self.is_sequence = False # If obs has extra dimension 42 | 43 | # Initial environment reset 44 | self.obs = self.env.reset() 45 | if len(self.obs["agentview_image"].shape) == 4: 46 | # If there's a batch dimension 47 | self.is_sequence = True 48 | 49 | def reset(self): 50 | print("Resetting the environment...") 51 | st = self.env.get_state() 52 | 53 | # Parse and modify the XML as needed 54 | xml_str = st["model"] 55 | root = ET.fromstring(xml_str) 56 | 57 | ##table visual size 58 | for geom in root.findall(".//geom"): 59 | if geom.attrib['name'] == 'table_visual': 60 | geom.attrib['size'] = "0.4 0.4 0.025" #0.4 0.4 0.025 61 | 62 | #cube visual size 63 | for geom in root.findall(".//geom"): 64 | if geom.attrib['name'] == 'cube_g0_vis': 65 | geom.attrib['size'] = "0.0213203 0.0206657 0.020327" # default cube visual size 66 | 67 | #cube color 68 | for geom in root.findall(".//geom"): 69 | if geom.attrib['name'] == 'cube_g0_vis': 70 | geom.attrib['rgba'] = "1 0 0 1" 71 | 72 | for geom in root.findall(".//geom"): 73 | if "robot0_g" in geom.attrib['name']: 74 | if 'rgba' in geom.attrib: 75 | del geom.attrib['rgba'] 76 | 77 | #table color 78 | for geom in root.findall(".//geom"): 79 | if geom.attrib['name'] == 'table_visual': 80 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 81 | 82 | for light in root.findall(".//light"): 83 | # Example of restoring some typical defaults: 84 | light.set("diffuse", "1 1 1") 85 | light.set("specular", "0.1 0.1 0.1") 86 | light.set("pos", "1 1 1.5") 87 | light.set("dir", "-0.19245 -0.19245 -0.96225") 88 | 89 | new_xml_str = ET.tostring(root, encoding="unicode") 90 | st["model"] = new_xml_str 91 | 92 | # Actually reset 93 | self.obs = self.env.reset_to(st) 94 | self.env.reset() 95 | 96 | # If recording video, initialize the VideoWriter 97 | if
self.video_record: 98 | self.steps += 1 99 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 100 | video_filename = f"episode_{self.steps}.avi" 101 | fourcc = cv2.VideoWriter_fourcc(*"MJPG") 102 | (h, w) = rendered_img.shape[:2] 103 | self.video_writer = cv2.VideoWriter(video_filename, fourcc, 30, (w, h)) 104 | self._save_rendered_frame(rendered_img) 105 | 106 | if self.is_sequence: 107 | return self.obs["agentview_image"][0] 108 | 109 | return self.obs["agentview_image"] 110 | 111 | def step(self, action): 112 | """ 113 | Apply one discrete action to the environment, 114 | change the XML accordingly, then run a short rollout. 115 | """ 116 | self.steps += 1 117 | robot_state = self.env.get_state() 118 | 119 | # Parse XML 120 | xml_str = robot_state["model"] 121 | root = ET.fromstring(xml_str) 122 | 123 | for geom in root.findall(".//geom"): 124 | # Cube Color 125 | if action == 0: 126 | if geom.attrib['name'] == 'cube_g0_vis': # Cube red color 127 | geom.attrib['rgba'] = "1 0 0 1" 128 | elif action == 1: 129 | if geom.attrib['name'] == 'cube_g0_vis': # Cube green color 130 | geom.attrib['rgba'] = "0 1 0 1" 131 | elif action == 2: 132 | if geom.attrib['name'] == 'cube_g0_vis': # Cube blue color 133 | geom.attrib['rgba'] = "0 0 1 1" 134 | elif action == 3: 135 | if geom.attrib['name'] == 'cube_g0_vis': 136 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 137 | 138 | # Table Color 139 | elif action == 4: 140 | if geom.attrib['name'] == 'table_visual': # Table green color 141 | geom.attrib['rgba'] = "0 1 0 1" 142 | elif action == 5: 143 | if geom.attrib['name'] == 'table_visual': # Table blue color 144 | geom.attrib['rgba'] = "0 0 1 1" 145 | elif action == 6: 146 | if geom.attrib['name'] == 'table_visual': # Table red color 147 | geom.attrib['rgba'] = "1 0 0 1" 148 | elif action == 7: 149 | if geom.attrib['name'] == 'table_visual': # Table default color 150 | geom.attrib['rgba'] = "0.7 0.7 0.7 1" 151 | 152 | # Table Size 153 | elif action == 8: 154 | if geom.attrib['name'] == 'table_visual': 155 | geom.attrib['size'] = "0.8 0.2 0.025" 156 | elif action == 9: 157 | if geom.attrib['name'] == 'table_visual': 158 | geom.attrib['size'] = "0.2 0.8 0.025" 159 | 160 | # Cube Size 161 | elif action == 10: 162 | if geom.attrib['name'] == 'cube_g0_vis': 163 | geom.attrib['size'] = "0.04 0.04 0.04" # enlarge the cube 164 | elif action == 11: 165 | if geom.attrib['name'] == 'cube_g0_vis': 166 | geom.attrib['size'] = "0.01 0.01 0.01" # shrink the cube 167 | elif action == 12: 168 | if geom.attrib['name'] == 'cube_g0_vis': 169 | geom.attrib['size'] = "0.04 0.01 0.01" 170 | 171 | # Robot Color 172 | elif action == 13: 173 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 174 | geom.attrib['rgba'] = "1 0 0 1" # Make them yellow 175 | 176 | elif action == 14: 177 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 178 | geom.attrib['rgba'] = "0 1 0 1" # Make them yellow 179 | 180 | elif action == 15: 181 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 182 | geom.attrib['rgba'] = "0 1 1 1" # Make them yellow 183 | elif action == 16: 184 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 185 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" # Make them yellow 186 | 187 | # Lighting 188 | lights = root.findall(".//light") 189 | 190 | if action == 17: 191 | for light in lights: 192 | r, g, b = 1, 0, 0 193 | light.set("diffuse", f"{r} {g} {b}") 194 | 195 | elif action == 18: 196 | for light in lights: 197 | r, g, b = 
0, 1, 0 198 | light.set("diffuse", f"{r} {g} {b}") 199 | 200 | elif action == 19: 201 | for light in lights: 202 | r, g, b = 0, 0, 1 203 | light.set("diffuse", f"{r} {g} {b}") 204 | 205 | elif action == 20: 206 | for light in lights: 207 | r, g, b = 0.5, 0.5, 0.5 208 | light.set("diffuse", f"{r} {g} {b}") 209 | 210 | # Update model 211 | new_xml_str = ET.tostring(root, encoding="unicode") 212 | robot_state["model"] = new_xml_str 213 | 214 | # Reset the environment to the new XML 215 | self.obs = self.env.reset_to(robot_state) 216 | 217 | # If we're recording video, save the frame 218 | if self.video_record: 219 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 220 | self._save_rendered_frame(rendered_img) 221 | 222 | # Now do a short rollout for `rollout_horizon` steps 223 | total_reward = 0.0 224 | success = False 225 | 226 | for step_i in range(self.rollout_horizon): 227 | # Query policy 228 | with torch.no_grad(): 229 | act = self.policy(ob=self.obs) 230 | 231 | # Step the environment 232 | next_obs, r, done, _ = self.env.step(act) 233 | total_reward += r 234 | success = self.env.is_success()["task"] 235 | 236 | # Collect data if requested 237 | if self.collect_data: 238 | # Save frames to a designated folder 239 | self._save_demo_frames(step_i, action) 240 | 241 | # If also recording video, save each new frame 242 | if self.video_record: 243 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 244 | self._save_rendered_frame(rendered_img) 245 | 246 | # Break if done or success 247 | if done or success: 248 | break 249 | 250 | self.obs = deepcopy(next_obs) 251 | 252 | # Write stats 253 | stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success)) 254 | print(stats) 255 | self._log_episode_stats(stats, action) 256 | 257 | # Terminal or not 258 | done = True 259 | if success: 260 | # If success, we might do some custom logic 261 | reward = -1 262 | done = False 263 | else: 264 | reward = 1000 265 | print("Episode Completed") 266 | 267 | # If done, stop video recording 268 | if done and self.video_record and self.video_writer is not None: 269 | self.video_writer.release() 270 | print(f"Episode {self.steps} video saved.") 271 | 272 | if self.is_sequence: 273 | return self.obs["agentview_image"][0], reward, done, {} 274 | return self.obs["agentview_image"], reward, done, {} 275 | 276 | 277 | def _save_rendered_frame(self, img_array): 278 | """ 279 | Converts an RGB array to BGR for OpenCV and writes it to the video. 280 | """ 281 | if img_array.dtype != np.uint8: 282 | img_array = (img_array * 255).astype(np.uint8) 283 | img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) 284 | if self.video_writer is not None: 285 | self.video_writer.write(img_bgr) 286 | 287 | def _save_demo_frames(self, step_i, action): 288 | """ 289 | Save frames to disk for data collection. 290 | """ 291 | from PIL import Image 292 | import os 293 | 294 | img_save_dir = os.path.join("lift_rl_data", self.save_path, "demo") 295 | step_save_dir = os.path.join(img_save_dir, f"demo{self.steps}_action_{action}") 296 | os.makedirs(step_save_dir, exist_ok=True) 297 | 298 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 299 | image_path = os.path.join(step_save_dir, f"frame_{step_i:04d}.png") 300 | Image.fromarray(rendered_img).save(image_path) 301 | 302 | def _log_episode_stats(self, stats, action): 303 | """ 304 | Write stats and actions to log files. 
305 | """ 306 | import os 307 | 308 | # Always log stats 309 | with open(f"lift_logs/{self.save_path}/episode_stats.txt", "a") as file: 310 | file.write(str(stats) + "\n") 311 | 312 | # If collecting data, also log success/action 313 | if self.collect_data: 314 | with open(f"lift_rl_data/{self.save_path}/success_rate.txt", "a") as file: 315 | file.write(str(stats["Success_Rate"]) + "\n") 316 | with open(f"lift_rl_data/{self.save_path}/actions.txt", "a") as file: 317 | file.write(str(action) + "\n") 318 | 319 | -------------------------------------------------------------------------------- /env/square_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import cv2 4 | import xml.etree.ElementTree as ET 5 | from copy import deepcopy 6 | from PIL import Image 7 | import torch 8 | 9 | # Robomimic / stable-baselines3 / other imports as needed 10 | # e.g., from stable_baselines3 import PPO 11 | 12 | class SquareEnv(gym.Env): 13 | """ 14 | A custom Gym environment for controlling a robot environment 15 | and training with stable-baselines3. 16 | """ 17 | def __init__(self, env, policy, rollout_horizon, video_record=False, collect_data=False, save_path="", device="cpu"): 18 | super(SquareEnv, self).__init__() 19 | 20 | self.env = env 21 | self.policy = policy 22 | self.rollout_horizon = rollout_horizon 23 | self.video_record = video_record 24 | self.collect_data = collect_data 25 | self.save_path = save_path 26 | self.device = device 27 | 28 | # Action space: discrete with 14 possible actions (matches ACTION_DICTS["square"]) 29 | self.action_space = gym.spaces.Discrete(14) 30 | 31 | # Observation space: simple 3 x 84 x 84 image 32 | # (You can adjust this to match your actual environment) 33 | self.observation_space = gym.spaces.Box( 34 | low=0, high=255, shape=(3, 84, 84), dtype=np.uint8 35 | ) 36 | 37 | # Internal trackers 38 | self.steps = 0 39 | self.obs = None 40 | self.video_writer = None 41 | self.is_sequence = False # If obs has extra dimension 42 | 43 | # Initial environment reset 44 | self.obs = self.env.reset() 45 | if len(self.obs["agentview_image"].shape) == 4: 46 | # If there's a batch dimension 47 | self.is_sequence = True 48 | 49 | def reset(self): 50 | print("Resetting the environment...") 51 | st = self.env.get_state() 52 | 53 | # Parse and modify the XML as needed 54 | xml_str = st["model"] 55 | root = ET.fromstring(xml_str) 56 | 57 | ##table visual size 58 | for geom in root.findall(".//geom"): 59 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 60 | geom.attrib['size'] = "0.4 0.4 0.025" #0.4 0.4 0.025 61 | 62 | 63 | # peg color (restore default material) 64 | for body in root.findall(".//body"): 65 | if body.attrib.get("name") == "peg2": 66 | for geom in body.findall(".//geom"): 67 | # Maybe match both type=cylinder AND a certain size 68 | if geom.attrib.get("type") == "cylinder" and geom.attrib.get("size") == "0.02 0.1": 69 | 70 | if "material" in geom.attrib: 71 | del geom.attrib["material"] 72 | #geom.set("rgba", "0.5 0.5 0 1") 73 | 74 | 75 | 76 | for light in root.findall(".//light"): 77 | # Example of restoring some typical defaults: 78 | light.set("diffuse", "1 1 1") 79 | light.set("specular", "0.1 0.1 0.1") 80 | light.set("pos", "1 1 1.5") 81 | light.set("dir", "-0.19245 -0.19245 -0.96225") 82 | 83 | new_xml_str = ET.tostring(root, encoding="unicode") 84 | st["model"] = new_xml_str 85 | 86 | # Actually reset 87 | self.obs = self.env.reset_to(st) 88 | self.env.reset() 89 | 90 | # If recording video, initialize the VideoWriter 91 |
if self.video_record: 92 | self.steps += 1 93 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 94 | video_filename = f"episode_{self.steps}.avi" 95 | fourcc = cv2.VideoWriter_fourcc(*"MJPG") 96 | (h, w) = rendered_img.shape[:2] 97 | self.video_writer = cv2.VideoWriter(video_filename, fourcc, 30, (w, h)) 98 | self._save_rendered_frame(rendered_img) 99 | 100 | if self.is_sequence: 101 | return self.obs["agentview_image"][0] 102 | 103 | return self.obs["agentview_image"] 104 | 105 | def step(self, action): 106 | """ 107 | Apply one discrete action to the environment, 108 | change the XML accordingly, then run a short rollout. 109 | """ 110 | self.steps += 1 111 | robot_state = self.env.get_state() 112 | 113 | # Parse XML 114 | xml_str = robot_state["model"] 115 | root = ET.fromstring(xml_str) 116 | 117 | for body in root.findall(".//body"): 118 | if body.attrib.get("name") == "peg2": 119 | for geom in body.findall(".//geom"): 120 | # Maybe match both type=cylinder AND a certain size 121 | if geom.attrib.get("type") == "cylinder" and geom.attrib.get("size") == "0.02 0.1": 122 | 123 | if "material" in geom.attrib: 124 | del geom.attrib["material"] 125 | if action == 0: 126 | geom.set("rgba", "1 0 0 1") 127 | elif action == 1: 128 | geom.set("rgba", "0 1 0 1") 129 | elif action == 2: 130 | geom.set("rgba", "0 0 1 1") 131 | elif action == 3: 132 | geom.set("rgba", "0.5 0.5 0.5 1") 133 | 134 | 135 | for body in root.findall(".//body"): 136 | if body.attrib.get("name") == "peg2": 137 | for geom in body.findall(".//geom"): 138 | # Maybe match both type=cylinder AND a certain size 139 | if geom.attrib.get("type") == "cylinder" and geom.attrib.get("size") == "0.02 0.1": 140 | 141 | if "material" in geom.attrib: 142 | del geom.attrib["material"] 143 | if action == 4: 144 | geom.set("size", "0.03 0.1") 145 | elif action == 5: 146 | geom.set("size", "0.02 0.15") 147 | elif action == 6: 148 | geom.set("size", "0.02 0.13") 149 | elif action == 7: 150 | geom.set("size", "0.03 0.08") 151 | 152 | 153 | 154 | 155 | 156 | for geom in root.findall(".//geom"): 157 | 158 | # Table Size 159 | if action == 8: 160 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 161 | geom.attrib['size'] = "0.8 0.2 0.025" 162 | elif action == 9: 163 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 164 | geom.attrib['size'] = "0.2 0.8 0.025" 165 | 166 | # Lighting 167 | lights = root.findall(".//light") 168 | 169 | if action == 10: 170 | for light in lights: 171 | r, g, b = 1, 0, 0 172 | light.set("diffuse", f"{r} {g} {b}") 173 | 174 | elif action == 11: 175 | for light in lights: 176 | r, g, b = 0, 1, 0 177 | light.set("diffuse", f"{r} {g} {b}") 178 | 179 | elif action == 12: 180 | for light in lights: 181 | r, g, b = 0, 0, 1 182 | light.set("diffuse", f"{r} {g} {b}") 183 | 184 | elif action == 13: 185 | for light in lights: 186 | r, g, b = 0.5, 0.5, 0.5 187 | light.set("diffuse", f"{r} {g} {b}") 188 | # Update model 189 | new_xml_str = ET.tostring(root, encoding="unicode") 190 | robot_state["model"] = new_xml_str 191 | 192 | # Reset the environment to the new XML 193 | self.obs = self.env.reset_to(robot_state) 194 | 195 | # If we're recording video, save the frame 196 | if self.video_record: 197 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 198 | self._save_rendered_frame(rendered_img) 199 | 200 | # Now do a short rollout for `rollout_horizon` steps 201 | total_reward = 0.0 202 | success = False 203 | 204 | for step_i in 
range(self.rollout_horizon): 205 | # Query policy 206 | with torch.no_grad(): 207 | act = self.policy(ob=self.obs) 208 | 209 | # Step the environment 210 | next_obs, r, done, _ = self.env.step(act) 211 | total_reward += r 212 | success = self.env.is_success()["task"] 213 | 214 | # Collect data if requested 215 | if self.collect_data: 216 | # Save frames to a designated folder 217 | self._save_demo_frames(step_i, action) 218 | 219 | # If also recording video, save each new frame 220 | if self.video_record: 221 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 222 | self._save_rendered_frame(rendered_img) 223 | 224 | # Break if done or success 225 | if done or success: 226 | break 227 | 228 | self.obs = deepcopy(next_obs) 229 | 230 | # Write stats 231 | stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success)) 232 | print(stats) 233 | self._log_episode_stats(stats, action) 234 | 235 | # Terminal or not 236 | done = True 237 | if success: 238 | # If success, we might do some custom logic 239 | reward = -1 240 | done = False 241 | else: 242 | reward = 1000 243 | print("Episode Completed") 244 | 245 | # If done, stop video recording 246 | if done and self.video_record and self.video_writer is not None: 247 | self.video_writer.release() 248 | print(f"Episode {self.steps} video saved.") 249 | 250 | if self.is_sequence: 251 | return self.obs["agentview_image"][0], reward, done, {} 252 | return self.obs["agentview_image"], reward, done, {} 253 | 254 | 255 | def _save_rendered_frame(self, img_array): 256 | """ 257 | Converts an RGB array to BGR for OpenCV and writes it to the video. 258 | """ 259 | if img_array.dtype != np.uint8: 260 | img_array = (img_array * 255).astype(np.uint8) 261 | img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) 262 | if self.video_writer is not None: 263 | self.video_writer.write(img_bgr) 264 | 265 | def _save_demo_frames(self, step_i, action): 266 | """ 267 | Save frames to disk for data collection. 268 | """ 269 | from PIL import Image 270 | import os 271 | 272 | img_save_dir = os.path.join("square_rl_data", self.save_path, "demo") 273 | step_save_dir = os.path.join(img_save_dir, f"demo{self.steps}_action_{action}") 274 | os.makedirs(step_save_dir, exist_ok=True) 275 | 276 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 277 | image_path = os.path.join(step_save_dir, f"frame_{step_i:04d}.png") 278 | Image.fromarray(rendered_img).save(image_path) 279 | 280 | def _log_episode_stats(self, stats, action): 281 | """ 282 | Write stats and actions to log files. 
283 |         """
284 |         import os
285 | 
286 |         # Always log stats
287 |         with open(f"square_logs/{self.save_path}/episode_stats.txt", "a") as file:
288 |             file.write(str(stats) + "\n")
289 | 
290 |         # If collecting data, also log success/action
291 |         if self.collect_data:
292 |             with open(f"square_rl_data/{self.save_path}/success_rate.txt", "a") as file:
293 |                 file.write(str(stats["Success_Rate"]) + "\n")
294 |             with open(f"square_rl_data/{self.save_path}/actions.txt", "a") as file:
295 |                 file.write(str(action) + "\n")
296 | 
297 | 
--------------------------------------------------------------------------------
/env/stack_env.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | import numpy as np
  3 | import cv2
  4 | import xml.etree.ElementTree as ET
  5 | from copy import deepcopy
  6 | from PIL import Image
  7 | import torch
  8 | 
  9 | # Robomimic / stable-baselines3 / other imports as needed
 10 | # e.g., from stable_baselines3 import PPO
 11 | 
 12 | class StackEnv(gym.Env):
 13 |     """
 14 |     A custom Gym environment for controlling a robot environment
 15 |     and training with stable-baselines3.
 16 |     """
 17 |     def __init__(self, env, policy, rollout_horizon, video_record=False, collect_data=False, save_path="", device="cpu"):
 18 |         super(StackEnv, self).__init__()
 19 | 
 20 |         self.env = env
 21 |         self.policy = policy
 22 |         self.rollout_horizon = rollout_horizon
 23 |         self.video_record = video_record
 24 |         self.collect_data = collect_data
 25 |         self.save_path = save_path
 26 |         self.device = device
 27 | 
 28 |         # Action space: discrete with 21 possible actions (0-20, all handled in step())
 29 |         self.action_space = gym.spaces.Discrete(21)
 30 | 
 31 |         # Observation space: simple 3 x 84 x 84 image
 32 |         # (You can adjust this to match your actual environment)
 33 |         self.observation_space = gym.spaces.Box(
 34 |             low=0, high=255, shape=(3, 84, 84), dtype=np.uint8
 35 |         )
 36 | 
 37 |         # Internal trackers
 38 |         self.steps = 0
 39 |         self.obs = None
 40 |         self.video_writer = None
 41 |         self.is_sequence = False  # If obs has extra dimension
 42 | 
 43 |         # Initial environment reset
 44 |         self.obs = self.env.reset()
 45 |         if len(self.obs["agentview_image"].shape) == 4:
 46 |             # If there's a batch dimension
 47 |             self.is_sequence = True
 48 | 
 49 |     def reset(self):
 50 |         print("Resetting the environment...")
 51 |         st = self.env.get_state()
 52 | 
 53 |         # Parse and modify the XML as needed
 54 |         xml_str = st["model"]
 55 |         root = ET.fromstring(xml_str)
 56 | 
 57 |         for geom in root.findall(".//geom"):
 58 |             if geom.attrib['name'] == 'table_visual':
 59 |                 geom.attrib['size'] = "0.4 0.4 0.025"  # default table size
 60 | 
 61 |         # cube visual size
 62 |         for geom in root.findall(".//geom"):
 63 |             if geom.attrib['name'] == 'cubeA_g0_vis':
 64 |                 geom.attrib['size'] = "0.0213203 0.0206657 0.020327"  # default cube size
 65 | 
 66 |         # cube color
 67 |         for geom in root.findall(".//geom"):
 68 |             if geom.attrib['name'] == 'cubeA_g0_vis':
 69 |                 geom.attrib['rgba'] = "1 0 0 1"
 70 | 
 71 |         for geom in root.findall(".//geom"):
 72 |             if "robot0_g" in geom.attrib['name']:
 73 |                 if 'rgba' in geom.attrib:
 74 |                     del geom.attrib['rgba']
 75 | 
 76 |         # table color
 77 |         for geom in root.findall(".//geom"):
 78 |             if geom.attrib['name'] == 'table_visual':
 79 |                 geom.attrib['rgba'] = "0.5 0.5 0.5 1"
 80 | 
 81 |         for light in root.findall(".//light"):
 82 |             # Example of restoring some typical defaults:
 83 |             light.set("diffuse", "1 1 1")
 84 |             light.set("specular", "0.1 0.1 0.1")
 85 |             light.set("pos", "1 1 1.5")
 86 |             light.set("dir", "-0.19245 -0.19245 -0.96225")
 87 | 
 88 |         new_xml_str = ET.tostring(root, encoding="unicode")
 89 |
st["model"] = new_xml_str 90 | 91 | # Actually reset 92 | self.obs = self.env.reset_to(st) 93 | self.env.reset() 94 | 95 | # If recording video, initialize the VideoWriter 96 | if self.video_record: 97 | self.steps += 1 98 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 99 | video_filename = f"episode_{self.steps}.avi" 100 | fourcc = cv2.VideoWriter_fourcc(*"MJPG") 101 | (h, w) = rendered_img.shape[:2] 102 | self.video_writer = cv2.VideoWriter(video_filename, fourcc, 30, (w, h)) 103 | self._save_rendered_frame(rendered_img) 104 | 105 | if self.is_sequence: 106 | return self.obs["agentview_image"][0] 107 | 108 | return self.obs["agentview_image"] 109 | 110 | def step(self, action): 111 | """ 112 | Apply one discrete action to the environment, 113 | change the XML accordingly, then run a short rollout. 114 | """ 115 | self.steps += 1 116 | robot_state = self.env.get_state() 117 | 118 | # Parse XML 119 | xml_str = robot_state["model"] 120 | root = ET.fromstring(xml_str) 121 | 122 | for geom in root.findall(".//geom"): 123 | # Cube Color 124 | if action == 0: 125 | if geom.attrib['name'] == 'cubeA_g0_vis': # Cube red color 126 | geom.attrib['rgba'] = "1 0 0 1" 127 | elif action == 1: 128 | if geom.attrib['name'] == 'cubeA_g0_vis': # Cube green color 129 | geom.attrib['rgba'] = "0 1 0 1" 130 | elif action == 2: 131 | if geom.attrib['name'] == 'cubeA_g0_vis': # Cube blue color 132 | geom.attrib['rgba'] = "0 0 1 1" 133 | elif action == 3: 134 | if geom.attrib['name'] == 'cubeA_g0_vis': 135 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" 136 | 137 | # Table Color 138 | elif action == 4: 139 | if geom.attrib['name'] == 'table_visual': # Table green color 140 | geom.attrib['rgba'] = "0 1 0 1" 141 | elif action == 5: 142 | if geom.attrib['name'] == 'table_visual': # Table blue color 143 | geom.attrib['rgba'] = "0 0 1 1" 144 | elif action == 6: 145 | if geom.attrib['name'] == 'table_visual': # Table red color 146 | geom.attrib['rgba'] = "1 0 0 1" 147 | elif action == 7: 148 | if geom.attrib['name'] == 'table_visual': # Table default color 149 | geom.attrib['rgba'] = "0.7 0.7 0.7 1" 150 | 151 | # Table Size 152 | elif action == 8: 153 | if geom.attrib['name'] == 'table_visual': 154 | geom.attrib['size'] = "0.8 0.2 0.025" 155 | elif action == 9: 156 | if geom.attrib['name'] == 'table_visual': 157 | geom.attrib['size'] = "0.2 0.8 0.025" 158 | 159 | # Cube Size 160 | elif action == 10: 161 | if geom.attrib['name'] == 'cubeA_g0_vis': 162 | geom.attrib['size'] = "0.04 0.04 0.04" # enlarge the cube 163 | elif action == 11: 164 | if geom.attrib['name'] == 'cubeA_g0_vis': 165 | geom.attrib['size'] = "0.01 0.01 0.01" # shrink the cube 166 | elif action == 12: 167 | if geom.attrib['name'] == 'cubeA_g0_vis': 168 | geom.attrib['size'] = "0.04 0.01 0.01" 169 | 170 | # Robot Color 171 | elif action == 13: 172 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 173 | geom.attrib['rgba'] = "1 0 0 1" # Make them yellow 174 | 175 | elif action == 14: 176 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 177 | geom.attrib['rgba'] = "0 1 0 1" # Make them yellow 178 | 179 | elif action == 15: 180 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 181 | geom.attrib['rgba'] = "0 1 1 1" # Make them yellow 182 | elif action == 16: 183 | if "robot0_g" in geom.attrib['name']: # or pick specific ones if needed 184 | geom.attrib['rgba'] = "0.5 0.5 0.5 1" # Make them yellow 185 | 186 | # Lighting 187 | lights = root.findall(".//light") 188 | 189 
| if action == 17: 190 | for light in lights: 191 | r, g, b = 1, 0, 0 192 | light.set("diffuse", f"{r} {g} {b}") 193 | 194 | elif action == 18: 195 | for light in lights: 196 | r, g, b = 0, 1, 0 197 | light.set("diffuse", f"{r} {g} {b}") 198 | 199 | elif action == 19: 200 | for light in lights: 201 | r, g, b = 0, 0, 1 202 | light.set("diffuse", f"{r} {g} {b}") 203 | 204 | elif action == 20: 205 | for light in lights: 206 | r, g, b = 0.5, 0.5, 0.5 207 | light.set("diffuse", f"{r} {g} {b}") 208 | 209 | # Update model 210 | new_xml_str = ET.tostring(root, encoding="unicode") 211 | robot_state["model"] = new_xml_str 212 | 213 | # Reset the environment to the new XML 214 | self.obs = self.env.reset_to(robot_state) 215 | 216 | # If we're recording video, save the frame 217 | if self.video_record: 218 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 219 | self._save_rendered_frame(rendered_img) 220 | 221 | # Now do a short rollout for `rollout_horizon` steps 222 | total_reward = 0.0 223 | success = False 224 | 225 | for step_i in range(self.rollout_horizon): 226 | # Query policy 227 | with torch.no_grad(): 228 | act = self.policy(ob=self.obs) 229 | 230 | # Step the environment 231 | next_obs, r, done, _ = self.env.step(act) 232 | total_reward += r 233 | success = self.env.is_success()["task"] 234 | 235 | # Collect data if requested 236 | if self.collect_data: 237 | # Save frames to a designated folder 238 | self._save_demo_frames(step_i, action) 239 | 240 | # If also recording video, save each new frame 241 | if self.video_record: 242 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 243 | self._save_rendered_frame(rendered_img) 244 | 245 | # Break if done or success 246 | if done or success: 247 | break 248 | 249 | self.obs = deepcopy(next_obs) 250 | 251 | # Write stats 252 | stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success)) 253 | print(stats) 254 | self._log_episode_stats(stats, action) 255 | 256 | # Terminal or not 257 | done = True 258 | if success: 259 | # If success, we might do some custom logic 260 | reward = -1 261 | done = False 262 | else: 263 | reward = 1000 264 | print("Episode Completed") 265 | 266 | # If done, stop video recording 267 | if done and self.video_record and self.video_writer is not None: 268 | self.video_writer.release() 269 | print(f"Episode {self.steps} video saved.") 270 | 271 | if self.is_sequence: 272 | return self.obs["agentview_image"][0], reward, done, {} 273 | return self.obs["agentview_image"], reward, done, {} 274 | 275 | 276 | def _save_rendered_frame(self, img_array): 277 | """ 278 | Converts an RGB array to BGR for OpenCV and writes it to the video. 279 | """ 280 | if img_array.dtype != np.uint8: 281 | img_array = (img_array * 255).astype(np.uint8) 282 | img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) 283 | if self.video_writer is not None: 284 | self.video_writer.write(img_bgr) 285 | 286 | def _save_demo_frames(self, step_i, action): 287 | """ 288 | Save frames to disk for data collection. 
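        Each rollout step's 300x300 agentview render is written to
        stack_rl_data/<save_path>/demo/demo<episode>_action_<action>/frame_<step>.png.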
289 |         """
290 |         from PIL import Image
291 |         import os
292 | 
293 |         img_save_dir = os.path.join("stack_rl_data", self.save_path, "demo")
294 |         step_save_dir = os.path.join(img_save_dir, f"demo{self.steps}_action_{action}")
295 |         os.makedirs(step_save_dir, exist_ok=True)
296 | 
297 |         rendered_img = self.env.render(mode="rgb_array", width=300, height=300)
298 |         image_path = os.path.join(step_save_dir, f"frame_{step_i:04d}.png")
299 |         Image.fromarray(rendered_img).save(image_path)
300 | 
301 |     def _log_episode_stats(self, stats, action):
302 |         """
303 |         Write stats and actions to log files.
304 |         """
305 |         import os
306 | 
307 |         # Always log stats
308 |         with open(f"stack_logs/{self.save_path}/episode_stats.txt", "a") as file:
309 |             file.write(str(stats) + "\n")
310 | 
311 |         # If collecting data, also log success/action
312 |         if self.collect_data:
313 |             with open(f"stack_rl_data/{self.save_path}/success_rate.txt", "a") as file:
314 |                 file.write(str(stats["Success_Rate"]) + "\n")
315 |             with open(f"stack_rl_data/{self.save_path}/actions.txt", "a") as file:
316 |                 file.write(str(action) + "\n")
317 | 
318 | 
--------------------------------------------------------------------------------
/env/thread_env.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | import numpy as np
  3 | import cv2
  4 | import xml.etree.ElementTree as ET
  5 | from copy import deepcopy
  6 | from PIL import Image
  7 | import torch
  8 | 
  9 | # Robomimic / stable-baselines3 / other imports as needed
 10 | # e.g., from stable_baselines3 import PPO
 11 | 
 12 | class ThreadEnv(gym.Env):
 13 |     """
 14 |     A custom Gym environment for controlling a robot environment
 15 |     and training with stable-baselines3.
 16 |     """
 17 |     def __init__(self, env, policy, rollout_horizon, video_record=False, collect_data=False, save_path="", device="cpu"):
 18 |         super(ThreadEnv, self).__init__()
 19 | 
 20 |         self.env = env
 21 |         self.policy = policy
 22 |         self.rollout_horizon = rollout_horizon
 23 |         self.video_record = video_record
 24 |         self.collect_data = collect_data
 25 |         self.save_path = save_path
 26 |         self.device = device
 27 | 
 28 |         # Action space: discrete with 19 possible actions
 29 |         self.action_space = gym.spaces.Discrete(19)
 30 | 
 31 |         # Observation space: simple 3 x 84 x 84 image
 32 | 
 33 |         self.observation_space = gym.spaces.Box(
 34 |             low=0, high=255, shape=(3, 84, 84), dtype=np.uint8
 35 |         )
 36 | 
 37 |         # Internal trackers
 38 |         self.steps = 0
 39 |         self.obs = None
 40 |         self.video_writer = None
 41 |         self.is_sequence = False  # If obs has extra dimension
 42 | 
 43 |         # Initial environment reset
 44 |         self.obs = self.env.reset()
 45 |         if len(self.obs["agentview_image"].shape) == 4:
 46 |             # If there's a batch dimension
 47 |             self.is_sequence = True
 48 | 
 49 |     def reset(self):
 50 |         print("Resetting the environment...")
 51 |         st = self.env.get_state()
 52 | 
 53 |         # Parse and modify the XML as needed
 54 |         xml_str = st["model"]
 55 |         root = ET.fromstring(xml_str)
 56 | 
 57 |         for geom in root.findall(".//geom"):
 58 |             if "name" in geom.attrib and geom.attrib["name"] == "table_visual":
 59 |                 geom.attrib['size'] = "0.4 0.4 0.025"  # default table size
 60 | 
 61 | 
 62 |         # needle handle color (restore default)
 63 |         for geom in root.findall(".//geom"):
 64 |             if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis":
 65 |                 if 'rgba' in geom.attrib:
 66 |                     del geom.attrib['rgba']
 67 | 
 68 |             if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger1_visual":
 69 |                 geom.set("rgba", "0.499 0.499 0.499 1")
 70 |             if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger2_visual":
 71 |
geom.set("rgba", "0.499 0.499 0.499 1") 72 | 73 | 74 | 75 | for light in root.findall(".//light"): 76 | # Example of restoring some typical defaults: 77 | light.set("diffuse", "1 1 1") 78 | light.set("specular", "0.1 0.1 0.1") 79 | light.set("pos", "1 1 1.5") 80 | light.set("dir", "-0.19245 -0.19245 -0.96225") 81 | 82 | new_xml_str = ET.tostring(root, encoding="unicode") 83 | st["model"] = new_xml_str 84 | 85 | # Actually reset 86 | self.obs = self.env.reset_to(st) 87 | self.env.reset() 88 | 89 | # If recording video, initialize the VideoWriter 90 | if self.video_record: 91 | self.steps += 1 92 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 93 | video_filename = f"episode_{self.steps}.avi" 94 | fourcc = cv2.VideoWriter_fourcc(*"MJPG") 95 | (h, w) = rendered_img.shape[:2] 96 | self.video_writer = cv2.VideoWriter(video_filename, fourcc, 30, (w, h)) 97 | self._save_rendered_frame(rendered_img) 98 | 99 | if self.is_sequence: 100 | return self.obs["agentview_image"][0] 101 | 102 | return self.obs["agentview_image"] 103 | 104 | def step(self, action): 105 | """ 106 | Apply one discrete action to the environment, 107 | change the XML accordingly, then run a short rollout. 108 | """ 109 | self.steps += 1 110 | robot_state = self.env.get_state() 111 | 112 | # Parse XML 113 | xml_str = robot_state["model"] 114 | root = ET.fromstring(xml_str) 115 | 116 | for geom in root.findall(".//geom"): 117 | # cylinder size 118 | if action == 0: 119 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": 120 | geom.attrib["size"] = "0.025 0.02 0.02" 121 | elif action == 1: 122 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": # Cube green color 123 | geom.attrib['size'] = "0.025 0.02 0.03" 124 | elif action == 2: 125 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": # Cube blue color 126 | geom.attrib['size'] = "0.02 0.025 0.02" 127 | elif action == 3: 128 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": 129 | geom.attrib['size'] = "0.02 0.02 0.025" 130 | 131 | 132 | elif action == 4: 133 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": 134 | geom.set("rgba", "1 0 0 1") 135 | elif action == 5: 136 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": 137 | geom.set("rgba", "0 1 0 1") 138 | elif action == 6: 139 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": 140 | geom.set("rgba", "0 0 1 1") 141 | elif action == 7: 142 | if "name" in geom.attrib and geom.attrib["name"] == "needle_obj_handle_vis": 143 | geom.set("rgba", "1 0 1 1") 144 | 145 | # Table Size 146 | elif action == 8: 147 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 148 | geom.attrib["size"] = "0.3 0.3 0.02" # Modify size 149 | elif action == 9: 150 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 151 | geom.attrib["size"] = "0.2 0.2 0.02" # Modify size 152 | elif action == 10: 153 | if "name" in geom.attrib and geom.attrib["name"] == "table_visual": 154 | geom.attrib["size"] = "0.5 0.5 0.02" 155 | 156 | # Robot Color 157 | elif action == 11: 158 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger1_visual": 159 | geom.set("rgba", "1 0 0 1") 160 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger2_visual": 161 | geom.set("rgba", "1 0 0 1") 162 | 163 | 164 | elif action == 12: 165 | if "name" in geom.attrib and geom.attrib["name"] == 
"gripper0_finger1_visual": 166 | geom.set("rgba", "0 1 0 1") 167 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger2_visual": 168 | geom.set("rgba", "0 1 0 1") 169 | 170 | elif action == 13: 171 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger1_visual": 172 | geom.set("rgba", "0 0 1 1") 173 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger2_visual": 174 | geom.set("rgba", "0 0 1 1") 175 | elif action == 14: 176 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger1_visual": 177 | geom.set("rgba", "1 1 0 1") 178 | if "name" in geom.attrib and geom.attrib["name"] == "gripper0_finger2_visual": 179 | geom.set("rgba", "1 1 0 1") 180 | 181 | # Lighting 182 | lights = root.findall(".//light") 183 | 184 | if action == 15: 185 | for light in lights: 186 | r, g, b = 1, 0, 0 187 | light.set("diffuse", f"{r} {g} {b}") 188 | 189 | elif action == 16: 190 | for light in lights: 191 | r, g, b = 0, 1, 0 192 | light.set("diffuse", f"{r} {g} {b}") 193 | 194 | elif action == 17: 195 | for light in lights: 196 | r, g, b = 0, 0, 1 197 | light.set("diffuse", f"{r} {g} {b}") 198 | 199 | elif action == 18: 200 | for light in lights: 201 | r, g, b = 0.5, 0.5, 0.5 202 | light.set("diffuse", f"{r} {g} {b}") 203 | 204 | # Update model 205 | new_xml_str = ET.tostring(root, encoding="unicode") 206 | robot_state["model"] = new_xml_str 207 | 208 | # Reset the environment to the new XML 209 | self.obs = self.env.reset_to(robot_state) 210 | 211 | # If we're recording video, save the frame 212 | if self.video_record: 213 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 214 | self._save_rendered_frame(rendered_img) 215 | 216 | # Now do a short rollout for `rollout_horizon` steps 217 | total_reward = 0.0 218 | success = False 219 | 220 | for step_i in range(self.rollout_horizon): 221 | # Query policy 222 | with torch.no_grad(): 223 | act = self.policy(ob=self.obs) 224 | 225 | # Step the environment 226 | next_obs, r, done, _ = self.env.step(act) 227 | total_reward += r 228 | success = self.env.is_success()["task"] 229 | 230 | # Collect data if requested 231 | if self.collect_data: 232 | # Save frames to a designated folder 233 | self._save_demo_frames(step_i, action) 234 | 235 | # If also recording video, save each new frame 236 | if self.video_record: 237 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 238 | self._save_rendered_frame(rendered_img) 239 | 240 | # Break if done or success 241 | if done or success: 242 | break 243 | 244 | self.obs = deepcopy(next_obs) 245 | 246 | # Write stats 247 | stats = dict(Return=total_reward, Horizon=(step_i + 1), Success_Rate=float(success)) 248 | print(stats) 249 | self._log_episode_stats(stats, action) 250 | 251 | # Terminal or not 252 | done = True 253 | if success: 254 | # If success, we might do some custom logic 255 | reward = -1 256 | done = False 257 | else: 258 | reward = 1000 259 | print("Episode Completed") 260 | 261 | # If done, stop video recording 262 | if done and self.video_record and self.video_writer is not None: 263 | self.video_writer.release() 264 | print(f"Episode {self.steps} video saved.") 265 | 266 | if self.is_sequence: 267 | return self.obs["agentview_image"][0], reward, done, {} 268 | return self.obs["agentview_image"], reward, done, {} 269 | 270 | 271 | def _save_rendered_frame(self, img_array): 272 | """ 273 | Converts an RGB array to BGR for OpenCV and writes it to the video. 
274 | """ 275 | if img_array.dtype != np.uint8: 276 | img_array = (img_array * 255).astype(np.uint8) 277 | img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) 278 | if self.video_writer is not None: 279 | self.video_writer.write(img_bgr) 280 | 281 | def _save_demo_frames(self, step_i, action): 282 | """ 283 | Save frames to disk for data collection. 284 | """ 285 | from PIL import Image 286 | import os 287 | 288 | img_save_dir = os.path.join("thread_rl_data", self.save_path, "demo") 289 | step_save_dir = os.path.join(img_save_dir, f"demo{self.steps}_action_{action}") 290 | os.makedirs(step_save_dir, exist_ok=True) 291 | 292 | rendered_img = self.env.render(mode="rgb_array", width=300, height=300) 293 | image_path = os.path.join(step_save_dir, f"frame_{step_i:04d}.png") 294 | Image.fromarray(rendered_img).save(image_path) 295 | 296 | def _log_episode_stats(self, stats, action): 297 | """ 298 | Write stats and actions to log files. 299 | """ 300 | import os 301 | 302 | # Always log stats 303 | with open(f"thread_logs/{self.save_path}/episode_stats.txt", "a") as file: 304 | file.write(str(stats) + "\n") 305 | 306 | # If collecting data, also log success/action 307 | if self.collect_data: 308 | with open(f"thread_rl_data/{self.save_path}/success_rate.txt", "a") as file: 309 | file.write(str(stats["Success_Rate"]) + "\n") 310 | with open(f"thread_rl_data/{self.save_path}/actions.txt", "a") as file: 311 | file.write(str(action) + "\n") 312 | 313 | -------------------------------------------------------------------------------- /file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/file.png -------------------------------------------------------------------------------- /images/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/images/fig2.png -------------------------------------------------------------------------------- /images/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/images/fig3.png -------------------------------------------------------------------------------- /images/fig4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/images/fig4.png -------------------------------------------------------------------------------- /images/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/images/github.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/images/logo.png -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | RoboMD 13 | 302 | 303 | 304 |
305 |
306 |

307 | Robot Icon 308 | From Mystery to Mastery: Failure
309 | Diagnosis for Improving Manipulation
310 | Policies 311 |

312 |
313 |
314 |
315 |
316 | Som Sagar1, Jiafei Duan2, Sreevishakh Vasudevan1, 317 | Yifan Zhou1, Heni Ben Amor1, Dieter Fox2,3, and 318 | Ransalu Senanayake1 319 |
320 | 321 | 1Arizona State University, 322 | 2University of Washington, 323 | 3NVIDIA 324 | 325 |
326 |
327 |
328 | 329 |
330 |
331 |
332 | 333 | 334 |
335 | GitHub Icon 336 | 337 | Code 338 | 339 |
340 | 341 | 342 | 348 | 349 | 350 |
351 | PDF Icon 352 | 353 | PDF 354 | 355 |
356 |
357 |
358 | 359 | 360 |
361 | 366 |
367 | 368 | 369 | 370 | 371 | 372 | 373 |
374 |

Abstract

375 |

376 | Robot manipulation policies often fail for unknown reasons, posing significant challenges for real-world deployment. Researchers and engineers typically address these failures using heuristic approaches, which are not only labor-intensive and costly but also prone to overlooking critical failure modes (FMs). This paper introduces Robot Manipulation Diagnosis (RoboMD), a systematic framework designed to automatically identify FMs arising from unexpected changes in the environment. To navigate the vast space of potential FMs for a given pre-trained manipulation policy, we leverage deep reinforcement learning (deep RL) to explore and uncover these FMs using a specially trained vision-language embedding that encodes a notion of failures. This approach enables users to probabilistically quantify and rank failures in previously unseen environmental conditions. Through extensive experiments across various manipulation tasks and algorithms, we demonstrate RoboMD's effectiveness in diagnosing unknown failures in unstructured environments, providing a systematic pathway to enhance the robustness of manipulation policies. 377 |

378 |
379 | 380 | 381 | 382 |
383 |
Real World Variations
384 |
385 |
386 | 390 | 394 | 398 | 402 | 406 | 410 | 414 | 418 | 422 | 426 |
427 | 428 | 429 |
430 | 431 | 432 |
433 | 434 | 435 | 436 | 437 |
438 |

Summary

439 | 440 |
441 | 442 | Observe Failures, Uncover Failures, Adapt 443 |
444 | 445 |

446 | Robot Manipulation Diagnosis (RoboMD) is a systematic framework designed to automatically identify FMs arising from unanticipated changes in the environment. To navigate the vast space of potential FMs in a pre-trained manipulation policy, we leverage deep reinforcement learning (deep RL) to explore and uncover these FMs using a specially trained vision-language embedding that encodes a notion of failures. This approach enables users to probabilistically quantify and rank failures in previously unseen environmental conditions. Through extensive experiments across various manipulation tasks and algorithms, we demonstrate RoboMD's effectiveness in diagnosing unknown failures in unstructured environments, providing a systematic pathway to improve the robustness of manipulation policies.

448 |
449 | 450 | 451 |
452 |

Experiments and Results

453 | 454 |
455 | 456 | Radar plots for multiple models 457 |
458 | 459 |

460 | Individual FM analysis of multiple models. Each radar plot represents the failure likelihood of a specific action. The axes correspond to different environmental setups (e.g., Red Cube, Green Table, Blue Table), shown in (a) for the real-world setup and in (b, c) for simulation; the numbers indicate the probability of failure for each action under each configuration. 461 |

462 |

463 |
464 | 465 | Table 466 |
467 | 468 |

469 | Comparison of rankings for failure-inducing actions in continuous and discrete action spaces. a^r denotes actions performed in the real robot environment: a^r_1 = “Bread” (Unseen), a^r_2 = “Red Cube”, a^r_3 = “Milk Carton”, a^r_4 = “Sprite”. a^s denotes actions performed in the simulated environment: a^s_1 = “Red Table”, a^s_2 = “Black Table” (Unseen), a^s_3 = “Green Lighting”. Rank consistency indicates whether the rankings are preserved across the two formulations. Accuracy is computed over 21 environment variations. 470 |

471 |
472 | 473 | 474 | 475 | 476 |
477 |
Simulation Variations
478 |
479 | 480 | 481 | 505 | 506 |

507 | 508 |
509 | 513 |
514 | 515 |

516 | 517 | 834 | 835 | 836 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | numpy 3 | torch 4 | h5py 5 | opencv-python 6 | gym 7 | matplotlib 8 | imageio 9 | pillow 10 | scipy 11 | stable-baselines3 12 | 13 | # Utilities for image and video processing 14 | opencv-python>=4.5 15 | Pillow>=9.0.0 16 | 17 | 18 | # If you use or plan to use MuJoCo or robosuite directly, include it: 19 | # robosuite>=1.3.0 # (only if needed) 20 | 21 | 22 | # robomimic==0.3.0 23 | # stable-baselines3==1.7.0 24 | # torch==1.12.0 25 | # gym==0.21.0 26 | # opencv-python==4.5.5.64 27 | # Pillow==9.3.0 28 | # numpy==1.22.4 29 | 30 | -------------------------------------------------------------------------------- /scripts/convert_pairwise.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | import pandas as pd 5 | from itertools import combinations 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser(description="Create a pairwise (success - failure) comparison dataset from demos, given a single root directory.") 9 | 10 | # Only one argument: root directory 11 | parser.add_argument( 12 | "--root", 13 | type=str, 14 | required=True, 15 | help="Root directory that must contain success_rate.txt, actions.txt, and a 'demo' folder." 16 | ) 17 | 18 | args = parser.parse_args() 19 | 20 | # Let the user know what is expected inside root 21 | print("Note: This script expects the following inside the provided root directory:") 22 | print(" 1) success_rate.txt") 23 | print(" 2) actions.txt") 24 | print(" 3) A folder named 'demo' containing demonstration subfolders.") 25 | print(" 4) The output CSV will be saved to output_pairwise_dataset.csv under the same root.") 26 | 27 | # Construct the expected full paths 28 | success_path = os.path.join(args.root, "success_rate.txt") 29 | action_path = os.path.join(args.root, "actions.txt") 30 | demo_folder_path = os.path.join(args.root, "demo") 31 | dataset_output_path = os.path.join(args.root, "output_pairwise_dataset.csv") 32 | 33 | # Verify files/folders 34 | if not os.path.isfile(success_path): 35 | print(f"File not found: {success_path}") 36 | print("Please place 'success_rate.txt' in the root directory.") 37 | return 38 | 39 | if not os.path.isfile(action_path): 40 | print(f"File not found: {action_path}") 41 | print("Please place 'actions.txt' in the root directory.") 42 | return 43 | 44 | if not os.path.isdir(demo_folder_path): 45 | print(f"Folder not found: {demo_folder_path}") 46 | print("Please create a 'demo' folder inside the root directory.") 47 | return 48 | 49 | # Load success labels and actions 50 | success_labels = np.loadtxt(success_path).astype(int) 51 | actions = np.loadtxt(action_path).astype(int) 52 | 53 | # Check demo subfolders 54 | demo_folders = sorted(folder for folder in os.listdir(demo_folder_path) if folder.startswith("demo")) 55 | num_demos = len(demo_folders) 56 | 57 | # Validate dataset consistency 58 | if len(success_labels) != num_demos or len(actions) != num_demos: 59 | print( 60 | f"Mismatch between number of demo subfolders ({num_demos}), " 61 | f"success labels ({len(success_labels)}) and actions ({len(actions)})." 
62 | ) 63 | return 64 | 65 | # Collect all demos info 66 | all_demos = [] 67 | for i, demo_folder_name in enumerate(demo_folders): 68 | demo_full_path = os.path.join(demo_folder_path, demo_folder_name) 69 | 70 | if not os.path.isdir(demo_full_path): 71 | print(f"Expected a folder for demo, but found: {demo_full_path}") 72 | return 73 | 74 | all_demos.append({ 75 | "demo_id": demo_folder_name, 76 | "demo_path": demo_full_path, 77 | "actions": actions[i], 78 | "success": success_labels[i] 79 | }) 80 | 81 | # Create pairwise comparison dataset 82 | pairwise_data = [] 83 | for (demo1, demo2) in combinations(all_demos, 2): 84 | # We only compare demos with different success labels 85 | if demo1["success"] == demo2["success"]: 86 | continue 87 | 88 | # Label is 1 if demo1 is more successful than demo2, otherwise 0 89 | label = 1 if demo1["success"] > demo2["success"] else 0 90 | pairwise_data.append({ 91 | "demo1_id": demo1["demo_id"], 92 | "demo1_path": demo1["demo_path"], 93 | "demo1_actions": demo1["actions"], 94 | "demo1_success": demo1["success"], 95 | "demo2_id": demo2["demo_id"], 96 | "demo2_path": demo2["demo_path"], 97 | "demo2_actions": demo2["actions"], 98 | "demo2_success": demo2["success"], 99 | "label": label 100 | }) 101 | 102 | # Convert the list of dictionaries into a DataFrame 103 | df = pd.DataFrame(pairwise_data) 104 | 105 | # Save to CSV 106 | df.to_csv(dataset_output_path, index=False) 107 | print(f"Pairwise comparison dataset created and saved to {dataset_output_path}") 108 | 109 | if __name__ == "__main__": 110 | main() 111 | -------------------------------------------------------------------------------- /train_continuous.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import h5py 4 | import numpy as np 5 | import torch 6 | 7 | # robomimic imports 8 | import robomimic.utils.file_utils as FileUtils 9 | import robomimic.utils.torch_utils as TorchUtils 10 | 11 | # stable-baselines3 12 | from stable_baselines3 import PPO 13 | from stable_baselines3.common.vec_env import DummyVecEnv 14 | 15 | # local imports 16 | from env.latent_action_env import LatentActionEnv 17 | from utils.losses import find_closest_value 18 | 19 | def train_ppo(run_name, task_name, agent_path, rl_timesteps): 20 | """ 21 | Main function to: 22 | 1. Load Robomimic checkpoint + environment 23 | 2. Load known embeddings (HDF5) 24 | 3. Construct LatentActionEnv 25 | 4. 
Train PPO on top of latent actions
 26 |     """
 27 | 
 28 |     device = TorchUtils.get_torch_device(try_to_use_cuda=True)
 29 |     print(f"Running PPO for {run_name}, task={task_name}, device={device}")
 30 | 
 31 |     # (A) Load robomimic policy
 32 |     policy, ckpt_dict = FileUtils.policy_from_checkpoint(
 33 |         ckpt_path=agent_path,
 34 |         device=device,
 35 |         verbose=True
 36 |     )
 37 |     config, _ = FileUtils.config_from_checkpoint(ckpt_dict=ckpt_dict)
 38 |     horizon = config.experiment.rollout.horizon
 39 |     print(f"Horizon from config: {horizon}")
 40 | 
 41 |     # (B) Create environment from checkpoint
 42 |     env, _ = FileUtils.env_from_checkpoint(
 43 |         ckpt_dict=ckpt_dict,
 44 |         env_name=None,
 45 |         render=False,
 46 |         render_offscreen=False,
 47 |         verbose=True
 48 |     )
 49 | 
 50 |     # (C) Load known embeddings
 51 |     embed_file = os.path.join(run_name, "known_embed_train.h5")
 52 |     if not os.path.isfile(embed_file):
 53 |         raise FileNotFoundError(f"Embedding file not found: {embed_file}")
 54 | 
 55 |     with h5py.File(embed_file, "r") as f:
 56 |         embeddings_array = np.array(f["embeddings"])
 57 |         values_array = np.array(f["values"])
 58 | 
 59 |     # (D) Create logs folder
 60 |     log_dir = os.path.join(run_name, "latent_ppo_logs")
 61 |     os.makedirs(log_dir, exist_ok=True)
 62 | 
 63 |     def make_env():
 64 |         return LatentActionEnv(
 65 |             env=env,
 66 |             policy=policy,
 67 |             horizon=horizon,
 68 |             embeddings_array=embeddings_array,
 69 |             values_array=values_array,
 70 |             log_dir=log_dir,
 71 |             task_name=task_name
 72 |         )
 73 | 
 74 |     vec_env = DummyVecEnv([make_env])
 75 | 
 76 |     # (E) Create and train PPO
 77 |     ppo_model = PPO(
 78 |         "CnnPolicy",
 79 |         vec_env,
 80 |         verbose=1,
 81 |         n_steps=10,
 82 |         batch_size=128,
 83 |         learning_rate=1e-5,
 84 |         ent_coef=0.4,
 85 |         clip_range=0.2,
 86 |         gamma=0.99,
 87 |         gae_lambda=0.95,
 88 |         max_grad_norm=0.5
 89 |     )
 90 |     print(f"Training PPO for {rl_timesteps} timesteps...")
 91 |     ppo_model.learn(total_timesteps=rl_timesteps)
 92 | 
 93 |     # (F) Save PPO model
 94 |     out_name = f"{run_name}/PPO_latent_{rl_timesteps}.zip"
 95 |     ppo_model.save(out_name)
 96 |     print(f"PPO model saved to: {out_name}")
 97 | 
 98 | 
 99 | def main():
100 |     parser = argparse.ArgumentParser(description="Train PPO with latent actions on a robomimic env.")
101 |     parser.add_argument("--name", "-n", type=str, required=True,
102 |                         help="Name of the run/folder (e.g. 'BC_can')")
103 |     parser.add_argument("--task", "-t", type=str, required=True,
104 |                         help="Task name (e.g.
'can', 'lift', etc.)") 105 | parser.add_argument("--agent", "-a", type=str, required=True, 106 | help="Path to the robomimic agent checkpoint.") 107 | parser.add_argument("--rl_timesteps", "-r", type=int, default=3000, 108 | help="Number of PPO training timesteps (default=3000)") 109 | args = parser.parse_args() 110 | 111 | train_ppo( 112 | run_name=args.name, 113 | task_name=args.task, 114 | agent_path=args.agent, 115 | rl_timesteps=args.rl_timesteps 116 | ) 117 | 118 | 119 | if __name__ == "__main__": 120 | main() 121 | -------------------------------------------------------------------------------- /train_discrete.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.vec_env import DummyVecEnv 6 | 7 | # robomimic imports 8 | import robomimic.utils.file_utils as FileUtils 9 | import robomimic.utils.torch_utils as TorchUtils 10 | 11 | # Local import of our environment 12 | from env.can_env import CanEnv 13 | from env.lift_env import LiftEnv 14 | from env.thread_env import ThreadEnv 15 | from env.square_env import SquareEnv 16 | from env.stack_env import StackEnv 17 | 18 | TASK_ENVS = { 19 | "can": CanEnv, 20 | "lift": LiftEnv, 21 | "thread": ThreadEnv, 22 | "square": SquareEnv, 23 | "stack": StackEnv, 24 | } 25 | 26 | 27 | def main(args): 28 | # Prepare directories 29 | log_dir = f"{args.task_name}_logs" 30 | data_dir = f"{args.task_name}_rl_data" 31 | 32 | # Create required directories 33 | os.makedirs(log_dir, exist_ok=True) 34 | os.makedirs(os.path.join(log_dir, args.save_path), exist_ok=True) 35 | 36 | if args.collect_data: 37 | os.makedirs(data_dir, exist_ok=True) 38 | os.makedirs(os.path.join(data_dir, args.save_path), exist_ok=True) 39 | 40 | # Print arguments for clarity 41 | print(f"Agent Path: {args.agent_path}") 42 | print(f"Video Record: {args.video_record}") 43 | print(f"RL Update Step: {args.rl_update_step}") 44 | print(f"RL Timesteps: {args.rl_timesteps}") 45 | print(f"Collect Data: {args.collect_data}") 46 | print(f"Render: {args.render}") 47 | 48 | # Load the policy checkpoint 49 | ckpt_path = args.agent_path 50 | device = TorchUtils.get_torch_device(try_to_use_cuda=True) 51 | policy, ckpt_dict = FileUtils.policy_from_checkpoint(ckpt_path=ckpt_path, device=device, verbose=True) 52 | 53 | # Determine horizon (if not specified, read from config) 54 | config, _ = FileUtils.config_from_checkpoint(ckpt_dict=ckpt_dict) 55 | rollout_horizon = config.experiment.rollout.horizon 56 | 57 | # Create environment from the checkpoint 58 | env, _ = FileUtils.env_from_checkpoint( 59 | ckpt_dict=ckpt_dict, 60 | env_name=None, 61 | render=args.render, 62 | render_offscreen=False, # If you need offscreen rendering set to True 63 | verbose=True 64 | ) 65 | 66 | # Instantiate our custom environment 67 | env_class = TASK_ENVS.get(args.task_name) 68 | if env_class is None: 69 | raise ValueError(f"Unknown task name: {args.task_name}") 70 | 71 | def make_rl_env(): 72 | return env_class( 73 | env=env, 74 | policy=policy, 75 | rollout_horizon=rollout_horizon, 76 | video_record=args.video_record, 77 | collect_data=args.collect_data, 78 | save_path=args.save_path, 79 | device=device 80 | ) 81 | 82 | # Create a VecEnv for stable-baselines 83 | vec_env = DummyVecEnv([make_rl_env]) 84 | 85 | # Create the PPO model 86 | ppo_model = PPO("CnnPolicy", vec_env, verbose=1, n_steps=args.rl_update_step) 87 | ppo_model.learn(total_timesteps=args.rl_timesteps) 88 
| 89 | # Save the trained model 90 | os.makedirs("trained_rl_models", exist_ok=True) 91 | save_model_path = f"trained_rl_models/{args.save_path}_ppo_model_{args.rl_timesteps}" 92 | ppo_model.save(save_model_path) 93 | print(f"Training completed. Model saved to {save_model_path}") 94 | 95 | # Example: get action log probabilities 96 | observation = vec_env.reset() 97 | observation = torch.tensor(observation).float().to(ppo_model.device) 98 | with torch.no_grad(): 99 | dist = ppo_model.policy.get_distribution(observation) 100 | n_actions = vec_env.action_space.n 101 | all_actions = torch.arange(n_actions).to(ppo_model.device) 102 | log_probs = dist.log_prob(all_actions) 103 | print("Log Probabilities of All Actions:", log_probs) 104 | 105 | if args.save_logs: 106 | log_file = os.path.join(log_dir, args.save_path, "log_prob.txt") 107 | with open(log_file, "a") as file: 108 | file.write(str(log_probs) + "\n") 109 | 110 | 111 | if __name__ == "__main__": 112 | parser = argparse.ArgumentParser(description="Train RL agent to find failure modes for robosuite task.") 113 | 114 | parser.add_argument("--task_name", type=str, default="can", help="Name of the task/environment. Used to form directory names (e.g., can_logs, can_rl_data).") 115 | parser.add_argument("--agent_path", type=str, required=True, help="Path to load the agent checkpoint (.pt file).") 116 | parser.add_argument("--rl_timesteps", type=int, default=300, help="Number of training timesteps (default: 300)") 117 | parser.add_argument("--rl_update_step", type=int, default=300, help="Number of steps per PPO update (default: 300)") 118 | parser.add_argument("--video_record", action="store_true", help="Record training video if set.") 119 | parser.add_argument("--render", action="store_true", help="Render rollout if set.") 120 | parser.add_argument("--collect_data", action="store_true", help="If set, collect image data during rollouts.") 121 | parser.add_argument("--save_path", type=str, default="default_run", help="Folder name to save logs and data.") 122 | parser.add_argument("--save_logs", action="store_true", help="Save logs.") 123 | 124 | args = parser.parse_args() 125 | main(args) 126 | -------------------------------------------------------------------------------- /train_embedding.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import h5py 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | from configs.action_dicts import ACTION_DICTS 9 | from utils.robot_dataset import RobotDataset 10 | from utils.vit_clip_model import ViTCLIPModel 11 | from utils.losses import contrastive_loss, cosine_similarity_manual 12 | 13 | from torch.utils.data import DataLoader, Subset 14 | from torchvision import transforms 15 | from torch.amp import autocast, GradScaler 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | from PIL import Image 19 | 20 | 21 | 22 | def train_embed(args): 23 | # 1) Select correct action dictionary 24 | if args.task not in ACTION_DICTS: 25 | raise ValueError(f"Unknown task: {args.task}") 26 | action_description = ACTION_DICTS[args.task] 27 | 28 | description_to_index = {v: k for k, v in action_description.items()} 29 | 30 | def extract_embedding(model, folder_path, action, device): 31 | model.eval() 32 | 33 | # --- 1. 
Collect and transform all images from the folder ---
 34 |         transform = transforms.Compose([
 35 |             transforms.Resize((224, 224)),  # or whatever your model expects
 36 |             transforms.ToTensor(),
 37 |             transforms.Normalize(mean=[0.485, 0.456, 0.406],
 38 |                                  std=[0.229, 0.224, 0.225]),
 39 |         ])
 40 | 
 41 |         # Collect all image files (PNG, JPG, etc.)
 42 |         image_files = sorted(
 43 |             f for f in os.listdir(folder_path)
 44 |             if f.lower().endswith(".png") or f.lower().endswith(".jpg")
 45 |         )
 46 |         if not image_files:
 47 |             # If no images found, return an empty or zero tensor
 48 |             return torch.zeros(model.fc1.in_features, device=device)
 49 | 
 50 |         # Load & transform each image into a list of tensors
 51 |         image_tensors = []
 52 |         for img_file in image_files:
 53 |             img_path = os.path.join(folder_path, img_file)
 54 |             image = Image.open(img_path).convert("RGB")
 55 |             image_tensors.append(transform(image))
 56 | 
 57 |         # Stack => shape [seq_len, 3, H, W]
 58 |         images_tensor = torch.stack(image_tensors, dim=0)
 59 | 
 60 |         # Add a batch dimension => shape [1, seq_len, 3, H, W]
 61 |         images_tensor = images_tensor.unsqueeze(0).to(device)
 62 | 
 63 |         # --- 2. Pass the batch of frames + single action to the model ---
 64 |         with torch.no_grad():
 65 |             # model.get_embedding => shape [1, emb_dim], after averaging frames in encode_images
 66 |             final_embedding = model.get_embedding(images_tensor, [action])
 67 |             # For a single sample, remove the batch dimension
 68 |             final_embedding = final_embedding.squeeze(0)  # shape [emb_dim]
 69 | 
 70 |         return final_embedding.cpu()
 71 | 
 72 | 
 73 | 
 74 |     transform_img = transforms.Compose([
 75 |         transforms.Resize((224, 224)),
 76 |         transforms.ToTensor(),
 77 |         transforms.Normalize(
 78 |             mean=[0.485, 0.456, 0.406],
 79 |             std =[0.229, 0.224, 0.225],
 80 |         )
 81 |     ])
 82 |     actions_file = os.path.join(args.path, "actions.txt")
 83 |     success_file = os.path.join(args.path, "success_rate.txt")
 84 |     image_folder = os.path.join(args.path, "demo")
 85 | 
 86 |     dataset = RobotDataset(
 87 |         actions_file,
 88 |         success_file,
 89 |         image_folder,
 90 |         action_description,  # pass in the dictionary
 91 |         transform_img
 92 |     )
 93 | 
 94 |     # Subset if desired
 95 |     subset_indices = range(200)
 96 |     train_subset = Subset(dataset, subset_indices)
 97 | 
 98 |     dataloader = DataLoader(
 99 |         train_subset,
100 |         batch_size=8,
101 |         shuffle=True,
102 |         num_workers=8,
103 |         pin_memory=True
104 |     )
105 | 
117 | 
118 |     # 3) Model, optimizer, etc.
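    #    The model (utils/vit_clip_model.py) fuses a ViT image encoder with
    #    CLIP's text encoder; training combines a BCE success/failure
    #    classifier with the contrastive embedding loss from utils/losses.py.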
119 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
120 |     model = ViTCLIPModel(checkpoint_gradient=args.checkpoint_gradient).to(device)
121 |     criterion = nn.BCEWithLogitsLoss()
122 |     optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
123 |     scaler = GradScaler()
124 | 
125 |     print('training')
126 | 
127 |     # 4) Training loop
128 |     for epoch in range(args.epochs):
129 |         model.train()
130 |         total_loss = 0.0
131 |         for batch in dataloader:
132 |             actions_text = batch["action"]
133 |             success = batch["success"].float().to(device)
134 |             images = batch["images"].to(device)
135 | 
136 |             action_labels = torch.tensor(
137 |                 [description_to_index[a] for a in actions_text],
138 |                 dtype=torch.long,
139 |                 device=device
140 |             )
141 | 
142 |             optimizer.zero_grad()
143 | 
144 |             with autocast(dtype=torch.float16, device_type='cuda'):
145 |                 # classification
146 |                 logits = model(images, actions_text)
147 |                 loss_class = criterion(logits, success)
148 | 
149 |                 # contrastive, using the label indices computed above
150 |                 embeddings = model.get_embedding(images, actions_text)
151 |                 loss_contra = contrastive_loss(embeddings, action_labels)
152 | 
155 |                 loss = loss_class + loss_contra
156 | 
157 |             scaler.scale(loss).backward()
158 |             scaler.step(optimizer)
159 |             scaler.update()
160 | 
161 |             total_loss += loss.item()
162 | 
163 |         print(f"[Epoch {epoch+1}/{args.epochs}] Loss: {total_loss:.4f}")
164 |     print('training complete')
165 |     save_path = "embedding_model.pth"  # Current directory
166 |     torch.save({
167 |         'model_state_dict': model.state_dict(),
168 |         'optimizer_state_dict': optimizer.state_dict(),
169 |         'epoch': args.epochs,
170 |         'loss': total_loss,
171 |     }, save_path)
172 |     print(f"Model saved to {save_path}")
173 | 
174 | 
175 | 
176 |     # Extract embeddings for all demos in the folder
177 |     embeddings_list = []
178 |     values_list = []
179 | 
180 |     folder_path = os.path.join(args.path, "demo")
181 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
182 | 
183 |     for folder_name in os.listdir(folder_path)[:15]:
184 |         print(folder_name)
185 |         sub_folder = os.path.join(folder_path, folder_name)
186 | 
187 |         if not os.path.isdir(sub_folder):
188 |             continue
189 | 
190 |         action_name = folder_name.split("_")[-1]  # e.g., "3"
191 | 
192 |         action = action_description[int(action_name)]
193 | 
194 |         embedding = extract_embedding(model, sub_folder, action, device)
195 |         embedding_array = embedding.numpy()
196 |         embeddings_list.append(embedding_array)
197 |         values_list.append(int(action_name))
198 | 
199 | 
200 |     embeddings_array = np.array(embeddings_list)
201 |     values_array = np.array(values_list)
202 | 
203 | 
204 |     # Suppose embeddings_array.shape = (N, D) and values_array.shape = (N,)
205 |     with h5py.File("known_embeddings.h5", "w") as f:
206 |         f.create_dataset("embeddings", data=embeddings_array)
207 |         f.create_dataset("values", data=values_array)
208 | 
209 | 
210 |     print("Saved embeddings")
211 | 
212 | if __name__ == "__main__":
213 |     parser = argparse.ArgumentParser(description="Train embeddings for a given robosuite-based task.")
214 |     parser.add_argument("--task", type=str, required=True, help="Task name, e.g.
'can', 'square', etc.") 215 | parser.add_argument("--path", type=str, required=True, help="Path to dataset.") 216 | parser.add_argument("--epochs", type=int, default=100, help="Number of epochs.") 217 | parser.add_argument("--checkpoint_gradient", action="store_true", help="Enable gradient checkpointing.") 218 | args = parser.parse_args() 219 | 220 | train_embed(args) 221 | -------------------------------------------------------------------------------- /utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def contrastive_loss(embeddings, labels, margin=1.0): 5 | pairwise_dist = torch.cdist(embeddings, embeddings, p=2) 6 | same_label_mask = (labels.unsqueeze(1) == labels.unsqueeze(0)).float() 7 | 8 | positive_loss = same_label_mask * pairwise_dist 9 | negative_loss = (1 - same_label_mask) * F.relu(margin - pairwise_dist) 10 | 11 | loss = positive_loss.sum() + negative_loss.sum() 12 | return loss / same_label_mask.numel() 13 | 14 | def cosine_similarity_manual(vec_a, vec_b): 15 | import numpy as np 16 | dot_product = np.dot(vec_a, vec_b) 17 | norm_a = np.linalg.norm(vec_a) 18 | norm_b = np.linalg.norm(vec_b) 19 | sim = dot_product / (norm_a * norm_b + 1e-8) 20 | return np.clip(sim, -1.0, 1.0) 21 | 22 | def find_closest_value(new_embedding, embeddings_array, values_array): 23 | """ 24 | If you want a standalone function for finding the closest embedding. 25 | This is often used if you do it outside the environment class. 26 | """ 27 | distances = cdist([new_embedding], embeddings_array, metric="euclidean")[0] 28 | idx = np.argmin(distances) 29 | return values_array[idx], distances[idx] 30 | -------------------------------------------------------------------------------- /utils/robot_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.data import Dataset 4 | from PIL import Image 5 | import numpy as np 6 | 7 | 8 | class RobotDataset(Dataset): 9 | """ 10 | Loads: 11 | - actions.txt (list of action indices) 12 | - success_rate.txt (list of success flags 0/1) 13 | - folder of demos (images) for each index: e.g. "demo1_action_0" 14 | Returns: 15 | Dictionary with { "action": , "success": , "images": } 16 | """ 17 | def __init__(self, actions_file, success_file, image_folder, action_dict, transform=None): 18 | self.actions = np.loadtxt(actions_file, dtype=int) 19 | self.success = np.loadtxt(success_file).astype(int) 20 | self.image_folder = image_folder 21 | self.transform = transform 22 | self.action_dict = action_dict 23 | self.max_seq_len = 50 24 | 25 | if len(self.actions) != len(self.success): 26 | raise ValueError("Mismatch between number of actions and success labels.") 27 | 28 | def __len__(self): 29 | return len(self.actions) 30 | 31 | def __getitem__(self, idx): 32 | action_idx = self.actions[idx] 33 | action_str = self.action_dict[action_idx] # e.g. "Change the can color to red." 
34 |         success_flag = self.success[idx]
35 | 
36 |         # Example subfolder: "demo1_action_0"
37 |         demo_folder = os.path.join(self.image_folder, f"demo{idx + 1}_action_{action_idx}")
38 | 
39 |         # Load all images from that folder, sorted so frames stay in temporal order
40 |         image_filenames = sorted(os.listdir(demo_folder))
41 |         images = []
42 |         for img_name in image_filenames:
43 |             img_path = os.path.join(demo_folder, img_name)
44 |             with Image.open(img_path).convert("RGB") as img:
45 |                 if self.transform:
46 |                     img = self.transform(img)
47 |                 images.append(img)
48 | 
49 |         # We store them in a fixed-size 3D tensor [seq_len, 3, H, W], truncated/padded
50 |         if len(images) == 0:
51 |             # Edge case: if no images exist (shouldn't happen ideally)
52 |             # Return a tensor of zeros or handle gracefully
53 |             padded_images = torch.zeros(self.max_seq_len, 3, 224, 224)
54 |         else:
55 |             # Create a zero-tensor
56 |             padded_images = torch.zeros(self.max_seq_len, *images[0].shape)
57 | 
58 |         seq_len = min(len(images), self.max_seq_len)
59 |         for i in range(seq_len):
60 |             padded_images[i] = images[i]
61 | 
62 |         return {
63 |             "action": action_str,
64 |             "success": success_flag,
65 |             "images": padded_images,
66 |         }
--------------------------------------------------------------------------------
/utils/vit_clip_model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import clip
 4 | from torch.nn.functional import normalize, relu
 5 | from torchvision.models import vit_b_16, ViT_B_16_Weights
 6 | from torch.utils.checkpoint import checkpoint
 7 | from torch.cuda.amp import autocast
 8 | import torch.nn.functional as F
 9 | 
10 | class ViTCLIPModel(nn.Module):
11 |     """
12 |     Combines a ViT image encoder with a CLIP text encoder
13 |     and outputs a binary classification (success vs fail)
14 |     plus an embedding for contrastive learning.
15 |     """
16 |     def __init__(self, clip_model_name="ViT-B/32", latent_dim=512, checkpoint_gradient=False):
17 |         super(ViTCLIPModel, self).__init__()
18 | 
19 |         # Vision Transformer for images
20 |         self.image_encoder = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)
21 |         # Replace final classifier layer with a trainable linear layer
22 |         in_features = self.image_encoder.heads.head.in_features
23 |         self.image_encoder.heads.head = nn.Linear(in_features, latent_dim)
24 | 
25 |         # CLIP model for text
26 |         self.clip_model, _ = clip.load(clip_model_name)
27 |         self.clip_model = self.clip_model.float()
28 |         for param in self.clip_model.parameters():
29 |             param.requires_grad = True  # Fine-tune the text encoder jointly with the image encoder
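            # (Setting requires_grad = False here would instead freeze CLIP's
            # text tower and save memory; the code as shipped fine-tunes it.)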
30 | 31 | # Optionally enable gradient checkpointing 32 | self.checkpoint_gradient = checkpoint_gradient 33 | 34 | # Fusion and classification layers 35 | self.fc1 = nn.Linear(latent_dim * 2, 512) 36 | self.fc2 = nn.Linear(512, 1) 37 | 38 | def encode_images(self, image_sequence): 39 | """ 40 | image_sequence: [batch_size, seq_len, 3, H, W] 41 | """ 42 | batch_size, seq_len, c, h, w = image_sequence.size() 43 | # Flatten batch & seq together 44 | x = image_sequence.view(batch_size * seq_len, c, h, w) 45 | features = self.image_encoder(x) # [batch_size*seq_len, latent_dim] 46 | # Reshape back and average over seq_len dimension 47 | features = features.view(batch_size, seq_len, -1).mean(dim=1) 48 | return features 49 | 50 | def forward(self, images, actions): 51 | """ 52 | images: [batch_size, seq_len, 3, H, W] 53 | actions: list of strings 54 | Returns: 55 | logit: shape [batch_size] 56 | """ 57 | # -- encode images -- 58 | if self.checkpoint_gradient: 59 | image_features = checkpoint(self.encode_images, images) 60 | else: 61 | image_features = self.encode_images(images) 62 | 63 | # -- encode text (CLIP) -- 64 | text_tokens = clip.tokenize(actions).to(images.device) 65 | if self.checkpoint_gradient: 66 | text_features = checkpoint(self.clip_model.encode_text, text_tokens) 67 | else: 68 | text_features = self.clip_model.encode_text(text_tokens) 69 | 70 | # Combine them 71 | combined = torch.cat((image_features, text_features), dim=1) 72 | combined = F.normalize(combined, p=2, dim=1) 73 | 74 | x = torch.relu(self.fc1(combined)) 75 | logit = self.fc2(x).squeeze(1) # [batch_size] 76 | return logit 77 | 78 | def get_embedding(self, images, actions): 79 | """ 80 | Return an embedding (post-fc1) used for contrastive learning. 81 | """ 82 | with autocast(dtype=torch.float16): 83 | # Encode images 84 | image_features = self.encode_images(images) 85 | 86 | # Encode text 87 | text_tokens = clip.tokenize(actions).to(images.device) 88 | text_features = self.clip_model.encode_text(text_tokens) 89 | 90 | combined = torch.cat((image_features, text_features), dim=1) 91 | combined = F.normalize(combined, p=2, dim=1) 92 | final_embedding = torch.relu(self.fc1(combined)) 93 | return final_embedding 94 | 95 | -------------------------------------------------------------------------------- /videos/RSS.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/videos/RSS.mp4 -------------------------------------------------------------------------------- /videos/real_world/1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/videos/real_world/1.mp4 -------------------------------------------------------------------------------- /videos/real_world/10.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/videos/real_world/10.mp4 -------------------------------------------------------------------------------- /videos/real_world/11.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/somsagar07/RoboMD/350671a643fcd0871d3989757e3656447a501c02/videos/real_world/11.mp4 -------------------------------------------------------------------------------- /videos/real_world/12.mp4: 
/videos/: binary media files (videos/RSS.mp4, videos/real_world/1.mp4-13.mp4, and the videos/sim_videos/{Can,Lift,Square,Stack,Thread}/{BC,BCQ,BCT,Diff,HBC} rollout clips) are omitted from this dump; the raw files are hosted on GitHub under the same paths.
--------------------------------------------------------------------------------