├── experiments
│   ├── 4_so101
│   │   └── .gitkeep
│   ├── 5_widowx
│   │   ├── .gitkeep
│   │   ├── requirements.txt
│   │   ├── README.md
│   │   ├── widowx_env.py
│   │   └── eval_widowx.py
│   ├── 6_agibot
│   │   └── .gitkeep
│   ├── 7_franka
│   │   ├── .gitkeep
│   │   ├── requirements.txt
│   │   ├── README.md
│   │   ├── realsense_camera.py
│   │   └── eval_franka.py
│   ├── 8_vllmeval
│   │   ├── vlm
│   │   │   ├── __init__.py
│   │   │   └── prompt.py
│   │   ├── run.sh
│   │   ├── dataset-config.json
│   │   ├── dataset
│   │   │   ├── eobench.py
│   │   │   └── erqa.py
│   │   └── README.md
│   ├── 3_simpler
│   │   ├── data-bridge.yaml
│   │   ├── data-fractal.yaml
│   │   ├── simpler_env
│   │   │   ├── eval_simpler.sh
│   │   │   └── main_inference.py
│   │   ├── train_bridge.sh
│   │   ├── train_fractal.sh
│   │   └── README.md
│   ├── 9_pretraining
│   │   ├── zero1.json
│   │   ├── zero3.json
│   │   ├── launch_pretrain.sh
│   │   └── README.md
│   ├── 2_libero
│   │   ├── data-libero.yaml
│   │   ├── train.sh
│   │   ├── README.md
│   │   └── eval_libero.py
│   └── 1_demo
│       ├── train.sh
│       ├── data-demo.yaml
│       └── README.md
├── scripts
│   ├── inference_service.py
│   ├── chat_template.json
│   ├── eval_policy.py
│   └── train.py
├── tests
│   ├── test_dataset.py
│   └── test_vlm.py
├── .assets
│   ├── logo.png
│   ├── embodiments.png
│   ├── merged_grid.gif
│   ├── data_example.png
│   └── openloop_example.png
├── demo_data
│   ├── example1.jpg
│   ├── example2.png
│   └── refcoco
│       ├── images
│       │   ├── COCO_train2014_000000168643_2.jpg
│       │   ├── COCO_train2014_000000263111_0.jpg
│       │   ├── COCO_train2014_000000579299_4.jpg
│       │   ├── COCO_train2014_000000580905_2.jpg
│       │   ├── COCO_train2014_000000580957_2.jpg
│       │   └── COCO_train2014_000000567396_13.jpg
│       └── refcoco.jsonl
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── config.yml
│   │   ├── feature-request.yml
│   │   └── bug-report.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── workflows
│   │   ├── security.yml
│   │   ├── quality.yml
│   │   └── release.yml
│   └── test.yml
├── getting_started
│   ├── 3_eval_deploy.ipynb
│   └── 4_advanced_pretrain.ipynb
├── .gitmodules
├── eo
│   ├── constants.py
│   ├── data
│   │   ├── schema.py
│   │   ├── transforms.py
│   │   └── multim_dataset.py
│   ├── model
│   │   └── configuration_eo1.py
│   └── train
│       ├── pipeline_config.py
│       └── train_utils.py
├── CITATION.cff
├── docker
│   └── Dockerfile
├── .pre-commit-config.yaml
├── pyproject.toml
├── tools
│   ├── openloop.py
│   └── group_length.py
└── .gitignore
/experiments/4_so101/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/inference_service.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/experiments/5_widowx/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/experiments/6_agibot/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/experiments/7_franka/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | # test datasets here
2 |
--------------------------------------------------------------------------------
/experiments/5_widowx/requirements.txt:
--------------------------------------------------------------------------------
1 | gym
2 | funcsigs
3 | numpy==1.24.3
4 |
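Aside: tests/test_dataset.py above currently holds only a placeholder comment. A minimal sketch of what such a test could check is given below; it is illustrative and not a file in this repository, and it assumes pytest and PyYAML plus the mm_datasets / lerobot_datasets layout used by the experiments/*/data-*.yaml configs shown later in this dump.

# Illustrative sketch for tests/test_dataset.py (not part of the repository).
# Assumes pytest + PyYAML; validates the experiments/*/data-*.yaml layout.
import pathlib

import pytest
import yaml

CONFIGS = sorted(pathlib.Path("experiments").glob("*/data-*.yaml"))


@pytest.mark.parametrize("config_path", CONFIGS, ids=str)
def test_data_yaml_layout(config_path):
    cfg = yaml.safe_load(config_path.read_text())
    # every config declares at least one of the two dataset groups
    assert "mm_datasets" in cfg or "lerobot_datasets" in cfg
    for entry in cfg.get("lerobot_datasets") or []:
        # each LeRobot entry names a dataset and the camera streams to load
        assert "repo_id" in entry
        assert "select_video_keys" in entry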
-------------------------------------------------------------------------------- /.assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/.assets/logo.png -------------------------------------------------------------------------------- /.assets/embodiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/.assets/embodiments.png -------------------------------------------------------------------------------- /.assets/merged_grid.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/.assets/merged_grid.gif -------------------------------------------------------------------------------- /demo_data/example1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/example1.jpg -------------------------------------------------------------------------------- /demo_data/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/example2.png -------------------------------------------------------------------------------- /.assets/data_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/.assets/data_example.png -------------------------------------------------------------------------------- /.assets/openloop_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/.assets/openloop_example.png -------------------------------------------------------------------------------- /experiments/7_franka/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | transformers>=4.56.0 3 | opencv-python 4 | imageio 5 | tyro 6 | pillow 7 | pyrealsense2 8 | lerobot 9 | -------------------------------------------------------------------------------- /experiments/8_vllmeval/vlm/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EO1VisionFlowMatchingChat, EO1VisionFlowMatchingChatAguvis 2 | from .prompt import Qwen2VLPromptMixin 3 | -------------------------------------------------------------------------------- /demo_data/refcoco/images/COCO_train2014_000000168643_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/refcoco/images/COCO_train2014_000000168643_2.jpg -------------------------------------------------------------------------------- /demo_data/refcoco/images/COCO_train2014_000000263111_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/refcoco/images/COCO_train2014_000000263111_0.jpg -------------------------------------------------------------------------------- /demo_data/refcoco/images/COCO_train2014_000000579299_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/refcoco/images/COCO_train2014_000000579299_4.jpg 
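Aside: the demo_data/refcoco images listed above and below ship together with demo_data/refcoco/refcoco.jsonl, which experiments/1_demo/data-demo.yaml references via json_path and vision_base_path. The snippet below is an illustrative way to peek at those annotations without assuming a particular record schema; it is not part of the repository.

# Illustrative only: inspect the RefCOCO demo annotations.
import json
import pathlib

jsonl_path = pathlib.Path("demo_data/refcoco/refcoco.jsonl")
with jsonl_path.open() as f:
    for i, line in enumerate(f):
        record = json.loads(line)
        # field names are dataset-specific, so list them instead of assuming a schema
        print(i, sorted(record.keys()))
        if i >= 4:  # the first few records are enough for a sanity check
            break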
-------------------------------------------------------------------------------- /demo_data/refcoco/images/COCO_train2014_000000580905_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/refcoco/images/COCO_train2014_000000580905_2.jpg -------------------------------------------------------------------------------- /demo_data/refcoco/images/COCO_train2014_000000580957_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/refcoco/images/COCO_train2014_000000580957_2.jpg -------------------------------------------------------------------------------- /demo_data/refcoco/images/COCO_train2014_000000567396_13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAILAB-IPEC/EO1/HEAD/demo_data/refcoco/images/COCO_train2014_000000567396_13.jpg -------------------------------------------------------------------------------- /experiments/8_vllmeval/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | export GPU=$(nvidia-smi --list-gpus | wc -l) 4 | torchrun --nproc-per-node=$GPU run.py --config dataset-config.json 5 | -------------------------------------------------------------------------------- /experiments/3_simpler/data-bridge.yaml: -------------------------------------------------------------------------------- 1 | mm_datasets: 2 | 3 | lerobot_datasets: 4 | 5 | - repo_id: bridge_orig_lerobot 6 | root: HF_LEROBOT_HOME 7 | select_video_keys: [observation.images.image_0] 8 | -------------------------------------------------------------------------------- /experiments/3_simpler/data-fractal.yaml: -------------------------------------------------------------------------------- 1 | mm_datasets: 2 | 3 | lerobot_datasets: 4 | 5 | - repo_id: fractal20220817_data_lerobot 6 | root: HF_LEROBOT_HOME 7 | select_video_keys: [observation.images.image] 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: "🙋 General Question" 4 | url: https://github.com/EO-Robotics/EO-1/discussions/new/choose 5 | about: Our preferred starting point if you have any questions about the project. 
6 | -------------------------------------------------------------------------------- /getting_started/3_eval_deploy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2fa50ceb", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [] 10 | } 11 | ], 12 | "metadata": { 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "nbformat": 4, 18 | "nbformat_minor": 5 19 | } 20 | -------------------------------------------------------------------------------- /getting_started/4_advanced_pretrain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "a9616890", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [] 10 | } 11 | ], 12 | "metadata": { 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "nbformat": 4, 18 | "nbformat_minor": 5 19 | } 20 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "experiments/7_franka/deoxys_control"] 2 | path = experiments/7_franka/deoxys_control 3 | url = https://github.com/UT-Austin-RPL/deoxys_control.git 4 | [submodule "experiments/5_widowx/bridge_data_robot"] 5 | path = experiments/5_widowx/bridge_data_robot 6 | url = https://github.com/HaomingSong/bridge_data_robot.git 7 | [submodule "experiments/5_widowx/edgeml"] 8 | path = experiments/5_widowx/edgeml 9 | url = https://github.com/youliangtan/edgeml.git 10 | -------------------------------------------------------------------------------- /experiments/9_pretraining/zero1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": false 4 | }, 5 | "bf16": { 6 | "enabled": true 7 | }, 8 | "gradient_accumulation_steps": "auto", 9 | "gradient_clipping": "auto", 10 | "steps_per_print": 2000, 11 | "train_batch_size": "auto", 12 | "train_micro_batch_size_per_gpu": "auto", 13 | "wall_clock_breakdown": true, 14 | "zero_optimization": { 15 | "stage": 1, 16 | "allgather_partitions": true, 17 | "allgather_bucket_size": 1e9, 18 | "overlap_comm": true, 19 | "reduce_scatter": true, 20 | "reduce_bucket_size": 1e9, 21 | "contiguous_gradients": true 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /experiments/9_pretraining/zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 3, 18 | "overlap_comm": true, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto", 22 | "stage3_prefetch_bucket_size": "auto", 23 | "stage3_param_persistence_threshold": "auto", 24 | "stage3_max_live_parameters": 1e9, 25 | "stage3_max_reuse_distance": 1e9, 26 | "stage3_gather_16bit_weights_on_model_save": true 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /experiments/3_simpler/simpler_env/eval_simpler.sh: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | dist_tasks=( 4 | bridge.sh 5 | drawer_variant_agg.sh 6 | drawer_visual_matching.sh 7 | move_near_variant_agg.sh 8 | move_near_visual_matching.sh 9 | pick_coke_can_variant_agg.sh 10 | pick_coke_can_visual_matching.sh 11 | put_in_drawer_variant_agg.sh 12 | put_in_drawer_visual_matching.sh 13 | ) 14 | 15 | action_ensemble_temp=4 16 | 17 | ckpt_path=YOUR_CHECKPOINT_PATH 18 | model_name=eo 19 | job_name=simpler 20 | logging_dir=results_${model_name}/${job_name}_ck${action_ensemble_temp} 21 | mkdir -p $logging_dir 22 | 23 | conda activate simpler_env 24 | XDG_RUNTIME_DIR=/usr/lib 25 | LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH} 26 | 27 | for task in ${dist_tasks[@]}; do 28 | bash scripts/$task $ckpt_path $model_name \ 29 | $action_ensemble_temp $logging_dir 30 | done 31 | 32 | python tools/calc_metrics_evaluation_videos.py \ 33 | --log-dir-root $logging_dir 34 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do? 2 | 3 | 12 | 13 | 14 | 15 | Fixes # (issue) 16 | 17 | ## Before submitting 18 | 19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). 20 | - [ ] Was this discussed/approved via a GitHub issue? Please add a link to it if that's the case. 21 | - [ ] Did you make sure to update the documentation with your changes? 22 | - [ ] Did you write any new necessary tests? 23 | -------------------------------------------------------------------------------- /experiments/2_libero/data-libero.yaml: -------------------------------------------------------------------------------- 1 | mm_datasets: 2 | 3 | lerobot_datasets: 4 | 5 | - repo_id: libero_spatial_no_noops_1.0.0_lerobot 6 | root: ./demo_data/ 7 | select_video_keys: [observation.images.image, observation.images.wrist_image] 8 | select_state_keys: [observation.state] 9 | select_action_keys: [action] 10 | 11 | # - repo_id: libero_90_no_noops_lerobot 12 | # root: HF_LEROBOT_HOME 13 | # select_video_keys: [observation.images.image, observation.images.wrist_image] 14 | # select_state_keys: [observation.state] 15 | # select_action_keys: [action] 16 | 17 | # - repo_id: libero_object_no_noops_1.0.0_lerobot 18 | # root: HF_LEROBOT_HOME 19 | # select_video_keys: [observation.images.image, observation.images.wrist_image] 20 | # select_state_keys: [observation.state] 21 | # select_action_keys: [action] 22 | 23 | # - repo_id: libero_10_no_noops_1.0.0_lerobot 24 | # root: HF_LEROBOT_HOME 25 | # select_video_keys: [observation.images.image, observation.images.wrist_image] 26 | # select_state_keys: [observation.state] 27 | # select_action_keys: [action] 28 | -------------------------------------------------------------------------------- /experiments/8_vllmeval/dataset-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "EO1-3B": { 4 | "class": "EO1VisionFlowMatchingChat", 5 | "min_pixels": 50176, 6 | "max_pixels": 100352, 7 | "use_custom_prompt": false, 8 | "model_path": "IPEC-COMMUNITY/EO-1-3B" 9 | } 10 | }, 11 | "data": { 12 | "EOBench": { 13 | "class": "EOBench", 14 | "dataset": "EOBench", 15 | "data_file": "IPEC-COMMUNITY/EO-Bench/benchmark_v1.jsonl", 16 | "data_root": "IPEC-COMMUNITY/EO-Bench" 17 | }, 18 | "ERQABench": { 19 | "class": "ERQABench", 20 | 
"dataset": "ERQABench", 21 | "data_root": "IPEC-COMMUNITY/ERQABench", 22 | "data_file": "IPEC-COMMUNITY/ERQABench/benchmark_v1.jsonl" 23 | }, 24 | "RoboVQA": { 25 | "class": "RoboVQA", 26 | "dataset": "RoboVQA", 27 | "data_root": "IPEC-COMMUNITY/RoboVQA", 28 | "data_file": "IPEC-COMMUNITY/RoboVQA/benchmark_v1.jsonl", 29 | "fps": 1 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: "🚀 Feature request" 2 | description: Submit a proposal/request for a new any4lerobot feature 3 | labels: ["Feature request"] 4 | body: 5 | - type: textarea 6 | id: feature-request 7 | validations: 8 | required: true 9 | attributes: 10 | label: Feature request 11 | description: | 12 | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. 13 | 14 | - type: textarea 15 | id: motivation 16 | validations: 17 | required: true 18 | attributes: 19 | label: Motivation 20 | description: | 21 | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. 22 | 23 | - type: textarea 24 | id: contribution 25 | validations: 26 | required: true 27 | attributes: 28 | label: Your contribution 29 | description: | 30 | Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md) 31 | -------------------------------------------------------------------------------- /scripts/chat_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% set state_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'state' or 'state' in content %}{% set state_count.value = state_count.value + 1 %}<|state_start|><|state_pad|><|state_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}{{ noise_prompt }}" 3 | } 4 | -------------------------------------------------------------------------------- /experiments/3_simpler/simpler_env/main_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from simpler_env.evaluation.argparse import get_args 6 | from simpler_env.evaluation.maniskill2_evaluator import 
maniskill2_evaluator
7 |
8 | if __name__ == "__main__":
9 |     args = get_args()
10 |     os.environ["DISPLAY"] = ""
11 |     os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
12 |     gpus = tf.config.list_physical_devices("GPU")
13 |     if len(gpus) > 0:
14 |         tf.config.set_logical_device_configuration(
15 |             gpus[0],
16 |             [tf.config.LogicalDeviceConfiguration(memory_limit=args.tf_memory_limit)],
17 |         )
18 |     print(f"**** {args.policy_model} ****")
19 |     if args.policy_model in ["eo", "eo-1"]:
20 |         assert args.ckpt_path is not None
21 |         from simpler_env.policies.eo.eo_model import EOInference
22 |
23 |         model = EOInference(
24 |             saved_model_path=args.ckpt_path,
25 |             policy_setup=args.policy_setup,
26 |             action_scale=args.action_scale,
27 |             action_ensemble_temp=args.action_ensemble_temp,
28 |         )
29 |     else:
30 |         raise NotImplementedError()
31 |
32 |     # run real-to-sim evaluation
33 |     success_arr = maniskill2_evaluator(model, args)
34 |     print(args)
35 |     print(" " * 10, "Average success", np.mean(success_arr))
36 |
--------------------------------------------------------------------------------
/experiments/1_demo/train.sh:
--------------------------------------------------------------------------------
1 | GPUS=1
2 | PER_DEVICE_BATCH_SIZE=8
3 |
4 | ACCELERATE_ARGS="--num_machines 1 --machine_rank 0 --num_processes=${GPUS}"
5 |
6 | # * datasets
7 | dataset=experiments/1_demo/data-demo.yaml
8 | dataset_name=$(basename ${dataset%.*})
9 |
10 | # hparams
11 | lr=1e-4
12 | mlr=1e-4
13 | vlr=2e-5
14 |
15 | chunk_size=30
16 | epoch=50
17 |
18 | model_name_or_path=
19 | run_name=${dataset_name}_ck${chunk_size}_gpu${GPUS}_lr${lr}_vlr${vlr}_mlr${mlr}_bs${PER_DEVICE_BATCH_SIZE}
20 |
21 |
22 | conda activate eo
23 |
24 | accelerate launch $ACCELERATE_ARGS scripts/train.py \
25 |     --vlm-name-or-path ../pretrained/Qwen2.5-VL-3B-Instruct \
26 |     --data-path ${dataset} \
27 |     --chunk-size ${chunk_size} \
28 |     --dataloader-num-workers 8 \
29 |     --freeze-vision-tower False \
30 |     --freeze-llm False \
31 |     --freeze-merger False \
32 |     --bf16 True \
33 |     --tf32 True \
34 |     --fp16 False \
35 |     --num-train-epochs ${epoch} \
36 |     --per-device-train-batch-size ${PER_DEVICE_BATCH_SIZE} \
37 |     --learning-rate ${lr} \
38 |     --merger-lr ${mlr} \
39 |     --vision-lr ${vlr} \
40 |     --weight-decay 0.1 \
41 |     --warmup-ratio 0.03 \
42 |     --lr-scheduler-type cosine \
43 |     --gradient-checkpointing True \
44 |     --save-strategy steps \
45 |     --logging-steps 100 \
46 |     --save-steps 5000 \
47 |     --save-total-limit 3 \
48 |     --report-to none \
49 |     --run-name ${run_name} \
50 |     --attn-implementation flash_attention_2
51 |
--------------------------------------------------------------------------------
/scripts/eval_policy.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import torch
4 | from PIL import Image
5 | from transformers import AutoModel, AutoProcessor
6 |
7 | argparser = argparse.ArgumentParser()
8 | argparser.add_argument(
9 |     "--model_path",
10 |     type=str,
11 |     default="outputs/your_path",
12 |     help="Path to the pretrained model",
13 | )
14 | argparser.add_argument(
15 |     "--repo_id",
16 |     type=str,
17 |     default="libero_spatial_no_noops_1.0.0_lerobot",
18 |     help="Repo id of the LeRobot dataset",
19 | )
20 | args = argparser.parse_args()
21 |
22 |
23 | def eval_policy():
24 |     # set the observation (image, state, etc.)
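# (annotation for this listing, not part of scripts/eval_policy.py)
# The dummy batch built below uses the same keys as the LIBERO data config
# (experiments/2_libero/data-libero.yaml): observation.images.image,
# observation.images.wrist_image and observation.state correspond to that
# dataset's select_video_keys / select_state_keys. When adapting this script to
# another robot, the batch keys should match the dataset the checkpoint was
# trained on; the commented-out repo_id entry would pass that dataset identity
# through to the processor.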
25 | import numpy as np 26 | 27 | image0 = (np.random.rand(224, 224, 3) * 255).astype(np.uint8) 28 | image1 = Image.fromarray(image0.copy()) 29 | 30 | model = ( 31 | AutoModel.from_pretrained(args.model_path, dtype=torch.bfloat16, trust_remote_code=True).eval().cuda() 32 | ) 33 | 34 | processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True) 35 | 36 | batch = { 37 | "observation.images.image": [image0], 38 | "observation.images.wrist_image": [image1], 39 | "observation.state": [torch.rand(8)], 40 | "task": ["put the object in the box."], 41 | # "repo_id": [args.repo_id], 42 | } 43 | ov_output = processor.select_action( 44 | model, 45 | batch, 46 | ) 47 | print(ov_output) 48 | 49 | 50 | if __name__ == "__main__": 51 | eval_policy() 52 | -------------------------------------------------------------------------------- /experiments/1_demo/data-demo.yaml: -------------------------------------------------------------------------------- 1 | mm_datasets: 2 | - json_path: demo_data/refcoco/refcoco.jsonl # jsonl file 3 | vision_base_path: demo_data/refcoco # base path for vision data files referenced in the JSONL 4 | sampling_strategy: random:100% # sampling strategy 5 | 6 | - json_path: demo_data/interleaved_demo.jsonl # interleaved data jsonl 7 | 8 | # @robot control config 9 | lerobot_datasets: 10 | - repo_id: demo25 11 | root: ./demo_data 12 | # Optional fields: 13 | # episodes: [1, 2, 3] # specific episodes to load (None = all) 14 | train_subtask: mix:0.9 # mix sub-task instructions and overall instructions with 90% sub-task 15 | delta_action: false # train with delta actions 16 | state_mode: "MEAN_STD" # state normalization mode 17 | # which camera streams to load 18 | select_video_keys: [observation.images.head, observation.images.hand_left, observation.images.hand_right] 19 | # proprioceptive states 20 | select_state_keys: [observation.states.joint.position, observation.states.effector.position] 21 | # action targets 22 | select_action_keys: [actions.joint.position, actions.effector.position] 23 | effector_indices: [14, 15] # indices of effector channels in the flattened action vector 24 | weight: 1.0 # dataset weight for sampling 25 | -------------------------------------------------------------------------------- /experiments/2_libero/train.sh: -------------------------------------------------------------------------------- 1 | GPUS=8 2 | PER_DEVICE_BATCH_SIZE=64 3 | 4 | ACCELERATE_ARGS="--num_machines 1 --machine_rank 0 --num_processes=${GPUS} --multi_gpu" 5 | 6 | # datasets 7 | dataset=experiments/2_libero/data-libero.yaml 8 | dataset_name=$(basename ${dataset%.*}) 9 | 10 | # hparams 11 | lr=1e-4 12 | mlr=1e-4 13 | vlr=2e-5 14 | 15 | chunk_size=8 16 | epoch=50 17 | 18 | model_name_or_path= 19 | run_name=${dataset_name}_ck${chunk_size}_gpu${GPUS}_lr${lr}_vlr${vlr}_mlr${mlr}_bs${PER_DEVICE_BATCH_SIZE} 20 | 21 | 22 | conda activate eo 23 | 24 | accelerate launch $ACCELERATE_ARGS scripts/train.py \ 25 | ${model_name_or_path:+--model-name-or-path $model_name_or_path} \ 26 | --vlm-name-or-path ../pretrained/Qwen2.5-VL-3B-Instruct \ 27 | --data-path ${dataset} \ 28 | --chunk-size ${chunk_size} \ 29 | --dataloader-num-workers 8 \ 30 | --freeze-vision-tower False \ 31 | --freeze-llm False \ 32 | --freeze-merger False \ 33 | --bf16 True \ 34 | --tf32 True \ 35 | --fp16 False \ 36 | --num-train-epochs ${epoch} \ 37 | --per-device-train-batch-size ${PER_DEVICE_BATCH_SIZE} \ 38 | --learning-rate ${lr} \ 39 | --merger-lr ${mlr} \ 40 | --vision-lr ${vlr} \ 41 | --weight-decay 
0.1 \ 42 | --warmup-ratio 0.03 \ 43 | --lr-scheduler-type cosine \ 44 | --gradient-checkpointing True \ 45 | --save-strategy steps \ 46 | --logging-steps 100 \ 47 | --save-steps 5000 \ 48 | --save-total-limit 3 \ 49 | --report-to none \ 50 | --run-name ${run_name} \ 51 | --attn-implementation flash_attention_2 52 | -------------------------------------------------------------------------------- /experiments/3_simpler/train_bridge.sh: -------------------------------------------------------------------------------- 1 | GPUS=8 2 | PER_DEVICE_BATCH_SIZE=128 3 | 4 | ACCELERATE_ARGS="--main_process_ip=$MASTER_ADDR --main_process_port=$MASTER_PORT \ 5 | --num_machines 1 --machine_rank 0 --num_processes=${GPUS} --multi_gpu" 6 | 7 | dataset=experiments/3_simpler/data-bridge.yaml 8 | dataset_name=$(basename ${dataset%.*}) 9 | 10 | lr=1e-4 11 | mlr=1e-4 12 | vlr=2e-5 13 | 14 | chunk_size=4 15 | epoch=20 16 | 17 | model_name_or_path= 18 | run_name=${dataset_name}_ck${chunk_size}_gpu${GPUS}_lr${lr}_vlr${vlr}_mlr${mlr}_bs${PER_DEVICE_BATCH_SIZE} 19 | 20 | 21 | conda activate eo 22 | 23 | accelerate launch $ACCELERATE_ARGS scripts/train.py \ 24 | ${model_name_or_path:+--model-name-or-path $model_name_or_path} \ 25 | --vlm-name-or-path ../pretrained/Qwen2.5-VL-3B-Instruct \ 26 | --data-path ${dataset} \ 27 | --chunk-size ${chunk_size} \ 28 | --dataloader-num-workers 8 \ 29 | --freeze-vision-tower False \ 30 | --freeze-llm False \ 31 | --freeze-merger False \ 32 | --bf16 True \ 33 | --tf32 True \ 34 | --fp16 False \ 35 | --num-train-epochs ${epoch} \ 36 | --per-device-train-batch-size ${PER_DEVICE_BATCH_SIZE} \ 37 | --learning-rate ${lr} \ 38 | --merger-lr ${mlr} \ 39 | --vision-lr ${vlr} \ 40 | --weight-decay 0.1 \ 41 | --warmup-ratio 0.03 \ 42 | --lr-scheduler-type cosine \ 43 | --gradient-checkpointing True \ 44 | --save-strategy steps \ 45 | --logging-steps 100 \ 46 | --save-steps 5000 \ 47 | --save-total-limit 3 \ 48 | --report-to none \ 49 | --run-name ${run_name} \ 50 | --attn-implementation flash_attention_2 51 | -------------------------------------------------------------------------------- /experiments/3_simpler/train_fractal.sh: -------------------------------------------------------------------------------- 1 | GPUS=8 2 | PER_DEVICE_BATCH_SIZE=256 3 | 4 | ACCELERATE_ARGS="--main_process_ip=$MASTER_ADDR --main_process_port=$MASTER_PORT \ 5 | --num_machines 1 --machine_rank 0 --num_processes=${GPUS} --multi_gpu" 6 | 7 | dataset=experiments/3_simpler/data-fractal.yaml 8 | dataset_name=$(basename ${dataset%.*}) 9 | 10 | lr=1e-4 11 | mlr=1e-4 12 | vlr=2e-5 13 | 14 | chunk_size=4 15 | epoch=10 16 | 17 | model_name_or_path= 18 | run_name=${dataset_name}_ck${chunk_size}_gpu${GPUS}_lr${lr}_vlr${vlr}_mlr${mlr}_bs${PER_DEVICE_BATCH_SIZE} 19 | 20 | 21 | conda activate eo 22 | 23 | accelerate launch $ACCELERATE_ARGS scripts/train.py \ 24 | ${model_name_or_path:+--model-name-or-path $model_name_or_path} \ 25 | --vlm-name-or-path ../pretrained/Qwen2.5-VL-3B-Instruct \ 26 | --data-path ${dataset} \ 27 | --chunk-size ${chunk_size} \ 28 | --dataloader-num-workers 8 \ 29 | --freeze-vision-tower False \ 30 | --freeze-llm False \ 31 | --freeze-merger False \ 32 | --bf16 True \ 33 | --tf32 True \ 34 | --fp16 False \ 35 | --num-train-epochs ${epoch} \ 36 | --per-device-train-batch-size ${PER_DEVICE_BATCH_SIZE} \ 37 | --learning-rate ${lr} \ 38 | --merger-lr ${mlr} \ 39 | --vision-lr ${vlr} \ 40 | --weight-decay 0.1 \ 41 | --warmup-ratio 0.03 \ 42 | --lr-scheduler-type cosine \ 43 | --gradient-checkpointing True \ 44 
| --save-strategy steps \ 45 | --logging-steps 100 \ 46 | --save-steps 5000 \ 47 | --save-total-limit 3 \ 48 | --report-to none \ 49 | --run-name ${run_name} \ 50 | --attn-implementation flash_attention_2 51 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "🐛 Bug Report" 2 | description: Submit a bug report to help us improve EO-1 3 | labels: ["bug"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to fill out this bug report! 🤗 9 | 10 | - type: textarea 11 | id: system-info 12 | attributes: 13 | label: System Info 14 | description: Please share your system info with us. 15 | placeholder: platform, python version, EO-1 commit, ... 16 | validations: 17 | required: true 18 | 19 | - type: textarea 20 | id: reproduction 21 | validations: 22 | required: true 23 | attributes: 24 | label: Reproduction 25 | description: | 26 | Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet. 27 | Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs. 28 | If you have code snippets, error messages, stack traces please provide them here as well. 29 | Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting 30 | Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 31 | 32 | placeholder: | 33 | Steps to reproduce the behavior: 34 | 35 | 1. 36 | 2. 37 | 3. 38 | 39 | - type: textarea 40 | id: expected-behavior 41 | validations: 42 | required: true 43 | attributes: 44 | label: Expected behavior 45 | description: "A clear and concise description of what you would expect to happen." 46 | -------------------------------------------------------------------------------- /eo/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 EO-Robotics Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """This module defines constants used throughout the application, 16 | including system messages and various special tokens for language 17 | and vision models. 18 | These tokens are used to demarcate different types of input such 19 | as images, videos, actions, and states, with specific sets for 20 | different model architectures like LLaVA and datasets like LeRobot. 21 | """ 22 | 23 | SYSTEM_MESSAGE = "You are a helpful physical assistant." 
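# (annotation for this listing, not part of eo/constants.py) The special tokens
# defined below are the ones emitted by the chat template in
# scripts/chat_template.json: images are wrapped as
# <|vision_start|><|image_pad|><|vision_end|>, videos as
# <|vision_start|><|video_pad|><|vision_end|>, and proprioceptive states as
# <|state_start|><|state_pad|><|state_end|>.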
24 | 25 | # qwen2.5-vl special tokens 26 | DEFAULT_IM_START_TOKEN = "<|im_start|>" 27 | DEFAULT_IM_END_TOKEN = "<|im_end|>" 28 | DEFAULT_IMAGE_TOKEN = "<|image_pad|>" 29 | DEFAULT_VIDEO_TOKEN = "<|video_pad|>" 30 | VISION_START_TOKEN = "<|vision_start|>" 31 | VISION_END_TOKEN = "<|vision_end|>" 32 | 33 | # EO-1 special tokens 34 | ACTION_START_TOKEN = "<|action_start|>" 35 | DEFAULT_ACTION_TOKEN = "<|action_pad|>" 36 | PASS_ACTION_TOKEN = "<|action_pass|>" 37 | ACTION_END_TOKEN = "<|action_end|>" 38 | STATE_START_TOKEN = "<|state_start|>" 39 | DEFAULT_STATE_TOKEN = "<|state_pad|>" 40 | STATE_END_TOKEN = "<|state_end|>" 41 | TASK_VLA_TOKEN = "<|vla|>" 42 | 43 | # llava style special tokens 44 | IGNORE_INDEX = -100 45 | LLAVA_IMAGE_TOKEN = "" 46 | LLAVA_VIDEO_TOKEN = "