├── .gitignore ├── LICENSE ├── README.md ├── assets ├── dataset.jpg └── teaser.png ├── benchmark ├── auto_run_32b.sh ├── auto_run_qwen2_5vl.sh ├── auto_run_qwen2_5vl_cot.sh ├── config_qwen2_5.json ├── config_vlm3dreasoner.json ├── eval_vsibench.py ├── eval_vsibench.sh ├── libero │ ├── auto_run_vla.sh │ ├── auto_run_vst_vla_libero.sh │ ├── auto_run_vst_vla_libero_norm.sh │ └── libero_requirements.txt ├── run.py ├── utils │ ├── eval_utils.py │ ├── merge_libero.py │ └── merge_vsibench.py └── vsibench.yaml ├── config ├── data │ └── llavanext.yaml └── veomni │ ├── cold_start_qwen2_5_vl_fspd1.yaml │ ├── qwen2_5_vl_fspd1_fov_packing_example.yaml │ └── qwen2_5vla │ ├── vla_qwen2_5_vl_fspd1.yaml │ └── vla_qwen2_5_vl_fspd1_new_token.yaml ├── cookbook ├── demo_image.jpg ├── object_detection_3d.ipynb └── scene_understanding.ipynb ├── docs ├── evaluation.md └── train.md ├── prepare_data ├── sft │ ├── convert_json_parquet.py │ └── convert_llavanext_parquet.py └── vla │ └── libero │ ├── norm_stats.json │ ├── preprocess_libero.py │ └── regenerate_libero_dataset.py ├── pyrightconfig.json ├── requirements.txt ├── scripts ├── auto_run.sh ├── debug.sh └── train.sh ├── tools ├── compute_num_token.py ├── download_hf_data.py └── veomni_to_hf.py └── vst ├── chat_template.py ├── constant.py ├── dataset_iterative.py ├── load.py ├── preprocess.py ├── preprocess_val.py ├── preprocess_vla.py ├── prerpocess_box3d.py ├── prompt.py ├── train.py ├── train_vla.py ├── utils ├── __init__.py ├── box3d_utils.py ├── data_utils.py ├── general.py ├── utils_model.py ├── vis_utils.py └── vision_process.py └── vla ├── action_tokenizer.py ├── libero ├── libero_utils.py ├── run_libero_eval.py └── run_libero_eval_mp.py ├── modeling_vst_vla.py └── robot_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/README.md -------------------------------------------------------------------------------- /assets/dataset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/assets/dataset.jpg -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /benchmark/auto_run_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/auto_run_32b.sh -------------------------------------------------------------------------------- /benchmark/auto_run_qwen2_5vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/auto_run_qwen2_5vl.sh -------------------------------------------------------------------------------- /benchmark/auto_run_qwen2_5vl_cot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/auto_run_qwen2_5vl_cot.sh -------------------------------------------------------------------------------- /benchmark/config_qwen2_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/config_qwen2_5.json -------------------------------------------------------------------------------- /benchmark/config_vlm3dreasoner.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/config_vlm3dreasoner.json -------------------------------------------------------------------------------- /benchmark/eval_vsibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/eval_vsibench.py -------------------------------------------------------------------------------- /benchmark/eval_vsibench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/eval_vsibench.sh -------------------------------------------------------------------------------- /benchmark/libero/auto_run_vla.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/libero/auto_run_vla.sh -------------------------------------------------------------------------------- /benchmark/libero/auto_run_vst_vla_libero.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/libero/auto_run_vst_vla_libero.sh -------------------------------------------------------------------------------- /benchmark/libero/auto_run_vst_vla_libero_norm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/libero/auto_run_vst_vla_libero_norm.sh -------------------------------------------------------------------------------- /benchmark/libero/libero_requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/libero/libero_requirements.txt -------------------------------------------------------------------------------- /benchmark/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/run.py -------------------------------------------------------------------------------- /benchmark/utils/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/utils/eval_utils.py -------------------------------------------------------------------------------- /benchmark/utils/merge_libero.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/utils/merge_libero.py -------------------------------------------------------------------------------- /benchmark/utils/merge_vsibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/utils/merge_vsibench.py -------------------------------------------------------------------------------- /benchmark/vsibench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/benchmark/vsibench.yaml -------------------------------------------------------------------------------- /config/data/llavanext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/config/data/llavanext.yaml -------------------------------------------------------------------------------- /config/veomni/cold_start_qwen2_5_vl_fspd1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/config/veomni/cold_start_qwen2_5_vl_fspd1.yaml -------------------------------------------------------------------------------- /config/veomni/qwen2_5_vl_fspd1_fov_packing_example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/config/veomni/qwen2_5_vl_fspd1_fov_packing_example.yaml -------------------------------------------------------------------------------- /config/veomni/qwen2_5vla/vla_qwen2_5_vl_fspd1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/config/veomni/qwen2_5vla/vla_qwen2_5_vl_fspd1.yaml -------------------------------------------------------------------------------- /config/veomni/qwen2_5vla/vla_qwen2_5_vl_fspd1_new_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/config/veomni/qwen2_5vla/vla_qwen2_5_vl_fspd1_new_token.yaml -------------------------------------------------------------------------------- /cookbook/demo_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/cookbook/demo_image.jpg -------------------------------------------------------------------------------- /cookbook/object_detection_3d.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/cookbook/object_detection_3d.ipynb -------------------------------------------------------------------------------- /cookbook/scene_understanding.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/cookbook/scene_understanding.ipynb -------------------------------------------------------------------------------- /docs/evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/docs/evaluation.md -------------------------------------------------------------------------------- /docs/train.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/docs/train.md -------------------------------------------------------------------------------- /prepare_data/sft/convert_json_parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/prepare_data/sft/convert_json_parquet.py -------------------------------------------------------------------------------- /prepare_data/sft/convert_llavanext_parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/prepare_data/sft/convert_llavanext_parquet.py -------------------------------------------------------------------------------- /prepare_data/vla/libero/norm_stats.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/prepare_data/vla/libero/norm_stats.json -------------------------------------------------------------------------------- /prepare_data/vla/libero/preprocess_libero.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/prepare_data/vla/libero/preprocess_libero.py -------------------------------------------------------------------------------- /prepare_data/vla/libero/regenerate_libero_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/prepare_data/vla/libero/regenerate_libero_dataset.py -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/pyrightconfig.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/auto_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/scripts/auto_run.sh -------------------------------------------------------------------------------- /scripts/debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/scripts/debug.sh -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/scripts/train.sh -------------------------------------------------------------------------------- /tools/compute_num_token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/tools/compute_num_token.py -------------------------------------------------------------------------------- /tools/download_hf_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/tools/download_hf_data.py -------------------------------------------------------------------------------- /tools/veomni_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/tools/veomni_to_hf.py -------------------------------------------------------------------------------- /vst/chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/chat_template.py -------------------------------------------------------------------------------- /vst/constant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/constant.py -------------------------------------------------------------------------------- /vst/dataset_iterative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/dataset_iterative.py -------------------------------------------------------------------------------- /vst/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/load.py -------------------------------------------------------------------------------- /vst/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/preprocess.py -------------------------------------------------------------------------------- /vst/preprocess_val.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/preprocess_val.py -------------------------------------------------------------------------------- /vst/preprocess_vla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/preprocess_vla.py -------------------------------------------------------------------------------- /vst/prerpocess_box3d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/prerpocess_box3d.py -------------------------------------------------------------------------------- /vst/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/prompt.py -------------------------------------------------------------------------------- /vst/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/train.py -------------------------------------------------------------------------------- /vst/train_vla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/train_vla.py -------------------------------------------------------------------------------- /vst/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 [Visual Spatial Tuning] Authors -------------------------------------------------------------------------------- /vst/utils/box3d_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/utils/box3d_utils.py -------------------------------------------------------------------------------- /vst/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/utils/data_utils.py -------------------------------------------------------------------------------- /vst/utils/general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/utils/general.py -------------------------------------------------------------------------------- /vst/utils/utils_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/utils/utils_model.py -------------------------------------------------------------------------------- /vst/utils/vis_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/utils/vis_utils.py -------------------------------------------------------------------------------- /vst/utils/vision_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/utils/vision_process.py -------------------------------------------------------------------------------- /vst/vla/action_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/vla/action_tokenizer.py -------------------------------------------------------------------------------- /vst/vla/libero/libero_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/vla/libero/libero_utils.py -------------------------------------------------------------------------------- /vst/vla/libero/run_libero_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/vla/libero/run_libero_eval.py -------------------------------------------------------------------------------- /vst/vla/libero/run_libero_eval_mp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/vla/libero/run_libero_eval_mp.py -------------------------------------------------------------------------------- /vst/vla/modeling_vst_vla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/vla/modeling_vst_vla.py -------------------------------------------------------------------------------- /vst/vla/robot_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yangr116/VST/HEAD/vst/vla/robot_utils.py --------------------------------------------------------------------------------